All of lore.kernel.org
 help / color / mirror / Atom feed
From: lantianyu1986@gmail.com
To: kys@microsoft.com, haiyangz@microsoft.com,
	sthemmin@microsoft.com, sashal@kernel.org,
	michael.h.kelley@microsoft.com, david@redhat.com
Cc: Tianyu Lan <Tianyu.Lan@microsoft.com>,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
	vkuznets@redhat.com, eric.devolder@oracle.com
Subject: [RFC PATCH V2 7/10] x86/Hyper-V/Balloon: Handle mem hot-remove request
Date: Tue,  7 Jan 2020 21:09:47 +0800	[thread overview]
Message-ID: <20200107130950.2983-8-Tianyu.Lan@microsoft.com> (raw)
In-Reply-To: <20200107130950.2983-1-Tianyu.Lan@microsoft.com>

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

Linux system mem hot plug unit is 128MB and request page
number maybe not aligned with unit. The non-aligned case
will handle in the later.

Handle mem hot-remve request:
First, search memory from ha region list. If find suitable memory
block, offline & remove memory and create ha region region "gap"
struct for the range. "gap" means the range in the hot-add region
is offlined or removed. The following mem hot-add msg may add
memory in the gap range back.

If there is no suitable memory in the hot-add region, search memory
from the system memory on the target node and perform offline&remove
memory.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 drivers/hv/hv_balloon.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 184 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 43e490f492d5..3d8c09fe148a 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -56,6 +56,10 @@
 #define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
 #define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)
 
+#define MAX_HOT_REMOVE_ENTRIES						\
+		((PAGE_SIZE - sizeof(struct dm_hot_remove_response))	\
+		 / sizeof(union dm_mem_page_range))
+
 enum {
 	DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
 	DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
@@ -697,6 +701,7 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 {
 	struct memory_notify *mem = (struct memory_notify *)v;
 	unsigned long pfn_count;
+	int need_unlock;
 
 	switch (val) {
 	case MEM_ONLINE:
@@ -708,7 +713,11 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 		break;
 
 	case MEM_OFFLINE:
-		mutex_lock(&dm_device.ha_lock);
+		if (dm_device.lock_thread != current) {
+			mutex_lock(&dm_device.ha_lock);
+			need_unlock = 1;
+		}
+
 		pfn_count = hv_page_offline_check(mem->start_pfn,
 						  mem->nr_pages);
 		if (pfn_count <= dm_device.num_pages_onlined) {
@@ -722,7 +731,9 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 			WARN_ON_ONCE(1);
 			dm_device.num_pages_onlined = 0;
 		}
-		mutex_unlock(&dm_device.ha_lock);
+
+		if (need_unlock)
+			mutex_unlock(&dm_device.ha_lock);
 		break;
 	case MEM_GOING_ONLINE:
 	case MEM_GOING_OFFLINE:
@@ -1046,14 +1057,183 @@ static unsigned long process_hot_add(unsigned long pg_start,
 	return handle_pg_range(pg_start, pfn_cnt);
 }
 
+static int hv_hot_remove_range(unsigned int nid, unsigned long start_pfn,
+			       unsigned long end_pfn, unsigned long nr_pages,
+			       unsigned long *request_index,
+			       union dm_mem_page_range *range_array,
+			       struct hv_hotadd_state *has)
+{
+	unsigned long block_pages = HA_CHUNK;
+	unsigned long rm_pages = nr_pages;
+	unsigned long pfn;
+	int ret;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += block_pages) {
+		struct hv_hotadd_gap *gap;
+		int in_gap;
+
+		if (*request_index >= MAX_HOT_REMOVE_ENTRIES) {
+			struct dm_hot_remove_response *resp =
+				(struct dm_hot_remove_response *)
+					balloon_up_send_buffer;
+
+			/* Flush out all hot-remove ranges. */
+			ret = hv_send_hot_remove_response(resp, *request_index,
+							  true);
+			if (ret)
+				return ret;
+
+			/* Reset request buffer. */
+			memset(resp, 0x00, PAGE_SIZE);
+			*request_index = 0;
+		}
+
+		/*
+		 * Memory in hot-add region gaps has been offlined or removed
+		 * and so skip it if remove range overlap with gap.
+		 */
+		if (has) {
+			list_for_each_entry(gap, &has->gap_list, list)
+				if (!(pfn >= gap->end_pfn ||
+				      pfn + block_pages < gap->start_pfn)) {
+					in_gap = 1;
+					break;
+				}
+
+			if (in_gap)
+				continue;
+		}
+
+		if (online_section_nr(pfn_to_section_nr(pfn))
+		    && is_mem_section_removable(pfn, block_pages)) {
+			ret = offline_and_remove_memory(nid, pfn << PAGE_SHIFT,
+					block_pages << PAGE_SHIFT);
+			if (ret)
+				continue;
+
+			range_array[*request_index].finfo.start_page = pfn;
+			range_array[*request_index].finfo.page_cnt
+					= block_pages;
+
+			(*request_index)++;
+			nr_pages -= block_pages;
+
+			if (!nr_pages)
+				break;
+		}
+	}
+
+	return rm_pages - nr_pages;
+}
+
+static int hv_hot_remove_from_ha_list(unsigned int nid, unsigned long nr_pages,
+				      unsigned long *request_index,
+				      union dm_mem_page_range *range_array)
+{
+	struct hv_hotadd_state *has;
+	unsigned long start_pfn, end_pfn;
+	int rm_pages;
+	int old_index;
+	int ret, i;
+
+	mutex_lock(&dm_device.ha_lock);
+	dm_device.lock_thread = current;
+	list_for_each_entry(has, &dm_device.ha_region_list, list) {
+		rm_pages = min(nr_pages,
+				has->covered_end_pfn - has->start_pfn);
+		start_pfn = has->start_pfn;
+		end_pfn = has->covered_end_pfn;
+		old_index = *request_index;
+
+		if (!rm_pages || pfn_to_nid(start_pfn) != nid)
+			continue;
+
+		rm_pages = hv_hot_remove_range(nid, start_pfn, end_pfn,
+				rm_pages, request_index, range_array, has);
+		if (rm_pages < 0) {
+			ret = rm_pages;
+			goto error;
+		} else if (!rm_pages) {
+			continue;
+		}
+
+		nr_pages -= rm_pages;
+		dm_device.num_pages_added -= rm_pages;
+
+		/* Create gaps for hot remove regions. */
+		for (i = old_index; i < *request_index; i++) {
+			struct hv_hotadd_gap *gap;
+
+			gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
+			if (!gap) {
+				/*
+				 * Disable dm hot-plug when fails to allocate
+				 * memory for gaps.
+				 */
+				ret = -ENOMEM;
+				do_hot_add = false;
+				goto error;
+			}
+
+			INIT_LIST_HEAD(&gap->list);
+			gap->start_pfn = range_array[i].finfo.start_page;
+			gap->end_pfn =
+				gap->start_pfn + range_array[i].finfo.page_cnt;
+			list_add_tail(&gap->list, &has->gap_list);
+		}
+
+		if (!nr_pages)
+			break;
+	}
+
+	ret = nr_pages;
+ error:
+	dm_device.lock_thread = NULL;
+	mutex_unlock(&dm_device.ha_lock);
+
+	return ret;
+}
+
+static void hv_mem_hot_remove(unsigned int nid, u64 nr_pages)
+{
+	struct dm_hot_remove_response *resp
+		= (struct dm_hot_remove_response *)balloon_up_send_buffer;
+	unsigned long start_pfn = node_start_pfn(nid);
+	unsigned long end_pfn = node_end_pfn(nid);
+	unsigned long request_index = 0;
+	int remain_pages;
+
+	/* Todo: Handle request of non-aligned page number later. */
+
+	/* Search hot-remove memory region from hot add list first.*/
+	memset(resp, 0x00, PAGE_SIZE);
+	remain_pages = hv_hot_remove_from_ha_list(nid, nr_pages,
+				&request_index,
+				resp->range_array);
+	if (remain_pages < 0) {
+		/* Send failure response msg. */
+		request_index = 0;
+	} else if (remain_pages) {
+		start_pfn = ALIGN(start_pfn, HA_CHUNK);
+		hv_hot_remove_range(nid, start_pfn, end_pfn, remain_pages,
+				    &request_index, resp->range_array, NULL);
+	}
+
+	hv_send_hot_remove_response(resp, request_index, false);
+}
+
 #endif
 
 static void hot_remove_req(union dm_msg_info *msg_info)
 {
 	struct hv_dynmem_device *dm = &dm_device;
+	unsigned int numa_node = msg_info->hot_remove.virtual_node;
+	unsigned int page_count = msg_info->hot_remove.page_count;
 
-	/* Add hot remove operation later and send failure response. */
-	hv_send_hot_remove_response((struct dm_hot_remove_response *)
+	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && do_hot_add)
+		hv_mem_hot_remove(numa_node, page_count);
+	else
+		hv_send_hot_remove_response((struct dm_hot_remove_response *)
 				balloon_up_send_buffer, 0, false);
 
 	dm->state = DM_INITIALIZED;
-- 
2.14.5


  parent reply	other threads:[~2020-01-07 13:10 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-07 13:09 [RFC PATCH V2 00/10] x86/Hyper-V: Add Dynamic memory hot-remove function lantianyu1986
2020-01-07 13:09 ` [RFC PATCH V2 1/10] mm/resource: Move child to new resource when release mem region lantianyu1986
2020-01-20 18:34   ` Michael Kelley
2020-01-20 19:20   ` Michael Kelley
2020-01-07 13:09 ` [RFC PATCH V2 2/10] mm: expose is_mem_section_removable() symbol lantianyu1986
2020-01-07 13:36   ` Michal Hocko
2020-01-10 13:41     ` David Hildenbrand
2020-01-13 14:49       ` [EXTERNAL] " Tianyu Lan
2020-01-13 15:01         ` David Hildenbrand
2020-01-14  9:50         ` Michal Hocko
2020-01-17 16:35           ` Tianyu Lan
2020-01-20 14:14             ` Michal Hocko
2020-01-07 13:09 ` [RFC PATCH V2 3/10] x86/Hyper-V/Balloon: Replace hot-add and balloon up works with a common work lantianyu1986
2020-01-20 19:12   ` Michael Kelley
2020-01-07 13:09 ` [RFC PATCH V2 4/10] x86/Hyper-V/Balloon: Convert spin lock ha_lock to mutex lantianyu1986
2020-01-07 13:09 ` [RFC PATCH V2 5/10] x86/Hyper-V/Balloon: Avoid releasing ha_lock when traverse ha_region_list lantianyu1986
2020-01-07 13:09 ` [RFC PATCH V2 6/10] x86/Hyper-V/Balloon: Enable mem hot-remove capability lantianyu1986
2020-01-07 13:09 ` lantianyu1986 [this message]
2020-01-08  9:54   ` [RFC PATCH V2 7/10] x86/Hyper-V/Balloon: Handle mem hot-remove request kbuild test robot
2020-01-08 12:03   ` kbuild test robot
2020-01-07 13:09 ` [RFC PATCH V2 8/10] x86/Hyper-V/Balloon: Handle request with non-aligned page number lantianyu1986
2020-01-07 13:09 ` [RFC PATCH V2 9/10] x86/Hyper-V/Balloon: Hot add mem in the gaps of hot add region lantianyu1986
2020-01-07 13:09 ` [RFC PATCH V2 10/10] x86/Hyper-V: Workaround Hyper-V unballoon msg bug lantianyu1986
2020-01-08 22:23   ` kbuild test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200107130950.2983-8-Tianyu.Lan@microsoft.com \
    --to=lantianyu1986@gmail.com \
    --cc=Tianyu.Lan@microsoft.com \
    --cc=david@redhat.com \
    --cc=eric.devolder@oracle.com \
    --cc=haiyangz@microsoft.com \
    --cc=kys@microsoft.com \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael.h.kelley@microsoft.com \
    --cc=sashal@kernel.org \
    --cc=sthemmin@microsoft.com \
    --cc=vkuznets@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.