[RFC PATCH 3/4] Hyper-V/Balloon: Call add_memory() with dm_device.ha_lock.

From: lantianyu1986@gmail.com
To: kys@microsoft.com, haiyangz@microsoft.com,
	sthemmin@microsoft.com, sashal@kernel.org,
	michael.h.kelley@microsoft.com
Cc: Tianyu Lan <Tianyu.Lan@microsoft.com>,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
	vkuznets@redhat.com, eric.devolder@oracle.com
Subject: [RFC PATCH 3/4] Hyper-V/Balloon: Call add_memory() with dm_device.ha_lock.
Date: Tue, 10 Dec 2019 23:46:10 +0800	[thread overview]
Message-ID: <20191210154611.10958-4-Tianyu.Lan@microsoft.com> (raw)
In-Reply-To: <20191210154611.10958-1-Tianyu.Lan@microsoft.com>

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

The ha_lock is to protect hot-add region list ha_region_list.
When Hyper-V delivers hot-add memory message, handle_pg_range()
goes through the list to find the hot-add region state
associated with message and do hot-add memory. The lock
is released in the loop before calling hv_mem_hot_add()
and is reacquired in hv_mem_hot_add(). There is a race
that list entry maybe freed during the slot.

To avoid the race and simply the code, make hv_mem_hot_add()
under protection of ha_region_list lock. There is a dead lock
case when run add_memory() under ha_lock. add_memory() calls
hv_online_page() inside and hv_online_page() also acquires
ha_lock again. Add lock_thread in the struct hv_dynmem_device
to record hv_mem_hot_add()'s thread and check lock_thread
in hv_online_page(). hv_mem_hot_add() thread already holds
lock during traverse hot add list and so not acquire lock
in hv_online_page().

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 drivers/hv/hv_balloon.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 34bd73526afd..4d1a3b1e2490 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -545,6 +545,7 @@ struct hv_dynmem_device {
 	 * regions from ha_region_list.
 	 */
 	spinlock_t ha_lock;
+	struct task_struct *lock_thread;
 
 	/*
 	 * A list of hot-add regions.
@@ -707,12 +708,10 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 	unsigned long start_pfn;
 	unsigned long processed_pfn;
 	unsigned long total_pfn = pfn_count;
-	unsigned long flags;
 
 	for (i = 0; i < (size/HA_CHUNK); i++) {
 		start_pfn = start + (i * HA_CHUNK);
 
-		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		has->ha_end_pfn +=  HA_CHUNK;
 
 		if (total_pfn > HA_CHUNK) {
@@ -724,7 +723,6 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		}
 
 		has->covered_end_pfn +=  processed_pfn;
-		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
 		init_completion(&dm_device.ol_waitevent);
 		dm_device.ha_waiting = !memhp_auto_online;
@@ -745,10 +743,8 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 				 */
 				do_hot_add = false;
 			}
-			spin_lock_irqsave(&dm_device.ha_lock, flags);
 			has->ha_end_pfn -= HA_CHUNK;
 			has->covered_end_pfn -=  processed_pfn;
-			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 			break;
 		}
 
@@ -771,8 +767,13 @@ static void hv_online_page(struct page *pg, unsigned int order)
 	struct hv_hotadd_state *has;
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(pg);
+	int unlocked;
+
+	if (dm_device.lock_thread != current) {
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
+		unlocked = 1;
+	}
 
-	spin_lock_irqsave(&dm_device.ha_lock, flags);
 	list_for_each_entry(has, &dm_device.ha_region_list, list) {
 		/* The page belongs to a different HAS. */
 		if ((pfn < has->start_pfn) ||
@@ -782,7 +783,9 @@ static void hv_online_page(struct page *pg, unsigned int order)
 		hv_bring_pgs_online(has, pfn, 1UL << order);
 		break;
 	}
-	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
+
+	if (unlocked)
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 }
 
 static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
@@ -860,6 +863,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
 		pg_start);
 
 	spin_lock_irqsave(&dm_device.ha_lock, flags);
+	dm_device.lock_thread = current;
 	list_for_each_entry(has, &dm_device.ha_region_list, list) {
 		/*
 		 * If the pfn range we are dealing with is not in the current
@@ -912,9 +916,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
 			} else {
 				pfn_cnt = size;
 			}
-			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
-			spin_lock_irqsave(&dm_device.ha_lock, flags);
 		}
 		/*
 		 * If we managed to online any pages that were given to us,
@@ -923,6 +925,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
 		res = has->covered_end_pfn - old_covered_state;
 		break;
 	}
+	dm_device.lock_thread = NULL;
 	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
 	return res;
-- 
2.14.5