All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in acquire/release_region_mutex
@ 2015-02-17 15:20 Vitaly Kuznetsov
  2015-02-17 15:42 ` KY Srinivasan
  2015-02-18 11:18 ` [PATCH v2] " Vitaly Kuznetsov
  0 siblings, 2 replies; 3+ messages in thread
From: Vitaly Kuznetsov @ 2015-02-17 15:20 UTC (permalink / raw)
  To: K. Y. Srinivasan, devel; +Cc: Haiyang Zhang, linux-kernel, Dexuan Cui

When many memory regions are being added and automatically onlined the
following lockup is sometimes observed:

INFO: task udevd:1872 blocked for more than 120 seconds.
...
Call Trace:
 [<ffffffff816ec0bc>] schedule_timeout+0x22c/0x350
 [<ffffffff816eb98f>] wait_for_common+0x10f/0x160
 [<ffffffff81067650>] ? default_wake_function+0x0/0x20
 [<ffffffff816eb9fd>] wait_for_completion+0x1d/0x20
 [<ffffffff8144cb9c>] hv_memory_notifier+0xdc/0x120
 [<ffffffff816f298c>] notifier_call_chain+0x4c/0x70
...

When several memory blocks are going online simultaneously we get several
hv_memory_notifier() trying to acquire the ha_region_mutex. When this mutex is
being held by hot_add_req() all these competing acquire_region_mutex() do
mutex_trylock, fail, and queue themselves into wait_for_completion(..). However
when we do complete() from release_region_mutex() only one of them wakes up.
This could be solved by changing complete() -> complete_all(), but as memory
onlining can be delayed we can still get several
hv_memory_notifier() runners at the same time trying to grab the mutex.
Only one of them will succeed and the others will hang forever as
complete() is not being called. We don't see this issue often because we have
5sec onlining timeout in hv_mem_hot_add() and usually all udev events arrive
in this time frame.

Get rid of the trylock path, waiting on the mutex is supposed to provide the
required serialization.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/hv/hv_balloon.c | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index ff16938..094de89 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -534,7 +534,6 @@ struct hv_dynmem_device {
 	struct task_struct *thread;
 
 	struct mutex ha_region_mutex;
-	struct completion waiter_event;
 
 	/*
 	 * A list of hot-add regions.
@@ -554,25 +553,14 @@ static struct hv_dynmem_device dm_device;
 static void post_status(struct hv_dynmem_device *dm);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static void acquire_region_mutex(bool trylock)
+static void acquire_region_mutex(void)
 {
-	if (trylock) {
-		reinit_completion(&dm_device.waiter_event);
-		while (!mutex_trylock(&dm_device.ha_region_mutex))
-			wait_for_completion(&dm_device.waiter_event);
-	} else {
-		mutex_lock(&dm_device.ha_region_mutex);
-	}
+	mutex_lock(&dm_device.ha_region_mutex);
 }
 
-static void release_region_mutex(bool trylock)
+static void release_region_mutex(void)
 {
-	if (trylock) {
-		mutex_unlock(&dm_device.ha_region_mutex);
-	} else {
-		mutex_unlock(&dm_device.ha_region_mutex);
-		complete(&dm_device.waiter_event);
-	}
+	mutex_unlock(&dm_device.ha_region_mutex);
 }
 
 static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
@@ -580,12 +568,12 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 {
 	switch (val) {
 	case MEM_GOING_ONLINE:
-		acquire_region_mutex(true);
+		acquire_region_mutex();
 		break;
 
 	case MEM_ONLINE:
 	case MEM_CANCEL_ONLINE:
-		release_region_mutex(true);
+		release_region_mutex();
 		if (dm_device.ha_waiting) {
 			dm_device.ha_waiting = false;
 			complete(&dm_device.ol_waitevent);
@@ -646,7 +634,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		init_completion(&dm_device.ol_waitevent);
 		dm_device.ha_waiting = true;
 
-		release_region_mutex(false);
+		release_region_mutex();
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
 				(HA_CHUNK << PAGE_SHIFT));
@@ -675,7 +663,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		 * have not been "onlined" within the allowed time.
 		 */
 		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-		acquire_region_mutex(false);
+		acquire_region_mutex();
 		post_status(&dm_device);
 	}
 
@@ -886,7 +874,7 @@ static void hot_add_req(struct work_struct *dummy)
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-	acquire_region_mutex(false);
+	acquire_region_mutex();
 	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
 	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
 
@@ -918,7 +906,7 @@ static void hot_add_req(struct work_struct *dummy)
 	if (do_hot_add)
 		resp.page_count = process_hot_add(pg_start, pfn_cnt,
 						rg_start, rg_sz);
-	release_region_mutex(false);
+	release_region_mutex();
 #endif
 	/*
 	 * The result field of the response structure has the
@@ -1439,7 +1427,6 @@ static int balloon_probe(struct hv_device *dev,
 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
-	init_completion(&dm_device.waiter_event);
 	INIT_LIST_HEAD(&dm_device.ha_region_list);
 	mutex_init(&dm_device.ha_region_mutex);
 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in acquire/release_region_mutex
  2015-02-17 15:20 [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in acquire/release_region_mutex Vitaly Kuznetsov
@ 2015-02-17 15:42 ` KY Srinivasan
  2015-02-18 11:18 ` [PATCH v2] " Vitaly Kuznetsov
  1 sibling, 0 replies; 3+ messages in thread
From: KY Srinivasan @ 2015-02-17 15:42 UTC (permalink / raw)
  To: Vitaly Kuznetsov, devel; +Cc: Haiyang Zhang, linux-kernel, Dexuan Cui



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Tuesday, February 17, 2015 7:20 AM
> To: KY Srinivasan; devel@linuxdriverproject.org
> Cc: Haiyang Zhang; linux-kernel@vger.kernel.org; Dexuan Cui
> Subject: [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in
> acquire/release_region_mutex
> 
> When many memory regions are being added and automatically onlined the
> following lockup is sometimes observed:
> 
> INFO: task udevd:1872 blocked for more than 120 seconds.
> ...
> Call Trace:
>  [<ffffffff816ec0bc>] schedule_timeout+0x22c/0x350  [<ffffffff816eb98f>]
> wait_for_common+0x10f/0x160  [<ffffffff81067650>] ?
> default_wake_function+0x0/0x20  [<ffffffff816eb9fd>]
> wait_for_completion+0x1d/0x20  [<ffffffff8144cb9c>]
> hv_memory_notifier+0xdc/0x120  [<ffffffff816f298c>]
> notifier_call_chain+0x4c/0x70 ...
> 
> When several memory blocks are going online simultaneously we get several
> hv_memory_notifier() trying to acquire the ha_region_mutex. When this
> mutex is being held by hot_add_req() all these competing
> acquire_region_mutex() do mutex_trylock, fail, and queue themselves into
> wait_for_completion(..). However when we do complete() from
> release_region_mutex() only one of them wakes up.
> This could be solved by changing complete() -> complete_all(), but as
> memory onlining can be delayed we can still get several
> hv_memory_notifier() runners at the same time trying to grab the mutex.
> Only one of them will succeed and the others will hang forever as
> complete() is not being called. We don't see this issue often because we
> have 5sec onlining timeout in hv_mem_hot_add() and usually all udev
> events arrive in this time frame.
> 
> Get rid of the trylock path, waiting on the mutex is supposed to provide the
> required serialization.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  drivers/hv/hv_balloon.c | 33 ++++++++++-----------------------
>  1 file changed, 10 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index
> ff16938..094de89 100644
> --- a/drivers/hv/hv_balloon.c
> +++ b/drivers/hv/hv_balloon.c
> @@ -534,7 +534,6 @@ struct hv_dynmem_device {
>  	struct task_struct *thread;
> 
>  	struct mutex ha_region_mutex;
> -	struct completion waiter_event;
> 
>  	/*
>  	 * A list of hot-add regions.
> @@ -554,25 +553,14 @@ static struct hv_dynmem_device dm_device;  static
> void post_status(struct hv_dynmem_device *dm);
> 
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -static void acquire_region_mutex(bool trylock)
> +static void acquire_region_mutex(void)
>  {
> -	if (trylock) {
> -		reinit_completion(&dm_device.waiter_event);
> -		while (!mutex_trylock(&dm_device.ha_region_mutex))
> -			wait_for_completion(&dm_device.waiter_event);
> -	} else {
> -		mutex_lock(&dm_device.ha_region_mutex);
> -	}
> +	mutex_lock(&dm_device.ha_region_mutex);
>  }

Why have the wrapper; get rid of it and use mutex_lock directly.
> 
> -static void release_region_mutex(bool trylock)
> +static void release_region_mutex(void)
>  {
> -	if (trylock) {
> -		mutex_unlock(&dm_device.ha_region_mutex);
> -	} else {
> -		mutex_unlock(&dm_device.ha_region_mutex);
> -		complete(&dm_device.waiter_event);
> -	}
> +	mutex_unlock(&dm_device.ha_region_mutex);
>  }
>
No wrapper needed.
 
>  static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
> @@ -580,12 +568,12 @@ static int hv_memory_notifier(struct notifier_block
> *nb, unsigned long val,  {
>  	switch (val) {
>  	case MEM_GOING_ONLINE:
> -		acquire_region_mutex(true);
> +		acquire_region_mutex();
>  		break;
> 
>  	case MEM_ONLINE:
>  	case MEM_CANCEL_ONLINE:
> -		release_region_mutex(true);
> +		release_region_mutex();
>  		if (dm_device.ha_waiting) {
>  			dm_device.ha_waiting = false;
>  			complete(&dm_device.ol_waitevent);
> @@ -646,7 +634,7 @@ static void hv_mem_hot_add(unsigned long start,
> unsigned long size,
>  		init_completion(&dm_device.ol_waitevent);
>  		dm_device.ha_waiting = true;
> 
> -		release_region_mutex(false);
> +		release_region_mutex();
>  		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
>  		ret = add_memory(nid, PFN_PHYS((start_pfn)),
>  				(HA_CHUNK << PAGE_SHIFT));
> @@ -675,7 +663,7 @@ static void hv_mem_hot_add(unsigned long start,
> unsigned long size,
>  		 * have not been "onlined" within the allowed time.
>  		 */
>  		wait_for_completion_timeout(&dm_device.ol_waitevent,
> 5*HZ);
> -		acquire_region_mutex(false);
> +		acquire_region_mutex();
>  		post_status(&dm_device);
>  	}
> 
> @@ -886,7 +874,7 @@ static void hot_add_req(struct work_struct *dummy)
>  	resp.hdr.size = sizeof(struct dm_hot_add_response);
> 
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -	acquire_region_mutex(false);
> +	acquire_region_mutex();
>  	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
>  	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
> 
> @@ -918,7 +906,7 @@ static void hot_add_req(struct work_struct *dummy)
>  	if (do_hot_add)
>  		resp.page_count = process_hot_add(pg_start, pfn_cnt,
>  						rg_start, rg_sz);
> -	release_region_mutex(false);
> +	release_region_mutex();
>  #endif
>  	/*
>  	 * The result field of the response structure has the @@ -1439,7
> +1427,6 @@ static int balloon_probe(struct hv_device *dev,
>  	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
>  	init_completion(&dm_device.host_event);
>  	init_completion(&dm_device.config_event);
> -	init_completion(&dm_device.waiter_event);
>  	INIT_LIST_HEAD(&dm_device.ha_region_list);
>  	mutex_init(&dm_device.ha_region_mutex);
>  	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
> --
> 1.9.3

Thanks,

K. Y

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2] Drivers: hv: hv_balloon: eliminate the trylock path in acquire/release_region_mutex
  2015-02-17 15:20 [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in acquire/release_region_mutex Vitaly Kuznetsov
  2015-02-17 15:42 ` KY Srinivasan
@ 2015-02-18 11:18 ` Vitaly Kuznetsov
  1 sibling, 0 replies; 3+ messages in thread
From: Vitaly Kuznetsov @ 2015-02-18 11:18 UTC (permalink / raw)
  To: K. Y. Srinivasan, devel; +Cc: Haiyang Zhang, linux-kernel, Dexuan Cui

When many memory regions are being added and automatically onlined the
following lockup is sometimes observed:

INFO: task udevd:1872 blocked for more than 120 seconds.
...
Call Trace:
 [<ffffffff816ec0bc>] schedule_timeout+0x22c/0x350
 [<ffffffff816eb98f>] wait_for_common+0x10f/0x160
 [<ffffffff81067650>] ? default_wake_function+0x0/0x20
 [<ffffffff816eb9fd>] wait_for_completion+0x1d/0x20
 [<ffffffff8144cb9c>] hv_memory_notifier+0xdc/0x120
 [<ffffffff816f298c>] notifier_call_chain+0x4c/0x70
...

When several memory blocks are going online simultaneously we get several
hv_memory_notifier() trying to acquire the ha_region_mutex. When this mutex is
being held by hot_add_req() all these competing acquire_region_mutex() do
mutex_trylock, fail, and queue themselves into wait_for_completion(..). However
when we do complete() from release_region_mutex() only one of them wakes up.
This could be solved by changing complete() -> complete_all(), but as memory
onlining can be delayed we can still get several
hv_memory_notifier() runners at the same time trying to grab the mutex.
Only one of them will succeed and the others will hang forever as
complete() is not being called. We don't see this issue often because we have
5sec onlining timeout in hv_mem_hot_add() and usually all udev events arrive
in this time frame.

Get rid of the trylock path, waiting on the mutex is supposed to provide the
required serialization.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
Changes since v1:
- Removed redundant wrappers [K. Y. Srinivasan]
---
 drivers/hv/hv_balloon.c | 35 ++++++-----------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index ff16938..a095b70 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -534,7 +534,6 @@ struct hv_dynmem_device {
 	struct task_struct *thread;
 
 	struct mutex ha_region_mutex;
-	struct completion waiter_event;
 
 	/*
 	 * A list of hot-add regions.
@@ -554,38 +553,17 @@ static struct hv_dynmem_device dm_device;
 static void post_status(struct hv_dynmem_device *dm);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static void acquire_region_mutex(bool trylock)
-{
-	if (trylock) {
-		reinit_completion(&dm_device.waiter_event);
-		while (!mutex_trylock(&dm_device.ha_region_mutex))
-			wait_for_completion(&dm_device.waiter_event);
-	} else {
-		mutex_lock(&dm_device.ha_region_mutex);
-	}
-}
-
-static void release_region_mutex(bool trylock)
-{
-	if (trylock) {
-		mutex_unlock(&dm_device.ha_region_mutex);
-	} else {
-		mutex_unlock(&dm_device.ha_region_mutex);
-		complete(&dm_device.waiter_event);
-	}
-}
-
 static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 			      void *v)
 {
 	switch (val) {
 	case MEM_GOING_ONLINE:
-		acquire_region_mutex(true);
+		mutex_lock(&dm_device.ha_region_mutex);
 		break;
 
 	case MEM_ONLINE:
 	case MEM_CANCEL_ONLINE:
-		release_region_mutex(true);
+		mutex_unlock(&dm_device.ha_region_mutex);
 		if (dm_device.ha_waiting) {
 			dm_device.ha_waiting = false;
 			complete(&dm_device.ol_waitevent);
@@ -646,7 +624,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		init_completion(&dm_device.ol_waitevent);
 		dm_device.ha_waiting = true;
 
-		release_region_mutex(false);
+		mutex_unlock(&dm_device.ha_region_mutex);
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
 				(HA_CHUNK << PAGE_SHIFT));
@@ -675,7 +653,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		 * have not been "onlined" within the allowed time.
 		 */
 		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-		acquire_region_mutex(false);
+		mutex_lock(&dm_device.ha_region_mutex);
 		post_status(&dm_device);
 	}
 
@@ -886,7 +864,7 @@ static void hot_add_req(struct work_struct *dummy)
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-	acquire_region_mutex(false);
+	mutex_lock(&dm_device.ha_region_mutex);
 	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
 	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
 
@@ -918,7 +896,7 @@ static void hot_add_req(struct work_struct *dummy)
 	if (do_hot_add)
 		resp.page_count = process_hot_add(pg_start, pfn_cnt,
 						rg_start, rg_sz);
-	release_region_mutex(false);
+	mutex_unlock(&dm_device.ha_region_mutex);
 #endif
 	/*
 	 * The result field of the response structure has the
@@ -1439,7 +1417,6 @@ static int balloon_probe(struct hv_device *dev,
 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
-	init_completion(&dm_device.waiter_event);
 	INIT_LIST_HEAD(&dm_device.ha_region_list);
 	mutex_init(&dm_device.ha_region_mutex);
 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-02-18 11:18 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-17 15:20 [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in acquire/release_region_mutex Vitaly Kuznetsov
2015-02-17 15:42 ` KY Srinivasan
2015-02-18 11:18 ` [PATCH v2] " Vitaly Kuznetsov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.