All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xen/balloon: use a kernel thread instead a workqueue
@ 2021-08-27 12:32 Juergen Gross
  2021-09-08 14:47 ` Boris Ostrovsky
  0 siblings, 1 reply; 4+ messages in thread
From: Juergen Gross @ 2021-08-27 12:32 UTC (permalink / raw)
  To: xen-devel, linux-kernel
  Cc: Juergen Gross, Boris Ostrovsky, Stefano Stabellini, Jan Beulich

Today the Xen ballooning is done via delayed work in a workqueue. This
might result in workqueue hangups being reported in case of large
amounts of memory are being ballooned in one go (here 16GB):

BUG: workqueue lockup - pool cpus=6 node=0 flags=0x0 nice=0 stuck for 64s!
Showing busy workqueues and worker pools:
workqueue events: flags=0x0
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=2/256 refcnt=3
    in-flight: 229:balloon_process
    pending: cache_reap
workqueue events_freezable_power_: flags=0x84
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
    pending: disk_events_workfn
workqueue mm_percpu_wq: flags=0x8
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
    pending: vmstat_update
pool 12: cpus=6 node=0 flags=0x0 nice=0 hung=64s workers=3 idle: 2222 43

This can easily be avoided by using a dedicated kernel thread for doing
the ballooning work.

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/balloon.c | 62 +++++++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 17 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 671c71245a7b..2d2803883306 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -43,6 +43,8 @@
 #include <linux/sched.h>
 #include <linux/cred.h>
 #include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/memblock.h>
 #include <linux/pagemap.h>
@@ -115,7 +117,7 @@ static struct ctl_table xen_root[] = {
 #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
 
 /*
- * balloon_process() state:
+ * balloon_thread() state:
  *
  * BP_DONE: done or nothing to do,
  * BP_WAIT: wait to be rescheduled,
@@ -130,6 +132,8 @@ enum bp_state {
 	BP_ECANCELED
 };
 
+/* Main waiting point for xen-balloon thread. */
+static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
 
 static DEFINE_MUTEX(balloon_mutex);
 
@@ -144,10 +148,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];
 static LIST_HEAD(ballooned_pages);
 static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 
-/* Main work function, always executed in process context. */
-static void balloon_process(struct work_struct *work);
-static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -366,7 +366,7 @@ static void xen_online_page(struct page *page, unsigned int order)
 static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
 {
 	if (val == MEM_ONLINE)
-		schedule_delayed_work(&balloon_worker, 0);
+		wake_up(&balloon_thread_wq);
 
 	return NOTIFY_OK;
 }
@@ -491,18 +491,43 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 }
 
 /*
- * As this is a work item it is guaranteed to run as a single instance only.
+ * Stop waiting if either state is not BP_EAGAIN and ballooning action is
+ * needed, or if the credit has changed while state is BP_EAGAIN.
+ */
+static bool balloon_thread_cond(enum bp_state state, long credit)
+{
+	if (state != BP_EAGAIN)
+		credit = 0;
+
+	return current_credit() != credit || kthread_should_stop();
+}
+
+/*
+ * As this is a kthread it is guaranteed to run as a single instance only.
  * We may of course race updates of the target counts (which are protected
  * by the balloon lock), or with changes to the Xen hard limit, but we will
  * recover from these in time.
  */
-static void balloon_process(struct work_struct *work)
+static int balloon_thread(void *unused)
 {
 	enum bp_state state = BP_DONE;
 	long credit;
+	unsigned long timeout;
+
+	set_freezable();
+	for (;;) {
+		if (state == BP_EAGAIN)
+			timeout = balloon_stats.schedule_delay * HZ;
+		else
+			timeout = 3600 * HZ;
+		credit = current_credit();
 
+		wait_event_interruptible_timeout(balloon_thread_wq,
+				 balloon_thread_cond(state, credit), timeout);
+
+		if (kthread_should_stop())
+			return 0;
 
-	do {
 		mutex_lock(&balloon_mutex);
 
 		credit = current_credit();
@@ -529,12 +554,7 @@ static void balloon_process(struct work_struct *work)
 		mutex_unlock(&balloon_mutex);
 
 		cond_resched();
-
-	} while (credit && state == BP_DONE);
-
-	/* Schedule more work if there is some still to be done. */
-	if (state == BP_EAGAIN)
-		schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
+	}
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -542,7 +562,7 @@ void balloon_set_new_target(unsigned long target)
 {
 	/* No need for lock. Not read-modify-write updates. */
 	balloon_stats.target_pages = target;
-	schedule_delayed_work(&balloon_worker, 0);
+	wake_up(&balloon_thread_wq);
 }
 EXPORT_SYMBOL_GPL(balloon_set_new_target);
 
@@ -647,7 +667,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 
 	/* The balloon may be too large now. Shrink it if needed. */
 	if (current_credit())
-		schedule_delayed_work(&balloon_worker, 0);
+		wake_up(&balloon_thread_wq);
 
 	mutex_unlock(&balloon_mutex);
 }
@@ -679,6 +699,8 @@ static void __init balloon_add_region(unsigned long start_pfn,
 
 static int __init balloon_init(void)
 {
+	struct task_struct *task;
+
 	if (!xen_domain())
 		return -ENODEV;
 
@@ -722,6 +744,12 @@ static int __init balloon_init(void)
 	}
 #endif
 
+	task = kthread_run(balloon_thread, NULL, "xen-balloon");
+	if (IS_ERR(task)) {
+		pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
+		return PTR_ERR(task);
+	}
+
 	/* Init the xen-balloon driver. */
 	xen_balloon_init();
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] xen/balloon: use a kernel thread instead a workqueue
  2021-08-27 12:32 [PATCH] xen/balloon: use a kernel thread instead a workqueue Juergen Gross
@ 2021-09-08 14:47 ` Boris Ostrovsky
  2021-09-08 15:11   ` Juergen Gross
  0 siblings, 1 reply; 4+ messages in thread
From: Boris Ostrovsky @ 2021-09-08 14:47 UTC (permalink / raw)
  To: Juergen Gross, xen-devel, linux-kernel; +Cc: Stefano Stabellini, Jan Beulich


On 8/27/21 8:32 AM, Juergen Gross wrote:
> +static bool balloon_thread_cond(enum bp_state state, long credit)
> +{
> +	if (state != BP_EAGAIN)
> +		credit = 0;
> +
> +	return current_credit() != credit || kthread_should_stop();
> +}
> +
> +/*
> + * As this is a kthread it is guaranteed to run as a single instance only.
>   * We may of course race updates of the target counts (which are protected
>   * by the balloon lock), or with changes to the Xen hard limit, but we will
>   * recover from these in time.
>   */
> -static void balloon_process(struct work_struct *work)
> +static int balloon_thread(void *unused)
>  {
>  	enum bp_state state = BP_DONE;
>  	long credit;
> +	unsigned long timeout;
> +
> +	set_freezable();
> +	for (;;) {
> +		if (state == BP_EAGAIN)
> +			timeout = balloon_stats.schedule_delay * HZ;
> +		else
> +			timeout = 3600 * HZ;
> +		credit = current_credit();
>  
> +		wait_event_interruptible_timeout(balloon_thread_wq,
> +				 balloon_thread_cond(state, credit), timeout);


Given that wait_event_interruptible_timeout() is a bunch of nested macros do we need to worry here about overly aggressive compiler optimizing out 'credit = current_credit()'?


-boris



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xen/balloon: use a kernel thread instead a workqueue
  2021-09-08 14:47 ` Boris Ostrovsky
@ 2021-09-08 15:11   ` Juergen Gross
  2021-09-08 16:05     ` Boris Ostrovsky
  0 siblings, 1 reply; 4+ messages in thread
From: Juergen Gross @ 2021-09-08 15:11 UTC (permalink / raw)
  To: Boris Ostrovsky, xen-devel, linux-kernel; +Cc: Stefano Stabellini, Jan Beulich


[-- Attachment #1.1.1: Type: text/plain, Size: 1588 bytes --]

On 08.09.21 16:47, Boris Ostrovsky wrote:
> 
> On 8/27/21 8:32 AM, Juergen Gross wrote:
>> +static bool balloon_thread_cond(enum bp_state state, long credit)
>> +{
>> +	if (state != BP_EAGAIN)
>> +		credit = 0;
>> +
>> +	return current_credit() != credit || kthread_should_stop();
>> +}
>> +
>> +/*
>> + * As this is a kthread it is guaranteed to run as a single instance only.
>>    * We may of course race updates of the target counts (which are protected
>>    * by the balloon lock), or with changes to the Xen hard limit, but we will
>>    * recover from these in time.
>>    */
>> -static void balloon_process(struct work_struct *work)
>> +static int balloon_thread(void *unused)
>>   {
>>   	enum bp_state state = BP_DONE;
>>   	long credit;
>> +	unsigned long timeout;
>> +
>> +	set_freezable();
>> +	for (;;) {
>> +		if (state == BP_EAGAIN)
>> +			timeout = balloon_stats.schedule_delay * HZ;
>> +		else
>> +			timeout = 3600 * HZ;
>> +		credit = current_credit();
>>   
>> +		wait_event_interruptible_timeout(balloon_thread_wq,
>> +				 balloon_thread_cond(state, credit), timeout);
> 
> 
> Given that wait_event_interruptible_timeout() is a bunch of nested macros do we need to worry here about overly aggressive compiler optimizing out 'credit = current_credit()'?

I don't think so. current_credit() is reading from balloon_stats, which
is a global variable. So the compiler shouldn't assume the contents
won't change.

But I can add a barrier() after 'credit = current_credit()' in case
you'd feel uneasy without it.


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3135 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xen/balloon: use a kernel thread instead a workqueue
  2021-09-08 15:11   ` Juergen Gross
@ 2021-09-08 16:05     ` Boris Ostrovsky
  0 siblings, 0 replies; 4+ messages in thread
From: Boris Ostrovsky @ 2021-09-08 16:05 UTC (permalink / raw)
  To: Juergen Gross, xen-devel, linux-kernel; +Cc: Stefano Stabellini, Jan Beulich


On 9/8/21 11:11 AM, Juergen Gross wrote:
> On 08.09.21 16:47, Boris Ostrovsky wrote:
>>
>>
>> Given that wait_event_interruptible_timeout() is a bunch of nested macros do we need to worry here about overly aggressive compiler optimizing out 'credit = current_credit()'?
>
> I don't think so. current_credit() is reading from balloon_stats, which
> is a global variable. So the compiler shouldn't assume the contents
> won't change.


Ah, ok --- good point. Then I guess we should be fine.


Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>


>
> But I can add a barrier() after 'credit = current_credit()' in case
> you'd feel uneasy without it.
>
>
> Juergen

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-09-08 16:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-27 12:32 [PATCH] xen/balloon: use a kernel thread instead a workqueue Juergen Gross
2021-09-08 14:47 ` Boris Ostrovsky
2021-09-08 15:11   ` Juergen Gross
2021-09-08 16:05     ` Boris Ostrovsky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.