All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Jan Beulich <jbeulich@suse.com>,
	Juergen Gross <jgross@suse.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.19 36/95] xen/balloon: use a kernel thread instead a workqueue
Date: Mon,  4 Oct 2021 14:52:06 +0200	[thread overview]
Message-ID: <20211004125034.753338888@linuxfoundation.org> (raw)
In-Reply-To: <20211004125033.572932188@linuxfoundation.org>

From: Juergen Gross <jgross@suse.com>

[ Upstream commit 8480ed9c2bbd56fc86524998e5f2e3e22f5038f6 ]

Today the Xen ballooning is done via delayed work in a workqueue. This
might result in workqueue hangups being reported in case of large
amounts of memory are being ballooned in one go (here 16GB):

BUG: workqueue lockup - pool cpus=6 node=0 flags=0x0 nice=0 stuck for 64s!
Showing busy workqueues and worker pools:
workqueue events: flags=0x0
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=2/256 refcnt=3
    in-flight: 229:balloon_process
    pending: cache_reap
workqueue events_freezable_power_: flags=0x84
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
    pending: disk_events_workfn
workqueue mm_percpu_wq: flags=0x8
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
    pending: vmstat_update
pool 12: cpus=6 node=0 flags=0x0 nice=0 hung=64s workers=3 idle: 2222 43

This can easily be avoided by using a dedicated kernel thread for doing
the ballooning work.

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20210827123206.15429-1-jgross@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/xen/balloon.c | 62 +++++++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 17 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index b23edf64c2b2..643dbe5620e8 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -43,6 +43,8 @@
 #include <linux/sched.h>
 #include <linux/cred.h>
 #include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/pagemap.h>
@@ -120,7 +122,7 @@ static struct ctl_table xen_root[] = {
 #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
 
 /*
- * balloon_process() state:
+ * balloon_thread() state:
  *
  * BP_DONE: done or nothing to do,
  * BP_WAIT: wait to be rescheduled,
@@ -135,6 +137,8 @@ enum bp_state {
 	BP_ECANCELED
 };
 
+/* Main waiting point for xen-balloon thread. */
+static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
 
 static DEFINE_MUTEX(balloon_mutex);
 
@@ -149,10 +153,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];
 static LIST_HEAD(ballooned_pages);
 static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 
-/* Main work function, always executed in process context. */
-static void balloon_process(struct work_struct *work);
-static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -383,7 +383,7 @@ static void xen_online_page(struct page *page)
 static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
 {
 	if (val == MEM_ONLINE)
-		schedule_delayed_work(&balloon_worker, 0);
+		wake_up(&balloon_thread_wq);
 
 	return NOTIFY_OK;
 }
@@ -508,18 +508,43 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 }
 
 /*
- * As this is a work item it is guaranteed to run as a single instance only.
+ * Stop waiting if either state is not BP_EAGAIN and ballooning action is
+ * needed, or if the credit has changed while state is BP_EAGAIN.
+ */
+static bool balloon_thread_cond(enum bp_state state, long credit)
+{
+	if (state != BP_EAGAIN)
+		credit = 0;
+
+	return current_credit() != credit || kthread_should_stop();
+}
+
+/*
+ * As this is a kthread it is guaranteed to run as a single instance only.
  * We may of course race updates of the target counts (which are protected
  * by the balloon lock), or with changes to the Xen hard limit, but we will
  * recover from these in time.
  */
-static void balloon_process(struct work_struct *work)
+static int balloon_thread(void *unused)
 {
 	enum bp_state state = BP_DONE;
 	long credit;
+	unsigned long timeout;
+
+	set_freezable();
+	for (;;) {
+		if (state == BP_EAGAIN)
+			timeout = balloon_stats.schedule_delay * HZ;
+		else
+			timeout = 3600 * HZ;
+		credit = current_credit();
 
+		wait_event_interruptible_timeout(balloon_thread_wq,
+				 balloon_thread_cond(state, credit), timeout);
+
+		if (kthread_should_stop())
+			return 0;
 
-	do {
 		mutex_lock(&balloon_mutex);
 
 		credit = current_credit();
@@ -546,12 +571,7 @@ static void balloon_process(struct work_struct *work)
 		mutex_unlock(&balloon_mutex);
 
 		cond_resched();
-
-	} while (credit && state == BP_DONE);
-
-	/* Schedule more work if there is some still to be done. */
-	if (state == BP_EAGAIN)
-		schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
+	}
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -559,7 +579,7 @@ void balloon_set_new_target(unsigned long target)
 {
 	/* No need for lock. Not read-modify-write updates. */
 	balloon_stats.target_pages = target;
-	schedule_delayed_work(&balloon_worker, 0);
+	wake_up(&balloon_thread_wq);
 }
 EXPORT_SYMBOL_GPL(balloon_set_new_target);
 
@@ -664,7 +684,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 
 	/* The balloon may be too large now. Shrink it if needed. */
 	if (current_credit())
-		schedule_delayed_work(&balloon_worker, 0);
+		wake_up(&balloon_thread_wq);
 
 	mutex_unlock(&balloon_mutex);
 }
@@ -698,6 +718,8 @@ static void __init balloon_add_region(unsigned long start_pfn,
 
 static int __init balloon_init(void)
 {
+	struct task_struct *task;
+
 	if (!xen_domain())
 		return -ENODEV;
 
@@ -741,6 +763,12 @@ static int __init balloon_init(void)
 	}
 #endif
 
+	task = kthread_run(balloon_thread, NULL, "xen-balloon");
+	if (IS_ERR(task)) {
+		pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
+		return PTR_ERR(task);
+	}
+
 	/* Init the xen-balloon driver. */
 	xen_balloon_init();
 
-- 
2.33.0




  parent reply	other threads:[~2021-10-04 13:09 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-04 12:51 [PATCH 4.19 00/95] 4.19.209-rc1 review Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 01/95] ocfs2: drop acl cache for directories too Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 02/95] usb: gadget: r8a66597: fix a loop in set_feature() Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 03/95] usb: dwc2: gadget: Fix ISOC transfer complete handling for DDMA Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 04/95] usb: musb: tusb6010: uninitialized data in tusb_fifo_write_unaligned() Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 05/95] cifs: fix incorrect check for null pointer in header_assemble Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 06/95] xen/x86: fix PV trap handling on secondary processors Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 07/95] usb-storage: Add quirk for ScanLogic SL11R-IDE older than 2.6c Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 08/95] USB: serial: cp210x: add ID for GW Instek GDM-834x Digital Multimeter Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 09/95] USB: cdc-acm: fix minor-number release Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 10/95] staging: greybus: uart: fix tty use after free Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 11/95] Re-enable UAS for LaCie Rugged USB3-FW with fk quirk Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 12/95] USB: serial: mos7840: remove duplicated 0xac24 device ID Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 13/95] USB: serial: option: add Telit LN920 compositions Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 14/95] USB: serial: option: remove duplicate USB device ID Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 15/95] USB: serial: option: add device id for Foxconn T99W265 Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 16/95] mcb: fix error handling in mcb_alloc_bus() Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 17/95] serial: mvebu-uart: fix drivers tx_empty callback Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 18/95] net: hso: fix muxed tty registration Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 19/95] bnxt_en: Fix TX timeout when TX ring size is set to the smallest Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 20/95] net/smc: add missing error check in smc_clc_prfx_set() Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 21/95] gpio: uniphier: Fix void functions to remove return value Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 22/95] net/mlx4_en: Dont allow aRFS for encapsulated packets Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 23/95] scsi: iscsi: Adjust iface sysfs attr detection Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 24/95] tty: synclink_gt, drop unneeded forward declarations Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 25/95] tty: synclink_gt: rename a conflicting function name Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 26/95] fpga: machxo2-spi: Return an error on failure Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 27/95] fpga: machxo2-spi: Fix missing error code in machxo2_write_complete() Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 28/95] thermal/core: Potential buffer overflow in thermal_build_list_of_policies() Greg Kroah-Hartman
2021-10-04 12:51 ` [PATCH 4.19 29/95] irqchip/goldfish-pic: Select GENERIC_IRQ_CHIP to fix build Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 30/95] irqchip/gic-v3-its: Fix potential VPE leak on error Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 31/95] md: fix a lock order reversal in md_alloc Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 32/95] blktrace: Fix uaf in blk_trace access after removing by sysfs Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 33/95] net: macb: fix use after free on rmmod Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 34/95] net: stmmac: allow CSR clock of 300MHz Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 35/95] m68k: Double cast io functions to unsigned long Greg Kroah-Hartman
2021-10-04 12:52 ` Greg Kroah-Hartman [this message]
2021-10-04 12:52 ` [PATCH 4.19 37/95] nvme-multipath: fix ANA state updates when a namespace is not present Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 38/95] compiler.h: Introduce absolute_pointer macro Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 39/95] net: i825xx: Use absolute_pointer for memcpy from fixed memory location Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 40/95] sparc: avoid stringop-overread errors Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 41/95] qnx4: " Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 42/95] parisc: Use absolute_pointer() to define PAGE0 Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 43/95] arm64: Mark __stack_chk_guard as __ro_after_init Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 44/95] alpha: Declare virt_to_phys and virt_to_bus parameter as pointer to volatile Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 45/95] net: 6pack: Fix tx timeout and slot time Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 46/95] spi: Fix tegra20 build with CONFIG_PM=n Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 47/95] erofs: fix up erofs_lookup tracepoint Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 48/95] arm64: dts: marvell: armada-37xx: Extend PCIe MEM space Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 49/95] PCI: aardvark: Fix checking for PIO status Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 50/95] tcp: address problems caused by EDT misshaps Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 51/95] tcp: always set retrans_stamp on recovery Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 52/95] tcp: create a helper to model exponential backoff Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 53/95] tcp: adjust rto_base in retransmits_timed_out() Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 54/95] xen/balloon: fix balloon kthread freezing Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 55/95] qnx4: work around gcc false positive warning bug Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 56/95] tty: Fix out-of-bound vmalloc access in imageblit Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 57/95] cpufreq: schedutil: Use kobject release() method to free sugov_tunables Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 58/95] cpufreq: schedutil: Destroy mutex before kobject_put() frees the memory Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 59/95] mac80211: fix use-after-free in CCMP/GCMP RX Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 60/95] x86/kvmclock: Move this_cpu_pvti into kvmclock.h Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 61/95] drm/amd/display: Pass PCI deviceid into DC Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 62/95] ipvs: check that ip_vs_conn_tab_bits is between 8 and 20 Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 63/95] hwmon: (mlxreg-fan) Return non-zero value when fan current state is enforced from sysfs Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 64/95] mac80211: Fix ieee80211_amsdu_aggregate frag_tail bug Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 65/95] mac80211: limit injected vht mcs/nss in ieee80211_parse_tx_radiotap Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 66/95] sctp: break out if skb_header_pointer returns NULL in sctp_rcv_ootb Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 67/95] hwmon: (tmp421) Replace S_<PERMS> with octal values Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 68/95] hwmon: (tmp421) report /PVLD condition as fault Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 69/95] hwmon: (tmp421) fix rounding for negative values Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 70/95] e100: fix length calculation in e100_get_regs_len Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 71/95] e100: fix buffer overrun in e100_get_regs Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 72/95] Revert "block, bfq: honor already-setup queue merges" Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 73/95] scsi: csiostor: Add module softdep on cxgb4 Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 74/95] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 75/95] elf: dont use MAP_FIXED_NOREPLACE for elf interpreter mappings Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 76/95] ipack: ipoctal: fix stack information leak Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 77/95] ipack: ipoctal: fix tty registration race Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 78/95] ipack: ipoctal: fix tty-registration error handling Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 79/95] ipack: ipoctal: fix missing allocation-failure check Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 80/95] ipack: ipoctal: fix module reference leak Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 81/95] ext4: fix potential infinite loop in ext4_dx_readdir() Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 82/95] net: udp: annotate data race around udp_sk(sk)->corkflag Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 83/95] EDAC/synopsys: Fix wrong value type assignment for edac_mode Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 84/95] ARM: 9077/1: PLT: Move struct plt_entries definition to header Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 85/95] ARM: 9078/1: Add warn suppress parameter to arm_gen_branch_link() Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 86/95] ARM: 9079/1: ftrace: Add MODULE_PLTS support Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 87/95] ARM: 9098/1: ftrace: MODULE_PLT: Fix build problem without DYNAMIC_FTRACE Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 88/95] hso: fix bailout in error case of probe Greg Kroah-Hartman
2021-10-04 12:52 ` [PATCH 4.19 89/95] usb: hso: fix error handling code of hso_create_net_device Greg Kroah-Hartman
2021-10-04 12:53 ` [PATCH 4.19 90/95] usb: hso: remove the bailout parameter Greg Kroah-Hartman
2021-10-04 12:53 ` [PATCH 4.19 91/95] crypto: ccp - fix resource leaks in ccp_run_aes_gcm_cmd() Greg Kroah-Hartman
2021-10-04 12:53 ` [PATCH 4.19 92/95] HID: betop: fix slab-out-of-bounds Write in betop_probe Greg Kroah-Hartman
2021-10-04 12:53 ` [PATCH 4.19 93/95] netfilter: ipset: Fix oversized kvmalloc() calls Greg Kroah-Hartman
2021-10-04 12:53 ` [PATCH 4.19 94/95] HID: usbhid: free raw_report buffers in usbhid_stop Greg Kroah-Hartman
2021-10-04 12:53 ` [PATCH 4.19 95/95] net: mdiobus: Fix memory leak in __mdiobus_register Greg Kroah-Hartman
2021-10-04 17:40 ` [PATCH 4.19 00/95] 4.19.209-rc1 review Naresh Kamboju
2021-10-04 17:44   ` Eric Dumazet
2021-10-04 19:49     ` Shuah Khan
2021-10-04 20:34       ` Shuah Khan
2021-10-05  6:47     ` Greg Kroah-Hartman
2021-10-04 17:52   ` Pavel Machek
2021-10-05  2:15 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211004125034.753338888@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=boris.ostrovsky@oracle.com \
    --cc=jbeulich@suse.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.