All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 23/27] libxfs: use PSI information to detect memory pressure
Date: Thu, 15 Oct 2020 18:21:51 +1100	[thread overview]
Message-ID: <20201015072155.1631135-24-david@fromorbit.com> (raw)
In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

The buffer cache needs to have a reliable trigger for shrinking
the cache. Modern kernels track and report memory pressure events to
userspace via the Pressure Stall Interface (PSI). Create a PSI
memory pressure monitoring thread to listen for memory pressure
events and use that to drive buffer cache shrinking interfaces.

Add the shrinker framework that will allow us to implement LRU
reclaim of buffers when memory pressure occurs.  We also create a
low memory detection and reclaim wait mechanism to allow us to
throttle back new allocations while we are shrinking the buffer
cache.

We also include malloc heap trimming callouts so that once the
shrinker frees the memory, we trim the malloc heap to release the
freed memory back to the system.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 libxfs/buftarg.c     | 142 ++++++++++++++++++++++++++++++++++++++++++-
 libxfs/xfs_buftarg.h |   9 +++
 2 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c
index 42806e433715..6c7142d41eb1 100644
--- a/libxfs/buftarg.c
+++ b/libxfs/buftarg.c
@@ -62,6 +62,128 @@ xfs_buftarg_setsize_early(
 	return xfs_buftarg_setsize(btp, bsize);
 }
 
+/*
+ * Scan a chunk of the buffer cache and drop LRU reference counts. If the
+ * count goes to zero, dispose of the buffer.
+ */
+static void
+xfs_buftarg_shrink(
+	struct xfs_buftarg	*btc)
+{
+	/*
+	 * Make the fact we are in memory reclaim externally visible. This
+	 * allows buffer cache allocation throttling while we are trying to
+	 * free memory.
+	 */
+	atomic_inc_return(&btc->bt_low_mem);
+
+	fprintf(stderr, "Got memory pressure event. Shrinking caches!\n");
+
+	/*
+	 * Now we've freed a bunch of memory, trim the heap down to release the
+	 * freed memory back to the kernel and reduce the pressure we are
+	 * placing on the system.
+	 */
+	malloc_trim(0);
+
+	/*
+	 * Done, wake anyone waiting on memory reclaim to complete.
+	 */
+	atomic_dec_return(&btc->bt_low_mem);
+	complete(&btc->bt_low_mem_wait);
+}
+
+static void *
+xfs_buftarg_shrinker(
+	void			*args)
+{
+	struct xfs_buftarg	*btp = args;
+	struct pollfd		 fds = {
+		.fd = btp->bt_psi_fd,
+		.events = POLLPRI,
+	};
+
+	rcu_register_thread();
+	while (!btp->bt_exiting) {
+		int	n;
+
+		n = poll(&fds, 1, 100);
+		if (n == 0)
+			continue;	/* timeout */
+		if (n < 0) {
+			perror("poll(PSI)");
+			break;
+		}
+		if (fds.revents & POLLERR) {
+			fprintf(stderr,
+				"poll(psi) POLLERR: event source dead?\n");
+			break;
+		}
+		if (!(fds.revents & POLLPRI)) {
+			fprintf(stderr,
+				"poll(psi): unknown event.  Ignoring.\n");
+			continue;
+		}
+
+		/* run the shrinker here */
+		xfs_buftarg_shrink(btp);
+
+	}
+	rcu_unregister_thread();
+	return NULL;
+}
+
+/*
+ * This only picks up on global memory pressure. Maybe in future we can detect
+ * whether we are running inside a container and use the PSI information for the
+ * container.
+ *
+ * We want relatively early notification of memory pressure stalls because
+ * xfs_repair will consume lots of memory. Hence set a low trigger threshold for
+ * reclaim to run - a partial stall of 10ms over a 1s sample period will trigger
+ * reclaim algorithms.
+ */
+static int
+xfs_buftarg_mempressue_init(
+	struct xfs_buftarg	*btp)
+{
+	const char		*fname = "/proc/pressure/memory";
+	const char		*trigger = "some 10000 1000000";
+	int			error;
+
+	btp->bt_psi_fd = open(fname, O_RDWR | O_NONBLOCK);
+	if (btp->bt_psi_fd < 0) {
+		perror("open(PSI)");
+		return -errno;
+	}
+	if (write(btp->bt_psi_fd, trigger, strlen(trigger) + 1) !=
+						strlen(trigger) + 1) {
+		perror("write(PSI)");
+		error = -errno;
+		goto out_close;
+	}
+
+	atomic_set(&btp->bt_low_mem, 0);
+	init_completion(&btp->bt_low_mem_wait);
+
+	/*
+	 * Now create the monitoring reclaim thread. This will run until the
+	 * buftarg is torn down.
+	 */
+	error = pthread_create(&btp->bt_psi_tid, NULL,
+				xfs_buftarg_shrinker, btp);
+	if (error)
+		goto out_close;
+
+	return 0;
+
+out_close:
+	close(btp->bt_psi_fd);
+	btp->bt_psi_fd = -1;
+	return error;
+}
+
+
 struct xfs_buftarg *
 xfs_buftarg_alloc(
 	struct xfs_mount	*mp,
@@ -74,6 +196,8 @@ xfs_buftarg_alloc(
 	btp->bt_mount = mp;
 	btp->bt_fd = libxfs_device_to_fd(bdev);
 	btp->bt_bdev = bdev;
+	btp->bt_psi_fd = -1;
+	btp->bt_exiting = false;
 
 	if (xfs_buftarg_setsize_early(btp))
 		goto error_free;
@@ -84,8 +208,13 @@ xfs_buftarg_alloc(
 	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
 		goto error_lru;
 
+	if (xfs_buftarg_mempressue_init(btp))
+		goto error_pcp;
+
 	return btp;
 
+error_pcp:
+	percpu_counter_destroy(&btp->bt_io_count);
 error_lru:
 	list_lru_destroy(&btp->bt_lru);
 error_free:
@@ -97,6 +226,12 @@ void
 xfs_buftarg_free(
 	struct xfs_buftarg	*btp)
 {
+	btp->bt_exiting = true;
+	if (btp->bt_psi_tid)
+		pthread_join(btp->bt_psi_tid, NULL);
+	if (btp->bt_psi_fd >= 0)
+		close(btp->bt_psi_fd);
+
 	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
 	percpu_counter_destroy(&btp->bt_io_count);
 	platform_flush_device(btp->bt_fd, btp->bt_bdev);
@@ -121,10 +256,15 @@ xfs_buf_allocate_memory(
 	struct xfs_buf		*bp,
 	uint			flags)
 {
+	struct xfs_buftarg	*btp = bp->b_target;
 	size_t			size;
 
+	/* Throttle allocation while dealing with low memory events */
+	while (atomic_read(&btp->bt_low_mem))
+		wait_for_completion(&btp->bt_low_mem_wait);
+
 	size = BBTOB(bp->b_length);
-	bp->b_addr = memalign(bp->b_target->bt_meta_sectorsize, size);
+	bp->b_addr = memalign(btp->bt_meta_sectorsize, size);
 	if (!bp->b_addr)
 		return -ENOMEM;
 	return 0;
diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h
index 798980fdafeb..d2ce47e22545 100644
--- a/libxfs/xfs_buftarg.h
+++ b/libxfs/xfs_buftarg.h
@@ -41,7 +41,16 @@ struct xfs_buftarg {
 
 	uint32_t		bt_io_count;
 	unsigned int		flags;
+
+	/*
+	 * Memory pressure (PSI) and cache reclaim infrastructure
+	 */
 	struct list_lru		bt_lru;
+	int			bt_psi_fd;
+	pthread_t		bt_psi_tid;
+	bool			bt_exiting;
+	bool			bt_low_mem;
+	struct completion	bt_low_mem_wait;
 };
 
 /* We purged a dirty buffer and lost a write. */
-- 
2.28.0


  parent reply	other threads:[~2020-10-15  7:22 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-15  7:21 [PATCH 00/27] [RFC, WIP] xfsprogs: xfs_buf unification and AIO Dave Chinner
2020-10-15  7:21 ` [PATCH 01/27] xfsprogs: remove unused buffer tracing code Dave Chinner
2020-10-15  7:21 ` [PATCH 02/27] xfsprogs: remove unused IO_DEBUG functionality Dave Chinner
2020-11-16  2:31   ` Eric Sandeen
2020-10-15  7:21 ` [PATCH 03/27] libxfs: get rid of b_bcount from xfs_buf Dave Chinner
2020-11-23 19:53   ` Eric Sandeen
2020-10-15  7:21 ` [PATCH 04/27] libxfs: rename buftarg->dev to btdev Dave Chinner
2020-11-16  2:33   ` Eric Sandeen
2020-10-15  7:21 ` [PATCH 05/27] xfsprogs: get rid of ancient btree tracing fragments Dave Chinner
2020-11-16  2:35   ` Eric Sandeen
2020-10-15  7:21 ` [PATCH 06/27] xfsprogs: remove xfs_buf_t typedef Dave Chinner
2020-10-15 15:22   ` Darrick J. Wong
2020-10-15 20:54     ` Dave Chinner
2020-10-15  7:21 ` [PATCH 07/27] xfsprogs: introduce liburcu support Dave Chinner
2020-10-15  7:21 ` [PATCH 08/27] libxfs: add spinlock_t wrapper Dave Chinner
2020-10-15  7:21 ` [PATCH 09/27] atomic: convert to uatomic Dave Chinner
2020-10-15  7:21 ` [PATCH 10/27] libxfs: add kernel-compatible completion API Dave Chinner
2020-10-15 17:09   ` Darrick J. Wong
2020-10-19 22:21     ` Dave Chinner
2020-10-15  7:21 ` [PATCH 11/27] libxfs: add wrappers for kernel semaphores Dave Chinner
2020-10-15  7:21 ` [PATCH 12/27] xfsprogs: convert use-once buffer reads to uncached IO Dave Chinner
2020-10-15 17:12   ` Darrick J. Wong
2020-10-19 22:36     ` Dave Chinner
2020-10-15  7:21 ` [PATCH 13/27] libxfs: introduce userspace buftarg infrastructure Dave Chinner
2020-10-15  7:21 ` [PATCH 14/27] xfs: rename libxfs_buftarg_init to libxfs_open_devices() Dave Chinner
2020-10-15  7:21 ` [PATCH 15/27] libxfs: introduce userspace buftarg infrastructure Dave Chinner
2020-10-15 17:16   ` Darrick J. Wong
2020-10-15  7:21 ` [PATCH 16/27] libxfs: add a synchronous IO engine to the buftarg Dave Chinner
2020-10-15  7:21 ` [PATCH 17/27] xfsprogs: convert libxfs_readbufr to libxfs_buf_read_uncached Dave Chinner
2020-10-15  7:21 ` [PATCH 18/27] libxfs: convert libxfs_bwrite to buftarg IO Dave Chinner
2020-10-15  7:21 ` [PATCH 19/27] libxfs: add cache infrastructure to buftarg Dave Chinner
2020-10-15  7:21 ` [PATCH 20/27] libxfs: add internal lru to btcache Dave Chinner
2020-10-15  7:21 ` [PATCH 21/27] libxfs: Add kernel list_lru wrapper Dave Chinner
2020-10-15  7:21 ` [PATCH 22/27] libxfs: introduce new buffer cache infrastructure Dave Chinner
2020-10-15 17:46   ` Darrick J. Wong
2020-10-15  7:21 ` Dave Chinner [this message]
2020-10-15 17:56   ` [PATCH 23/27] libxfs: use PSI information to detect memory pressure Darrick J. Wong
2020-10-15 21:20     ` Dave Chinner
2020-10-15  7:21 ` [PATCH 24/27] libxfs: add a buftarg cache shrinker implementation Dave Chinner
2020-10-15 18:01   ` Darrick J. Wong
2020-10-15 21:33     ` Dave Chinner
2020-10-15  7:21 ` [PATCH 25/27] libxfs: switch buffer cache implementations Dave Chinner
2020-10-15  7:21 ` [PATCH 26/27] build: set platform_defs.h.in dependency correctly Dave Chinner
2020-10-15  7:21 ` [PATCH 27/27] libxfs: convert sync IO buftarg engine to AIO Dave Chinner
2020-10-15 18:26   ` Darrick J. Wong
2020-10-15 21:42     ` Dave Chinner
2020-10-15  7:29 ` [PATCH 00/27] [RFC, WIP] xfsprogs: xfs_buf unification and AIO Dave Chinner
2020-10-15 18:37 ` Darrick J. Wong
2020-10-15 22:35   ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201015072155.1631135-24-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.