* RFC: buffer cache backing page allocation cleanup
@ 2021-05-19 19:08 Christoph Hellwig
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Hi all,

while reviewing the patch from Dave to use alloc_pages_bulk_array I
stumbled over all kinds of loose ends in the buffer cache.  I started
cleaning them up while procrastinating on more urgent work, and
rebased the alloc_pages_bulk_array patch on top.

So far this has only been run through the xfstests "quick" group on a
4k file system.

Diffstat:
 libxfs/xfs_ag.c |    1 
 xfs_buf.c       |  256 +++++++++++++++++++++-----------------------------------
 xfs_buf.h       |    3 
 3 files changed, 99 insertions(+), 161 deletions(-)

* [PATCH 01/11] xfs: cleanup error handling in xfs_buf_get_map
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Use a single goto label for freeing the buffer and returning an error.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 592800c8852f45..80be0333f077c0 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -721,16 +721,12 @@ xfs_buf_get_map(
 		return error;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
-	if (error) {
-		xfs_buf_free(new_bp);
-		return error;
-	}
+	if (error)
+		goto out_free_buf;
 
 	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
-	if (error) {
-		xfs_buf_free(new_bp);
-		return error;
-	}
+	if (error)
+		goto out_free_buf;
 
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
@@ -758,6 +754,9 @@ xfs_buf_get_map(
 	trace_xfs_buf_get(bp, flags, _RET_IP_);
 	*bpp = bp;
 	return 0;
+out_free_buf:
+	xfs_buf_free(new_bp);
+	return error;
 }
 
 int
-- 
2.30.2


* [PATCH 02/11] xfs: split xfs_buf_allocate_memory
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Split xfs_buf_allocate_memory into one helper that allocates from
slab and one that allocates using the page allocator.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 83 +++++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 80be0333f077c0..ac85ec6f0a2fab 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -347,11 +347,41 @@ xfs_buf_free(
 	kmem_cache_free(xfs_buf_zone, bp);
 }
 
+static int
+xfs_buf_alloc_slab(
+	struct xfs_buf		*bp,
+	unsigned int		flags)
+{
+	struct xfs_buftarg	*btp = bp->b_target;
+	int			align = xfs_buftarg_dma_alignment(btp);
+	size_t			size = BBTOB(bp->b_length);
+	xfs_km_flags_t		km_flags = KM_NOFS;
+
+	if (!(flags & XBF_READ))
+		km_flags |= KM_ZERO;
+	bp->b_addr = kmem_alloc_io(size, align, km_flags);
+	if (!bp->b_addr)
+		return -ENOMEM;
+	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
+	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+		/* b_addr spans two pages - use alloc_page instead */
+		kmem_free(bp->b_addr);
+		bp->b_addr = NULL;
+		return -ENOMEM;
+	}
+	bp->b_offset = offset_in_page(bp->b_addr);
+	bp->b_pages = bp->b_page_array;
+	bp->b_pages[0] = kmem_to_page(bp->b_addr);
+	bp->b_page_count = 1;
+	bp->b_flags |= _XBF_KMEM;
+	return 0;
+}
+
 /*
  * Allocates all the pages for buffer in question and builds it's page list.
  */
-STATIC int
-xfs_buf_allocate_memory(
+static int
+xfs_buf_alloc_pages(
 	struct xfs_buf		*bp,
 	uint			flags)
 {
@@ -361,47 +391,14 @@ xfs_buf_allocate_memory(
 	unsigned short		page_count, i;
 	xfs_off_t		start, end;
 	int			error;
-	xfs_km_flags_t		kmflag_mask = 0;
 
 	/*
 	 * assure zeroed buffer for non-read cases.
 	 */
-	if (!(flags & XBF_READ)) {
-		kmflag_mask |= KM_ZERO;
+	if (!(flags & XBF_READ))
 		gfp_mask |= __GFP_ZERO;
-	}
 
-	/*
-	 * for buffers that are contained within a single page, just allocate
-	 * the memory from the heap - there's no need for the complexity of
-	 * page arrays to keep allocation down to order 0.
-	 */
 	size = BBTOB(bp->b_length);
-	if (size < PAGE_SIZE) {
-		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
-		bp->b_addr = kmem_alloc_io(size, align_mask,
-					   KM_NOFS | kmflag_mask);
-		if (!bp->b_addr) {
-			/* low memory - use alloc_page loop instead */
-			goto use_alloc_page;
-		}
-
-		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
-		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
-			/* b_addr spans two pages - use alloc_page instead */
-			kmem_free(bp->b_addr);
-			bp->b_addr = NULL;
-			goto use_alloc_page;
-		}
-		bp->b_offset = offset_in_page(bp->b_addr);
-		bp->b_pages = bp->b_page_array;
-		bp->b_pages[0] = kmem_to_page(bp->b_addr);
-		bp->b_page_count = 1;
-		bp->b_flags |= _XBF_KMEM;
-		return 0;
-	}
-
-use_alloc_page:
 	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
 	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
 								>> PAGE_SHIFT;
@@ -720,9 +717,17 @@ xfs_buf_get_map(
 	if (error)
 		return error;
 
-	error = xfs_buf_allocate_memory(new_bp, flags);
-	if (error)
-		goto out_free_buf;
+	/*
+	 * For buffers that are contained within a single page, just allocate
+	 * the memory from the heap - there's no need for the complexity of
+	 * page arrays to keep allocation down to order 0.
+	 */
+	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
+	    xfs_buf_alloc_slab(new_bp, flags) < 0) {
+		error = xfs_buf_alloc_pages(new_bp, flags);
+		if (error)
+			goto out_free_buf;
+	}
 
 	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
 	if (error)
-- 
2.30.2


* [PATCH 03/11] xfs: remove ->b_offset handling for page backed buffers
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

->b_offset can only be non-zero for SLAB backed buffers, so remove all
code dealing with it for page backed buffers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 15 +++++----------
 fs/xfs/xfs_buf.h |  3 ++-
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ac85ec6f0a2fab..392b85d059bff5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -79,7 +79,7 @@ static inline int
 xfs_buf_vmap_len(
 	struct xfs_buf	*bp)
 {
-	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+	return (bp->b_page_count * PAGE_SIZE);
 }
 
 /*
@@ -329,8 +329,7 @@ xfs_buf_free(
 		uint		i;
 
 		if (xfs_buf_is_vmapped(bp))
-			vm_unmap_ram(bp->b_addr - bp->b_offset,
-					bp->b_page_count);
+			vm_unmap_ram(bp->b_addr, bp->b_page_count);
 
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
@@ -386,7 +385,7 @@ xfs_buf_alloc_pages(
 	uint			flags)
 {
 	size_t			size;
-	size_t			nbytes, offset;
+	size_t			nbytes;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
 	xfs_off_t		start, end;
@@ -407,7 +406,6 @@ xfs_buf_alloc_pages(
 	if (unlikely(error))
 		return error;
 
-	offset = bp->b_offset;
 	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
@@ -441,10 +439,9 @@ xfs_buf_alloc_pages(
 
 		XFS_STATS_INC(bp->b_mount, xb_page_found);
 
-		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
+		nbytes = min_t(size_t, size, PAGE_SIZE);
 		size -= nbytes;
 		bp->b_pages[i] = page;
-		offset = 0;
 	}
 	return 0;
 
@@ -466,7 +463,7 @@ _xfs_buf_map_pages(
 	ASSERT(bp->b_flags & _XBF_PAGES);
 	if (bp->b_page_count == 1) {
 		/* A single page buffer is always mappable */
-		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+		bp->b_addr = page_address(bp->b_pages[0]);
 	} else if (flags & XBF_UNMAPPED) {
 		bp->b_addr = NULL;
 	} else {
@@ -493,7 +490,6 @@ _xfs_buf_map_pages(
 
 		if (!bp->b_addr)
 			return -ENOMEM;
-		bp->b_addr += bp->b_offset;
 	}
 
 	return 0;
@@ -1726,7 +1722,6 @@ xfs_buf_offset(
 	if (bp->b_addr)
 		return bp->b_addr + offset;
 
-	offset += bp->b_offset;
 	page = bp->b_pages[offset >> PAGE_SHIFT];
 	return page_address(page) + (offset & (PAGE_SIZE-1));
 }
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 459ca34f26f588..21b4c58fd2fa87 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -167,7 +167,8 @@ struct xfs_buf {
 	atomic_t		b_pin_count;	/* pin count */
 	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
 	unsigned int		b_page_count;	/* size of page array */
-	unsigned int		b_offset;	/* page offset in first page */
+	unsigned int		b_offset;	/* page offset in first page,
+						   only used for SLAB buffers */
 	int			b_error;	/* error code on I/O */
 
 	/*
-- 
2.30.2


* [PATCH 04/11] xfs: cleanup _xfs_buf_get_pages
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Remove the check for an existing b_pages array as this function is always
called right after allocating a buffer, so this can't happen.  Also
use kmem_zalloc to allocate the page array instead of doing a manual
memset, given that the inline array is already pre-zeroed as part of the
freshly allocated buffer anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 392b85d059bff5..9c64c374411081 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -281,19 +281,18 @@ _xfs_buf_get_pages(
 	struct xfs_buf		*bp,
 	int			page_count)
 {
-	/* Make sure that we have a page list */
-	if (bp->b_pages == NULL) {
-		bp->b_page_count = page_count;
-		if (page_count <= XB_PAGES) {
-			bp->b_pages = bp->b_page_array;
-		} else {
-			bp->b_pages = kmem_alloc(sizeof(struct page *) *
-						 page_count, KM_NOFS);
-			if (bp->b_pages == NULL)
-				return -ENOMEM;
-		}
-		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
+	ASSERT(bp->b_pages == NULL);
+
+	bp->b_page_count = page_count;
+	if (page_count > XB_PAGES) {
+		bp->b_pages = kmem_zalloc(sizeof(struct page *) * page_count,
+					  KM_NOFS);
+		if (!bp->b_pages)
+			return -ENOMEM;
+	} else {
+		bp->b_pages = bp->b_page_array;
 	}
+
 	return 0;
 }
 
-- 
2.30.2


* [PATCH 05/11] xfs: remove the xb_page_found stat counter in xfs_buf_alloc_pages
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

We do not find any pages here; we're allocating them all from the page
allocator, so the xb_page_found stat counter is misleading.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9c64c374411081..76240d84d58b61 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -436,8 +436,6 @@ xfs_buf_alloc_pages(
 			goto retry;
 		}
 
-		XFS_STATS_INC(bp->b_mount, xb_page_found);
-
 		nbytes = min_t(size_t, size, PAGE_SIZE);
 		size -= nbytes;
 		bp->b_pages[i] = page;
-- 
2.30.2


* [PATCH 06/11] xfs: remove the size and nbytes variables in xfs_buf_alloc_pages
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

These variables are not used for anything but recursively updating each
other.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 76240d84d58b61..08c8667e6027fc 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -383,8 +383,6 @@ xfs_buf_alloc_pages(
 	struct xfs_buf		*bp,
 	uint			flags)
 {
-	size_t			size;
-	size_t			nbytes;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
 	xfs_off_t		start, end;
@@ -396,7 +394,6 @@ xfs_buf_alloc_pages(
 	if (!(flags & XBF_READ))
 		gfp_mask |= __GFP_ZERO;
 
-	size = BBTOB(bp->b_length);
 	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
 	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
 								>> PAGE_SHIFT;
@@ -436,8 +433,6 @@ xfs_buf_alloc_pages(
 			goto retry;
 		}
 
-		nbytes = min_t(size_t, size, PAGE_SIZE);
-		size -= nbytes;
 		bp->b_pages[i] = page;
 	}
 	return 0;
-- 
2.30.2


* [PATCH 07/11] xfs: simplify the b_page_count calculation
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Ever since we stopped using the Linux page cache to back XFS buffers
there has been no need to take the start sector into account when
calculating the number of pages in a buffer, as the data always starts
at the beginning of the buffer.
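
For example, with made up numbers: a hypothetical 4k buffer at daddr 7
(byte offset 3584) used to compute start = 0 and end = 2 and thus
allocate two pages to mirror its old page cache placement, while
DIV_ROUND_UP(4096, PAGE_SIZE) now yields the single page that is
actually needed.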

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 08c8667e6027fc..76a107e3cb2a22 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -278,15 +278,14 @@ _xfs_buf_alloc(
  */
 STATIC int
 _xfs_buf_get_pages(
-	struct xfs_buf		*bp,
-	int			page_count)
+	struct xfs_buf		*bp)
 {
 	ASSERT(bp->b_pages == NULL);
 
-	bp->b_page_count = page_count;
-	if (page_count > XB_PAGES) {
-		bp->b_pages = kmem_zalloc(sizeof(struct page *) * page_count,
-					  KM_NOFS);
+	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
+	if (bp->b_page_count > XB_PAGES) {
+		bp->b_pages = kmem_zalloc(sizeof(struct page *) *
+						bp->b_page_count, KM_NOFS);
 		if (!bp->b_pages)
 			return -ENOMEM;
 	} else {
@@ -384,8 +383,7 @@ xfs_buf_alloc_pages(
 	uint			flags)
 {
 	gfp_t			gfp_mask = xb_to_gfp(flags);
-	unsigned short		page_count, i;
-	xfs_off_t		start, end;
+	unsigned short		i;
 	int			error;
 
 	/*
@@ -394,11 +392,7 @@ xfs_buf_alloc_pages(
 	if (!(flags & XBF_READ))
 		gfp_mask |= __GFP_ZERO;
 
-	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
-	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
-								>> PAGE_SHIFT;
-	page_count = end - start;
-	error = _xfs_buf_get_pages(bp, page_count);
+	error = _xfs_buf_get_pages(bp);
 	if (unlikely(error))
 		return error;
 
@@ -942,7 +936,6 @@ xfs_buf_get_uncached(
 	int			flags,
 	struct xfs_buf		**bpp)
 {
-	unsigned long		page_count;
 	int			error, i;
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
@@ -954,12 +947,11 @@ xfs_buf_get_uncached(
 	if (error)
 		goto fail;
 
-	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
-	error = _xfs_buf_get_pages(bp, page_count);
+	error = _xfs_buf_get_pages(bp);
 	if (error)
 		goto fail_free_buf;
 
-	for (i = 0; i < page_count; i++) {
+	for (i = 0; i < bp->b_page_count; i++) {
 		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
 		if (!bp->b_pages[i]) {
 			error = -ENOMEM;
-- 
2.30.2


* [PATCH 08/11] xfs: centralize page allocation and freeing for buffers
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Factor out two helpers that do everything needed for allocating and
freeing pages that back a buffer, and remove the duplication between
the different interfaces.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 110 ++++++++++++++++-------------------------------
 1 file changed, 37 insertions(+), 73 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 76a107e3cb2a22..31aff8323605cd 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -273,35 +273,17 @@ _xfs_buf_alloc(
 }
 
 /*
- *	Allocate a page array capable of holding a specified number
- *	of pages, and point the page buf at it.
+ * Free all pages allocated to the buffer including the page map.
  */
-STATIC int
-_xfs_buf_get_pages(
-	struct xfs_buf		*bp)
+static void
+xfs_buf_free_pages(
+	struct xfs_buf	*bp)
 {
-	ASSERT(bp->b_pages == NULL);
-
-	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
-	if (bp->b_page_count > XB_PAGES) {
-		bp->b_pages = kmem_zalloc(sizeof(struct page *) *
-						bp->b_page_count, KM_NOFS);
-		if (!bp->b_pages)
-			return -ENOMEM;
-	} else {
-		bp->b_pages = bp->b_page_array;
-	}
+	unsigned int	i;
 
-	return 0;
-}
+	for (i = 0; i < bp->b_page_count; i++)
+		__free_page(bp->b_pages[i]);
 
-/*
- *	Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
-	struct xfs_buf	*bp)
-{
 	if (bp->b_pages != bp->b_page_array) {
 		kmem_free(bp->b_pages);
 		bp->b_pages = NULL;
@@ -324,22 +306,14 @@ xfs_buf_free(
 	ASSERT(list_empty(&bp->b_lru));
 
 	if (bp->b_flags & _XBF_PAGES) {
-		uint		i;
-
 		if (xfs_buf_is_vmapped(bp))
 			vm_unmap_ram(bp->b_addr, bp->b_page_count);
-
-		for (i = 0; i < bp->b_page_count; i++) {
-			struct page	*page = bp->b_pages[i];
-
-			__free_page(page);
-		}
+		xfs_buf_free_pages(bp);
 		if (current->reclaim_state)
 			current->reclaim_state->reclaimed_slab +=
 							bp->b_page_count;
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
-	_xfs_buf_free_pages(bp);
 	xfs_buf_free_maps(bp);
 	kmem_cache_free(xfs_buf_zone, bp);
 }
@@ -380,34 +354,33 @@ xfs_buf_alloc_slab(
 static int
 xfs_buf_alloc_pages(
 	struct xfs_buf		*bp,
-	uint			flags)
+	gfp_t			gfp_mask,
+	bool			fail_fast)
 {
-	gfp_t			gfp_mask = xb_to_gfp(flags);
-	unsigned short		i;
-	int			error;
-
-	/*
-	 * assure zeroed buffer for non-read cases.
-	 */
-	if (!(flags & XBF_READ))
-		gfp_mask |= __GFP_ZERO;
+	int			i;
 
-	error = _xfs_buf_get_pages(bp);
-	if (unlikely(error))
-		return error;
+	ASSERT(bp->b_pages == NULL);
 
-	bp->b_flags |= _XBF_PAGES;
+	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
+	if (bp->b_page_count > XB_PAGES) {
+		bp->b_pages = kmem_zalloc(sizeof(struct page *) *
+						bp->b_page_count, KM_NOFS);
+		if (!bp->b_pages)
+			return -ENOMEM;
+	} else {
+		bp->b_pages = bp->b_page_array;
+	}
 
 	for (i = 0; i < bp->b_page_count; i++) {
 		struct page	*page;
 		uint		retries = 0;
 retry:
 		page = alloc_page(gfp_mask);
-		if (unlikely(page == NULL)) {
-			if (flags & XBF_READ_AHEAD) {
+		if (unlikely(!page)) {
+			if (fail_fast) {
 				bp->b_page_count = i;
-				error = -ENOMEM;
-				goto out_free_pages;
+				xfs_buf_free_pages(bp);
+				return -ENOMEM;
 			}
 
 			/*
@@ -429,13 +402,9 @@ xfs_buf_alloc_pages(
 
 		bp->b_pages[i] = page;
 	}
-	return 0;
 
-out_free_pages:
-	for (i = 0; i < bp->b_page_count; i++)
-		__free_page(bp->b_pages[i]);
-	bp->b_flags &= ~_XBF_PAGES;
-	return error;
+	bp->b_flags |= _XBF_PAGES;
+	return 0;
 }
 
 /*
@@ -706,7 +675,13 @@ xfs_buf_get_map(
 	 */
 	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
 	    xfs_buf_alloc_slab(new_bp, flags) < 0) {
-		error = xfs_buf_alloc_pages(new_bp, flags);
+		gfp_t			gfp_mask = xb_to_gfp(flags);
+
+		/* assure a zeroed buffer for non-read cases */
+		if (!(flags & XBF_READ))
+			gfp_mask |= __GFP_ZERO;
+		error = xfs_buf_alloc_pages(new_bp, gfp_mask,
+					   flags & XBF_READ_AHEAD);
 		if (error)
 			goto out_free_buf;
 	}
@@ -936,7 +911,7 @@ xfs_buf_get_uncached(
 	int			flags,
 	struct xfs_buf		**bpp)
 {
-	int			error, i;
+	int			error;
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
@@ -947,19 +922,10 @@ xfs_buf_get_uncached(
 	if (error)
 		goto fail;
 
-	error = _xfs_buf_get_pages(bp);
+	error = xfs_buf_alloc_pages(bp, xb_to_gfp(flags), true);
 	if (error)
 		goto fail_free_buf;
 
-	for (i = 0; i < bp->b_page_count; i++) {
-		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-		if (!bp->b_pages[i]) {
-			error = -ENOMEM;
-			goto fail_free_mem;
-		}
-	}
-	bp->b_flags |= _XBF_PAGES;
-
 	error = _xfs_buf_map_pages(bp, 0);
 	if (unlikely(error)) {
 		xfs_warn(target->bt_mount,
@@ -972,9 +938,7 @@ xfs_buf_get_uncached(
 	return 0;
 
  fail_free_mem:
-	while (--i >= 0)
-		__free_page(bp->b_pages[i]);
-	_xfs_buf_free_pages(bp);
+	xfs_buf_free_pages(bp);
  fail_free_buf:
 	xfs_buf_free_maps(bp);
 	kmem_cache_free(xfs_buf_zone, bp);
-- 
2.30.2


* [PATCH 09/11] xfs: lift the buffer zeroing logic into xfs_buf_alloc_pages
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

Lift the buffer zeroing logic from xfs_buf_get_map into
xfs_buf_alloc_pages so that it also covers uncached buffers, and remove
the now obsolete manual zeroing in the only direct caller of
xfs_buf_get_uncached.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_ag.c |  1 -
 fs/xfs/xfs_buf.c       | 24 +++++++++++++-----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index c68a3668847499..be0087825ae06b 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -43,7 +43,6 @@ xfs_get_aghdr_buf(
 	if (error)
 		return error;
 
-	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
 	bp->b_bn = blkno;
 	bp->b_maps[0].bm_bn = blkno;
 	bp->b_ops = ops;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 31aff8323605cd..b3519a43759235 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -22,9 +22,6 @@
 
 static kmem_zone_t *xfs_buf_zone;
 
-#define xb_to_gfp(flags) \
-	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
-
 /*
  * Locking orders
  *
@@ -354,11 +351,21 @@ xfs_buf_alloc_slab(
 static int
 xfs_buf_alloc_pages(
 	struct xfs_buf		*bp,
-	gfp_t			gfp_mask,
+	xfs_buf_flags_t		flags,
 	bool			fail_fast)
 {
+	gfp_t			gfp_mask = __GFP_NOWARN;
 	int			i;
 
+	if (flags & XBF_READ_AHEAD)
+		gfp_mask |= __GFP_NORETRY;
+	else
+		gfp_mask |= GFP_NOFS;
+
+	/* assure a zeroed buffer for non-read cases */
+	if (!(flags & XBF_READ))
+		gfp_mask |= __GFP_ZERO;
+
 	ASSERT(bp->b_pages == NULL);
 
 	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
@@ -675,12 +682,7 @@ xfs_buf_get_map(
 	 */
 	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
 	    xfs_buf_alloc_slab(new_bp, flags) < 0) {
-		gfp_t			gfp_mask = xb_to_gfp(flags);
-
-		/* assure a zeroed buffer for non-read cases */
-		if (!(flags & XBF_READ))
-			gfp_mask |= __GFP_ZERO;
-		error = xfs_buf_alloc_pages(new_bp, gfp_mask,
+		error = xfs_buf_alloc_pages(new_bp, flags,
 					   flags & XBF_READ_AHEAD);
 		if (error)
 			goto out_free_buf;
@@ -922,7 +924,7 @@ xfs_buf_get_uncached(
 	if (error)
 		goto fail;
 
-	error = xfs_buf_alloc_pages(bp, xb_to_gfp(flags), true);
+	error = xfs_buf_alloc_pages(bp, flags, true);
 	if (error)
 		goto fail_free_buf;
 
-- 
2.30.2


* [PATCH 10/11] xfs: retry allocations from xfs_buf_get_uncached as well
From: Christoph Hellwig @ 2021-05-19 19:08 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner

There is no good reason why xfs_buf_get_uncached should fail on the
first allocation failure, so make it behave the same as the normal
xfs_buf_get_map path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b3519a43759235..a1295b5b6f0ca6 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -351,8 +351,7 @@ xfs_buf_alloc_slab(
 static int
 xfs_buf_alloc_pages(
 	struct xfs_buf		*bp,
-	xfs_buf_flags_t		flags,
-	bool			fail_fast)
+	xfs_buf_flags_t		flags)
 {
 	gfp_t			gfp_mask = __GFP_NOWARN;
 	int			i;
@@ -384,7 +383,7 @@ xfs_buf_alloc_pages(
 retry:
 		page = alloc_page(gfp_mask);
 		if (unlikely(!page)) {
-			if (fail_fast) {
+			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
 				xfs_buf_free_pages(bp);
 				return -ENOMEM;
@@ -682,8 +681,7 @@ xfs_buf_get_map(
 	 */
 	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
 	    xfs_buf_alloc_slab(new_bp, flags) < 0) {
-		error = xfs_buf_alloc_pages(new_bp, flags,
-					   flags & XBF_READ_AHEAD);
+		error = xfs_buf_alloc_pages(new_bp, flags);
 		if (error)
 			goto out_free_buf;
 	}
@@ -924,7 +922,7 @@ xfs_buf_get_uncached(
 	if (error)
 		goto fail;
 
-	error = xfs_buf_alloc_pages(bp, flags, true);
+	error = xfs_buf_alloc_pages(bp, flags);
 	if (error)
 		goto fail_free_buf;
 
-- 
2.30.2


* [PATCH 11/11] xfs: use alloc_pages_bulk_array() for buffers
From: Christoph Hellwig @ 2021-05-19 19:09 UTC (permalink / raw)
  To: linux-xfs; +Cc: Dave Chinner, Dave Chinner

From: Dave Chinner <dchinner@redhat.com>

Because it's more efficient than allocating pages one at a time in a
loop.
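
Note that alloc_pages_bulk_array() skips over array slots that are
already populated and returns the total number of populated slots, so
a partially filled array can simply be passed back in to continue
filling it where the last call left off.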

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[hch: rebased ontop of a bunch of cleanups]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a1295b5b6f0ca6..e2439503fc13bb 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -354,7 +354,7 @@ xfs_buf_alloc_pages(
 	xfs_buf_flags_t		flags)
 {
 	gfp_t			gfp_mask = __GFP_NOWARN;
-	int			i;
+	unsigned long		filled = 0;
 
 	if (flags & XBF_READ_AHEAD)
 		gfp_mask |= __GFP_NORETRY;
@@ -377,36 +377,27 @@ xfs_buf_alloc_pages(
 		bp->b_pages = bp->b_page_array;
 	}
 
-	for (i = 0; i < bp->b_page_count; i++) {
-		struct page	*page;
-		uint		retries = 0;
-retry:
-		page = alloc_page(gfp_mask);
-		if (unlikely(!page)) {
+	/*
+	 * Bulk filling of pages can take multiple calls. Not filling the entire
+	 * array is not an allocation failure, so don't back off if we get at
+	 * least one extra page.
+	 */
+	for (;;) {
+		unsigned long	last = filled;
+
+		filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count,
+						bp->b_pages);
+		if (filled == bp->b_page_count)
+			break;
+		if (filled == last) {
 			if (flags & XBF_READ_AHEAD) {
-				bp->b_page_count = i;
+				bp->b_page_count = filled;
 				xfs_buf_free_pages(bp);
 				return -ENOMEM;
 			}
-
-			/*
-			 * This could deadlock.
-			 *
-			 * But until all the XFS lowlevel code is revamped to
-			 * handle buffer allocation failures we can't do much.
-			 */
-			if (!(++retries % 100))
-				xfs_err(NULL,
-		"%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
-					current->comm, current->pid,
-					__func__, gfp_mask);
-
 			XFS_STATS_INC(bp->b_mount, xb_page_retries);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
 		}
-
-		bp->b_pages[i] = page;
 	}
 
 	bp->b_flags |= _XBF_PAGES;
-- 
2.30.2


* Re: [PATCH 03/11] xfs: remove ->b_offset handling for page backed buffers
From: Dave Chinner @ 2021-05-19 22:27 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 19, 2021 at 09:08:52PM +0200, Christoph Hellwig wrote:
> ->b_offset can only be non-zero for SLAB backed buffers, so remove all
> code dealing with it for page backed buffers.

Can you refer to these as _XBF_KMEM buffers, not "SLAB backed"? That
way there is no confusion as to what type of buffer needs to pay
attention to b_offset...

> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 459ca34f26f588..21b4c58fd2fa87 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -167,7 +167,8 @@ struct xfs_buf {
>  	atomic_t		b_pin_count;	/* pin count */
>  	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
>  	unsigned int		b_page_count;	/* size of page array */
> -	unsigned int		b_offset;	/* page offset in first page */
> +	unsigned int		b_offset;	/* page offset in first page,
> +						   only used for SLAB buffers */

Here too.
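
Something like this, maybe:

	unsigned int		b_offset;	/* page offset in first page,
						   only used for _XBF_KMEM
						   buffers */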

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 02/11] xfs: split xfs_buf_allocate_memory
From: Dave Chinner @ 2021-05-19 22:36 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 19, 2021 at 09:08:51PM +0200, Christoph Hellwig wrote:
> Split xfs_buf_allocate_memory into one helper that allocates from
> slab and one that allocates using the page allocator.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
....
> +static int
> +xfs_buf_alloc_slab(
> +	struct xfs_buf		*bp,
> +	unsigned int		flags)
> +{

xfs_buf_alloc_kmem() or xfs_buf_alloc_heap() would be better, I
think, because it matches the flag used to indicate how the memory
associated with the buffer was allocated.

> @@ -720,9 +717,17 @@ xfs_buf_get_map(
>  	if (error)
>  		return error;
>  
> -	error = xfs_buf_allocate_memory(new_bp, flags);
> -	if (error)
> -		goto out_free_buf;
> +	/*
> +	 * For buffers that are contained within a single page, just allocate
> +	 * the memory from the heap - there's no need for the complexity of
> +	 * page arrays to keep allocation down to order 0.
> +	 */
> +	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
> +	    xfs_buf_alloc_slab(new_bp, flags) < 0) {
> +		error = xfs_buf_alloc_pages(new_bp, flags);
> +		if (error)
> +			goto out_free_buf;
> +	}

Took me a moment to grok the logic pattern here, then I realised the
comment didn't help as it gives no indication that the heap
allocation is best effort and will fall back to pages. A small tweak
like:

	/*
	 * For buffers that fit entirely within a single page, first
	 * attempt to allocate the memory from the heap to minimise
	 * memory usage. If we can't get heap memory for these small
	 * buffers, we fall back to using the page allocator.
	 */

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 04/11] xfs: cleanup _xfs_buf_get_pages
From: Dave Chinner @ 2021-05-19 22:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 19, 2021 at 09:08:53PM +0200, Christoph Hellwig wrote:
> Remove the check for an existing b_pages array as this function is always
> called right after allocating a buffer, so this can't happen.  Also
> use kmem_zalloc to allocate the page array instead of doing a manual
> memset, given that the inline array is already pre-zeroed as part of the
> freshly allocated buffer anyway.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 23 +++++++++++------------
>  1 file changed, 11 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 392b85d059bff5..9c64c374411081 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -281,19 +281,18 @@ _xfs_buf_get_pages(
>  	struct xfs_buf		*bp,
>  	int			page_count)
>  {
> -	/* Make sure that we have a page list */
> -	if (bp->b_pages == NULL) {
> -		bp->b_page_count = page_count;
> -		if (page_count <= XB_PAGES) {
> -			bp->b_pages = bp->b_page_array;
> -		} else {
> -			bp->b_pages = kmem_alloc(sizeof(struct page *) *
> -						 page_count, KM_NOFS);
> -			if (bp->b_pages == NULL)
> -				return -ENOMEM;
> -		}
> -		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
> +	ASSERT(bp->b_pages == NULL);
> +
> +	bp->b_page_count = page_count;
> +	if (page_count > XB_PAGES) {
> +		bp->b_pages = kmem_zalloc(sizeof(struct page *) * page_count,
> +					  KM_NOFS);
> +		if (!bp->b_pages)
> +			return -ENOMEM;
> +	} else {
> +		bp->b_pages = bp->b_page_array;
>  	}
> +
>  	return 0;
>  }

This will conflict with (and break) the bulk alloc patch I sent out -
alloc_pages_bulk_array() skips over slots that are already populated,
so we have to ensure that the b_pages array is always zeroed before we
call the bulk alloc function, hence I moved the memset() in this
function to be unconditional. I almost cleaned up this function in
that patchset....

Just doing this:

	bp->b_page_count = page_count;
	if (page_count > XB_PAGES) {
		bp->b_pages = kmem_alloc(sizeof(struct page *) * page_count,
					 KM_NOFS);
		if (!bp->b_pages)
			return -ENOMEM;
	} else {
		bp->b_pages = bp->b_page_array;
	}
	memset(bp->b_pages, 0, sizeof(struct page *) * page_count);

	return 0;

will make it work fine with bulk alloc.

Cheers,

Dave.

-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 05/11] xfs: remove the xb_page_found stat counter in xfs_buf_alloc_pages
From: Dave Chinner @ 2021-05-19 22:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 19, 2021 at 09:08:54PM +0200, Christoph Hellwig wrote:
> We did not find any page, we're allocating them all from the page
> allocator.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 9c64c374411081..76240d84d58b61 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -436,8 +436,6 @@ xfs_buf_alloc_pages(
>  			goto retry;
>  		}
>  
> -		XFS_STATS_INC(bp->b_mount, xb_page_found);
> -
>  		nbytes = min_t(size_t, size, PAGE_SIZE);
>  		size -= nbytes;
>  		bp->b_pages[i] = page;

NACK. This is actually telling us that a page was allocated
successfully. I just used this very stat in combination with the
page allocate failure stat (xb_page_retries) to determine that the
bulk alloc code was failing to allocate all the pages asked for at
least 20% of the time.

$ xfs_stats.pl
.....
    xs_ig_dup............             0  Buf Statistics
    xs_ig_reclaims.......      38377759    pb_get................     432887411
    xs_ig_attrchg........             1    pb_create.............       1839653
  Log Operations                           pb_get_locked.........     431047794
    xs_log_writes........         71572    pb_get_locked_waited..           346
    xs_log_blocks........      36644864    pb_busy_locked........         13615
    xs_log_noiclogs......           265    pb_miss_locked........       1839651
    xs_log_force.........           521    pb_page_retries.......        488537
    xs_log_force_sleep...           495    pb_page_found.........       1839431
                                           pb_get_read...........           577


See the pb_miss_locked, pb_page_found and pb_page_retries numbers?
Almost all cache misses required page (rather than heap) allocation,
and for about 25% of them the bulk allocation failed to allocate all
the pages in a single call.

So, yeah, the buffer cache stats are useful diagnostic information
that I use a lot...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 06/11] xfs: remove the size and nbytes variables in xfs_buf_alloc_pages
From: Dave Chinner @ 2021-05-19 22:56 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 19, 2021 at 09:08:55PM +0200, Christoph Hellwig wrote:
> These variables are not used for anything but recursively updating each
> other.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 5 -----
>  1 file changed, 5 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 76240d84d58b61..08c8667e6027fc 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -383,8 +383,6 @@ xfs_buf_alloc_pages(
>  	struct xfs_buf		*bp,
>  	uint			flags)
>  {
> -	size_t			size;
> -	size_t			nbytes;
>  	gfp_t			gfp_mask = xb_to_gfp(flags);
>  	unsigned short		page_count, i;
>  	xfs_off_t		start, end;
> @@ -396,7 +394,6 @@ xfs_buf_alloc_pages(
>  	if (!(flags & XBF_READ))
>  		gfp_mask |= __GFP_ZERO;
>  
> -	size = BBTOB(bp->b_length);
>  	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
>  	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
>  								>> PAGE_SHIFT;
> @@ -436,8 +433,6 @@ xfs_buf_alloc_pages(
>  			goto retry;
>  		}
>  
> -		nbytes = min_t(size_t, size, PAGE_SIZE);
> -		size -= nbytes;
>  		bp->b_pages[i] = page;
>  	}
>  	return 0;

These have already gone away with the bulk allocation patch. I think
you should rebase this series on top of that...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 08/11] xfs: centralize page allocation and freeing for buffers
From: Dave Chinner @ 2021-05-19 23:22 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 19, 2021 at 09:08:57PM +0200, Christoph Hellwig wrote:
> Factor out two helpers that do everything needed for allocating and
> freeing pages that back a buffer, and remove the duplication between
> the different interfaces.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

This seems really confused.

Up until this point in the patch set you are pulling code out
of xfs_buf_alloc_pages() into helpers. Now you are getting rid of
the helpers and putting the slightly modified code back into
xfs_buf_alloc_pages(). This doesn't make any sense at all.

The freeing helper now requires the buffer state to be
manipulated on allocation failure so that the free function doesn't
run off the end of the bp->b_pages array. That's a bit of a
landmine, and it doesn't really clean anything up much at all.
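
That is, a failure part way through the allocation loop has to
truncate the state before calling the helper, as the patch does in the
readahead failure case:

	bp->b_page_count = i;
	xfs_buf_free_pages(bp);
	return -ENOMEM;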

And on the allocation side there is new "fail fast" behaviour
because you've lifted the readahead out of xfs_buf_alloc_pages. You
also lifted the zeroing checks, which I note that you immediately
put back inside xfs_buf_alloc_pages() in the next patch.

The stuff up to this point in the series makes sense. From this
patch onwards it seems to me that you're just undoing the factoring
and cleanups from the first few patches...

I mean, like the factoring of xfs_buf_alloc_slab(), you could have
just factored out xfs_buf_alloc_pages(bp, page_count) from
xfs_buf_allocate_memory() and used that directly in
xfs_buf_get_uncached() and avoided a bunch of this factoring, make a
slight logic modification and recombine churn. And it would be
trivial to do on top of the bulk allocation patch which already
converts both of these functions to use bulk allocation....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 04/11] xfs: cleanup _xfs_buf_get_pages
From: Christoph Hellwig @ 2021-05-20  5:23 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs

On Thu, May 20, 2021 at 08:40:28AM +1000, Dave Chinner wrote:
> This will not apply (and break) the bulk alloc patch I sent out - we
> have to ensure that the b_pages array is always zeroed before we
> call the bulk alloc function, hence I moved the memset() in this
> function to be unconditional. I almost cleaned up this function in
> that patchset....

The buffer is freshly allocated here using kmem_cache_zalloc, so
b_pages can't be set, b_page_array is already zeroed from
kmem_cache_zalloc, and the separate b_pages allocation is switched
to use kmem_zalloc.  I thought the commit log covers this, but maybe
I need to improve it?
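
For reference, _xfs_buf_alloc() starts off with:

	bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL);

so the whole structure, including the inline b_page_array, is zeroed
long before _xfs_buf_get_pages() can be reached.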

* Re: [PATCH 08/11] xfs: centralize page allocation and freeing for buffers
From: Christoph Hellwig @ 2021-05-20  5:35 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs

On Thu, May 20, 2021 at 09:22:45AM +1000, Dave Chinner wrote:
> Up until this point in the patch set you are pulling code out
> of xfs_buf_alloc_pages() into helpers. Now you are getting rid of
> the helpers and putting the slightly modified code back into
> xfs_buf_alloc_pages(). This doesn't make any sense at all.

It makes a whole lot of sense, but it seems you don't like the
structure :)

As stated in the commit log we now have one helper that sets a
_XBF_PAGES backing with pages and the map, and one helper to
tear it down.   I think it makes a whole lot of sense this way.

> The freeing helper now requires the buffer state to be
> manipulated on allocation failure so that the free function doesn't
> run off the end of the bp->b_pages array. That's a bit of a
> landmine, and it doesn't really clean anything up much at all.

It is something we also do elsewhere in the kernel.  Another
alternative would be to do a NULL check on the page, or to just
pointlessly duplicate the freeing loop.
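
The NULL check variant would look something like this (untested
sketch, relying on the page array always being zeroed on allocation):

	static void
	xfs_buf_free_pages(
		struct xfs_buf	*bp)
	{
		unsigned int	i;

		for (i = 0; i < bp->b_page_count; i++) {
			/* slots past an allocation failure stay NULL */
			if (bp->b_pages[i])
				__free_page(bp->b_pages[i]);
		}

		if (bp->b_pages != bp->b_page_array) {
			kmem_free(bp->b_pages);
			bp->b_pages = NULL;
		}
	}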

> And on the allocation side there is new "fail fast" behaviour
> because you've lifted the readahead out of xfs_buf_alloc_pages. You
> also lifted the zeroing checks, which I note that you immediately
> put back inside xfs_buf_alloc_pages() in the next patch.

This is to clearly split code consolidation from behavior changes.
I could move both earlier at the cost of adding a lot of new
code first that later gets removed.

> I mean, like the factoring of xfs_buf_alloc_slab(), you could have
> just factored out xfs_buf_alloc_pages(bp, page_count) from
> xfs_buf_allocate_memory() and used that directly in
> xfs_buf_get_uncached() and avoided a bunch of this factoring, make a
> slight logic modification and recombine churn. And it would be
> trivial to do on top of the bulk allocation patch which already
> converts both of these functions to use bulk allocation....

As mentioned in the cover letter: the bulk allocation review is what
triggered this, as it tripped me over various loose ends.  And as
usual I'd rather have that kind of change at the end where the
surrounding code makes sense, so the rebased version is now patch 11
of this series.

* Re: [PATCH 01/11] xfs: cleanup error handling in xfs_buf_get_map
From: Darrick J. Wong @ 2021-05-20 23:43 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Dave Chinner

On Wed, May 19, 2021 at 09:08:50PM +0200, Christoph Hellwig wrote:
> Use a single goto label for freeing the buffer and returning an error.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  fs/xfs/xfs_buf.c | 15 +++++++--------
>  1 file changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 592800c8852f45..80be0333f077c0 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -721,16 +721,12 @@ xfs_buf_get_map(
>  		return error;
>  
>  	error = xfs_buf_allocate_memory(new_bp, flags);
> -	if (error) {
> -		xfs_buf_free(new_bp);
> -		return error;
> -	}
> +	if (error)
> +		goto out_free_buf;
>  
>  	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
> -	if (error) {
> -		xfs_buf_free(new_bp);
> -		return error;
> -	}
> +	if (error)
> +		goto out_free_buf;
>  
>  	if (bp != new_bp)
>  		xfs_buf_free(new_bp);
> @@ -758,6 +754,9 @@ xfs_buf_get_map(
>  	trace_xfs_buf_get(bp, flags, _RET_IP_);
>  	*bpp = bp;
>  	return 0;
> +out_free_buf:
> +	xfs_buf_free(new_bp);
> +	return error;
>  }
>  
>  int
> -- 
> 2.30.2
> 

* Re: [PATCH 04/11] xfs: cleanup _xfs_buf_get_pages
From: Dave Chinner @ 2021-05-25 22:43 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, May 20, 2021 at 07:23:35AM +0200, Christoph Hellwig wrote:
> On Thu, May 20, 2021 at 08:40:28AM +1000, Dave Chinner wrote:
> > This will not apply (and break) the bulk alloc patch I sent out - we
> > have to ensure that the b_pages array is always zeroed before we
> > call the bulk alloc function, hence I moved the memset() in this
> > function to be unconditional. I almost cleaned up this function in
> > that patchset....
> 
> The buffer is freshly allocated here using kmem_cache_zalloc, so
> b_pages can't be set, b_page_array is already zeroed from
> kmem_cache_zalloc, and the separate b_pages allocation is switched
> to use kmem_zalloc.  I thought the commit log covers this, but maybe
> I need to improve it?

I think I'm still living in the past a bit, where the page array in
an active uncached buffer could change via the old "associate
memory" interface. We still actually have that interface in
userspace, but we don't have anything in the kernel that uses it any
more.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

* Re: [PATCH 08/11] xfs: centralize page allocation and freeing for buffers
From: Dave Chinner @ 2021-05-25 23:59 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, May 20, 2021 at 07:35:04AM +0200, Christoph Hellwig wrote:
> On Thu, May 20, 2021 at 09:22:45AM +1000, Dave Chinner wrote:
> > Up until this point in the patch set you are pulling code out
> > of xfs_buf_alloc_pages() into helpers. Now you are getting rid of
> > the helpers and putting the slightly modified code back into
> > xfs_buf_alloc_pages(). This doesn't make any sense at all.
> 
> It makes a whole lot of sense, but it seems you don't like the
> structure :)
> 
> As stated in the commit log we now have one helper that sets a
> _XBF_PAGES backing with pages and the map, and one helper to
> tear it down.   I think it makes a whole lot of sense this way.

I don't like the way the patchset is built. It creates temporary
infrastructure, then tears it down again to return the code to
almost exactly the same structure that it originally had. In doing
this, you change the semantics of functions and helpers multiple
times yet, eventually, we end up with the same semantics as we
started with.

It's much more obvious to factor out the end helpers first, with the
exact semantics that the current code has and will end up with, and then
just convert and clean up the code in and around those helpers. It's
much easier to follow and verify as correct if the function call
semantics and behaviour don't keep changing...

> > The freeing helper now requires the buffer state to be
> > manipulated on allocation failure so that the free function doesn't
> > run off the end of the bp->b_pages array. That's a bit of a
> > landmine, and it doesn't really clean anything up much at all.
> 
> It is something we also do elsewhere in the kernel.  Another
> alternative would be to do a NULL check on the page, or to just
> pointlessly duplicate the freeing loop.

A null check in the freeing code is much simpler to understand at a
glance. It's easy to miss that the error handling only works because
callers add a single extra line of code before the free call to make
it behave correctly. This is a bad pattern because it's easy for new code
to get it wrong and have nobody notice that it's wrong.

> > And on the allocation side there is new "fail fast" behaviour
> > because you've lifted the readahead out of xfs_buf_alloc_pages. You
> > also lifted the zeroing checks, which I note that you immediately
> > put back inside xfs_buf_alloc_pages() in the next patch.
> 
> This is to clearly split code consolidation from behavior changes.
> I could move both earlier at the cost of adding a lot of new
> code first that later gets removed.

Ah, what new code? Factoring out the _alloc_pages() code at the same
time as the alloc_kmem() code is the only "new" code that is
necessary. Everything else is then consolidation, and this doesn't
require repeatedly changing behaviour and moving code out and back
into helpers....

> > I mean, like the factoring of xfs_buf_alloc_slab(), you could have
> > just factored out xfs_buf_alloc_pages(bp, page_count) from
> > xfs_buf_allocate_memory() and used that directly in
> xfs_buf_get_uncached() and avoided a bunch of this "factor out, make a
> slight logic modification, recombine" churn. And it would be
> > trivial to do on top of the bulk allocation patch which already
> > converts both of these functions to use bulk allocation....
> 
> As mentioned in the cover letter: the bulk allocation review is what
> triggered this, as it tripped me over various loose ends.  And as
> usual I'd rather have that kind of change at the end where the
> surrounding code makes sense, so the rebased version is now patch 11
> of this series.

I've re-written my patch based on this cleanup series. It largely
does all the same things, and ends up largely in the same place, but
does things in an order that doesn't keep changing behaviour and
repeatedly moving the same code around. I'll post it once I've QA'd
it.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com
