* [PATCH 1/3] mm: Add kvrealloc()
2021-07-14 2:34 [PATCH 0/3 v3] xfs, mm: memory allocation improvements Dave Chinner
@ 2021-07-14 2:34 ` Dave Chinner
2021-07-14 11:05 ` Mel Gorman
2021-07-14 21:15 ` Darrick J. Wong
2021-07-14 2:34 ` [PATCH 2/3] xfs: remove kmem_alloc_io() Dave Chinner
2021-07-14 2:34 ` [PATCH 3/3] xfs: replace kmem_alloc_large() with kvmalloc() Dave Chinner
2 siblings, 2 replies; 6+ messages in thread
From: Dave Chinner @ 2021-07-14 2:34 UTC (permalink / raw)
To: linux-xfs; +Cc: linux-mm
From: Dave Chinner <dchinner@redhat.com>
During log recovery of an XFS filesystem with 64kB directory
buffers, rebuilding a buffer split across two log records results
in a memory allocation warning from krealloc like this:
xfs filesystem being mounted at /mnt/scratch supports timestamps until 2038 (0x7fffffff)
XFS (dm-0): Unmounting Filesystem
XFS (dm-0): Mounting V5 Filesystem
XFS (dm-0): Starting recovery (logdev: internal)
------------[ cut here ]------------
WARNING: CPU: 5 PID: 3435170 at mm/page_alloc.c:3539 get_page_from_freelist+0xdee/0xe40
.....
RIP: 0010:get_page_from_freelist+0xdee/0xe40
Call Trace:
? complete+0x3f/0x50
__alloc_pages+0x16f/0x300
alloc_pages+0x87/0x110
kmalloc_order+0x2c/0x90
kmalloc_order_trace+0x1d/0x90
__kmalloc_track_caller+0x215/0x270
? xlog_recover_add_to_cont_trans+0x63/0x1f0
krealloc+0x54/0xb0
xlog_recover_add_to_cont_trans+0x63/0x1f0
xlog_recovery_process_trans+0xc1/0xd0
xlog_recover_process_ophdr+0x86/0x130
xlog_recover_process_data+0x9f/0x160
xlog_recover_process+0xa2/0x120
xlog_do_recovery_pass+0x40b/0x7d0
? __irq_work_queue_local+0x4f/0x60
? irq_work_queue+0x3a/0x50
xlog_do_log_recovery+0x70/0x150
xlog_do_recover+0x38/0x1d0
xlog_recover+0xd8/0x170
xfs_log_mount+0x181/0x300
xfs_mountfs+0x4a1/0x9b0
xfs_fs_fill_super+0x3c0/0x7b0
get_tree_bdev+0x171/0x270
? suffix_kstrtoint.constprop.0+0xf0/0xf0
xfs_fs_get_tree+0x15/0x20
vfs_get_tree+0x24/0xc0
path_mount+0x2f5/0xaf0
__x64_sys_mount+0x108/0x140
do_syscall_64+0x3a/0x70
entry_SYSCALL_64_after_hwframe+0x44/0xae
Essentially, we are taking a multi-order allocation from kmem_alloc()
(which has an open coded no fail, no warn loop) and then
reallocating it out to 64kB using krealloc(__GFP_NOFAIL) and that is
then triggering the above warning.
This is a regression caused by converting this code from an open
coded no fail/no warn reallocation loop to using __GFP_NOFAIL.
What we actually need here is kvrealloc(), so that if contiguous
page allocation fails we fall back to vmalloc() and we don't
get nasty warnings happening in XFS.
Fixes: 771915c4f688 ("xfs: remove kmem_realloc()")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log_recover.c | 4 +++-
include/linux/mm.h | 2 ++
mm/util.c | 15 +++++++++++++++
3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1721fce2ec94..3c08d495844d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2062,7 +2062,9 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = krealloc(old_ptr, len + old_len, GFP_KERNEL | __GFP_NOFAIL);
+ ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
memcpy(&ptr[old_len], dp, len);
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 57453dba41b9..ff9c00bfb76d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -829,6 +829,8 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
return kvmalloc_array(n, size, flags | __GFP_ZERO);
}
+extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
+ gfp_t flags);
extern void kvfree(const void *addr);
extern void kvfree_sensitive(const void *addr, size_t len);
diff --git a/mm/util.c b/mm/util.c
index 99c6cc77de9e..b7698d6b2226 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -635,6 +635,21 @@ void kvfree_sensitive(const void *addr, size_t len)
}
EXPORT_SYMBOL(kvfree_sensitive);
+void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+{
+ void *newp;
+
+ if (oldsize >= newsize)
+ return (void *)p;
+ newp = kvmalloc(newsize, flags);
+ if (!newp)
+ return NULL;
+ memcpy(newp, p, oldsize);
+ kvfree(p);
+ return newp;
+}
+EXPORT_SYMBOL(kvrealloc);
+
static inline void *__page_rmapping(struct page *page)
{
unsigned long mapping;
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 1/3] mm: Add kvrealloc()
2021-07-14 2:34 ` [PATCH 1/3] mm: Add kvrealloc() Dave Chinner
@ 2021-07-14 11:05 ` Mel Gorman
2021-07-14 21:15 ` Darrick J. Wong
1 sibling, 0 replies; 6+ messages in thread
From: Mel Gorman @ 2021-07-14 11:05 UTC (permalink / raw)
To: Dave Chinner; +Cc: linux-xfs, linux-mm
On Wed, Jul 14, 2021 at 12:34:38PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> During log recovery of an XFS filesystem with 64kB directory
> buffers, rebuilding a buffer split across two log records results
> in a memory allocation warning from krealloc like this:
>
> xfs filesystem being mounted at /mnt/scratch supports timestamps until 2038 (0x7fffffff)
> XFS (dm-0): Unmounting Filesystem
> XFS (dm-0): Mounting V5 Filesystem
> XFS (dm-0): Starting recovery (logdev: internal)
> ------------[ cut here ]------------
> WARNING: CPU: 5 PID: 3435170 at mm/page_alloc.c:3539 get_page_from_freelist+0xdee/0xe40
> .....
> RIP: 0010:get_page_from_freelist+0xdee/0xe40
> Call Trace:
> ? complete+0x3f/0x50
> __alloc_pages+0x16f/0x300
> alloc_pages+0x87/0x110
> kmalloc_order+0x2c/0x90
> kmalloc_order_trace+0x1d/0x90
> __kmalloc_track_caller+0x215/0x270
> ? xlog_recover_add_to_cont_trans+0x63/0x1f0
> krealloc+0x54/0xb0
> xlog_recover_add_to_cont_trans+0x63/0x1f0
> xlog_recovery_process_trans+0xc1/0xd0
> xlog_recover_process_ophdr+0x86/0x130
> xlog_recover_process_data+0x9f/0x160
> xlog_recover_process+0xa2/0x120
> xlog_do_recovery_pass+0x40b/0x7d0
> ? __irq_work_queue_local+0x4f/0x60
> ? irq_work_queue+0x3a/0x50
> xlog_do_log_recovery+0x70/0x150
> xlog_do_recover+0x38/0x1d0
> xlog_recover+0xd8/0x170
> xfs_log_mount+0x181/0x300
> xfs_mountfs+0x4a1/0x9b0
> xfs_fs_fill_super+0x3c0/0x7b0
> get_tree_bdev+0x171/0x270
> ? suffix_kstrtoint.constprop.0+0xf0/0xf0
> xfs_fs_get_tree+0x15/0x20
> vfs_get_tree+0x24/0xc0
> path_mount+0x2f5/0xaf0
> __x64_sys_mount+0x108/0x140
> do_syscall_64+0x3a/0x70
> entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Essentially, we are taking a multi-order allocation from kmem_alloc()
> (which has an open coded no fail, no warn loop) and then
> reallocating it out to 64kB using krealloc(__GFP_NOFAIL) and that is
> then triggering the above warning.
>
> This is a regression caused by converting this code from an open
> coded no fail/no warn reallocation loop to using __GFP_NOFAIL.
>
> What we actually need here is kvrealloc(), so that if contiguous
> page allocation fails we fall back to vmalloc() and we don't
> get nasty warnings happening in XFS.
>
> Fixes: 771915c4f688 ("xfs: remove kmem_realloc()")
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
--
Mel Gorman
SUSE Labs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/3] mm: Add kvrealloc()
2021-07-14 2:34 ` [PATCH 1/3] mm: Add kvrealloc() Dave Chinner
2021-07-14 11:05 ` Mel Gorman
@ 2021-07-14 21:15 ` Darrick J. Wong
1 sibling, 0 replies; 6+ messages in thread
From: Darrick J. Wong @ 2021-07-14 21:15 UTC (permalink / raw)
To: Dave Chinner; +Cc: linux-xfs, linux-mm
On Wed, Jul 14, 2021 at 12:34:38PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> During log recovery of an XFS filesystem with 64kB directory
> buffers, rebuilding a buffer split across two log records results
> in a memory allocation warning from krealloc like this:
>
> xfs filesystem being mounted at /mnt/scratch supports timestamps until 2038 (0x7fffffff)
> XFS (dm-0): Unmounting Filesystem
> XFS (dm-0): Mounting V5 Filesystem
> XFS (dm-0): Starting recovery (logdev: internal)
> ------------[ cut here ]------------
> WARNING: CPU: 5 PID: 3435170 at mm/page_alloc.c:3539 get_page_from_freelist+0xdee/0xe40
> .....
> RIP: 0010:get_page_from_freelist+0xdee/0xe40
> Call Trace:
> ? complete+0x3f/0x50
> __alloc_pages+0x16f/0x300
> alloc_pages+0x87/0x110
> kmalloc_order+0x2c/0x90
> kmalloc_order_trace+0x1d/0x90
> __kmalloc_track_caller+0x215/0x270
> ? xlog_recover_add_to_cont_trans+0x63/0x1f0
> krealloc+0x54/0xb0
> xlog_recover_add_to_cont_trans+0x63/0x1f0
> xlog_recovery_process_trans+0xc1/0xd0
> xlog_recover_process_ophdr+0x86/0x130
> xlog_recover_process_data+0x9f/0x160
> xlog_recover_process+0xa2/0x120
> xlog_do_recovery_pass+0x40b/0x7d0
> ? __irq_work_queue_local+0x4f/0x60
> ? irq_work_queue+0x3a/0x50
> xlog_do_log_recovery+0x70/0x150
> xlog_do_recover+0x38/0x1d0
> xlog_recover+0xd8/0x170
> xfs_log_mount+0x181/0x300
> xfs_mountfs+0x4a1/0x9b0
> xfs_fs_fill_super+0x3c0/0x7b0
> get_tree_bdev+0x171/0x270
> ? suffix_kstrtoint.constprop.0+0xf0/0xf0
> xfs_fs_get_tree+0x15/0x20
> vfs_get_tree+0x24/0xc0
> path_mount+0x2f5/0xaf0
> __x64_sys_mount+0x108/0x140
> do_syscall_64+0x3a/0x70
> entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Essentially, we are taking a multi-order allocation from kmem_alloc()
> (which has an open coded no fail, no warn loop) and then
> reallocating it out to 64kB using krealloc(__GFP_NOFAIL) and that is
> then triggering the above warning.
>
> This is a regression caused by converting this code from an open
> coded no fail/no warn reallocation loop to using __GFP_NOFAIL.
>
> What we actually need here is kvrealloc(), so that if contiguous
> page allocation fails we fall back to vmalloc() and we don't
> get nasty warnings happening in XFS.
>
> Fixes: 771915c4f688 ("xfs: remove kmem_realloc()")
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
Looks good now,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
--D
> ---
> fs/xfs/xfs_log_recover.c | 4 +++-
> include/linux/mm.h | 2 ++
> mm/util.c | 15 +++++++++++++++
> 3 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 1721fce2ec94..3c08d495844d 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2062,7 +2062,9 @@ xlog_recover_add_to_cont_trans(
> old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
> old_len = item->ri_buf[item->ri_cnt-1].i_len;
>
> - ptr = krealloc(old_ptr, len + old_len, GFP_KERNEL | __GFP_NOFAIL);
> + ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL);
> + if (!ptr)
> + return -ENOMEM;
> memcpy(&ptr[old_len], dp, len);
> item->ri_buf[item->ri_cnt-1].i_len += len;
> item->ri_buf[item->ri_cnt-1].i_addr = ptr;
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 57453dba41b9..ff9c00bfb76d 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -829,6 +829,8 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
> return kvmalloc_array(n, size, flags | __GFP_ZERO);
> }
>
> +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
> + gfp_t flags);
> extern void kvfree(const void *addr);
> extern void kvfree_sensitive(const void *addr, size_t len);
>
> diff --git a/mm/util.c b/mm/util.c
> index 99c6cc77de9e..b7698d6b2226 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -635,6 +635,21 @@ void kvfree_sensitive(const void *addr, size_t len)
> }
> EXPORT_SYMBOL(kvfree_sensitive);
>
> +void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
> +{
> + void *newp;
> +
> + if (oldsize >= newsize)
> + return (void *)p;
> + newp = kvmalloc(newsize, flags);
> + if (!newp)
> + return NULL;
> + memcpy(newp, p, oldsize);
> + kvfree(p);
> + return newp;
> +}
> +EXPORT_SYMBOL(kvrealloc);
> +
> static inline void *__page_rmapping(struct page *page)
> {
> unsigned long mapping;
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/3] xfs: remove kmem_alloc_io()
2021-07-14 2:34 [PATCH 0/3 v3] xfs, mm: memory allocation improvements Dave Chinner
2021-07-14 2:34 ` [PATCH 1/3] mm: Add kvrealloc() Dave Chinner
@ 2021-07-14 2:34 ` Dave Chinner
2021-07-14 2:34 ` [PATCH 3/3] xfs: replace kmem_alloc_large() with kvmalloc() Dave Chinner
2 siblings, 0 replies; 6+ messages in thread
From: Dave Chinner @ 2021-07-14 2:34 UTC (permalink / raw)
To: linux-xfs; +Cc: linux-mm
From: Dave Chinner <dchinner@redhat.com>
Since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment
for kmalloc(power-of-two)"), the core slab code now guarantees slab
alignment in all situations sufficient for IO purposes (i.e. minimum
of 512 byte alignment of >= 512 byte sized heap allocations) we no
longer need the workaround in the XFS code to provide this
guarantee.
Replace the use of kmem_alloc_io() with kmem_alloc() or
kmem_alloc_large() appropriately, and remove the kmem_alloc_io()
interface altogether.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/kmem.c | 25 -------------------------
fs/xfs/kmem.h | 1 -
fs/xfs/xfs_buf.c | 3 +--
fs/xfs/xfs_buf.h | 6 ------
fs/xfs/xfs_log.c | 3 +--
fs/xfs/xfs_log_recover.c | 4 +---
fs/xfs/xfs_trace.h | 1 -
7 files changed, 3 insertions(+), 40 deletions(-)
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index e986b95d94c9..3f2979fd2f2b 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -56,31 +56,6 @@ __kmem_vmalloc(size_t size, xfs_km_flags_t flags)
return ptr;
}
-/*
- * Same as kmem_alloc_large, except we guarantee the buffer returned is aligned
- * to the @align_mask. We only guarantee alignment up to page size, we'll clamp
- * alignment at page size if it is larger. vmalloc always returns a PAGE_SIZE
- * aligned region.
- */
-void *
-kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags)
-{
- void *ptr;
-
- trace_kmem_alloc_io(size, flags, _RET_IP_);
-
- if (WARN_ON_ONCE(align_mask >= PAGE_SIZE))
- align_mask = PAGE_SIZE - 1;
-
- ptr = kmem_alloc(size, flags | KM_MAYFAIL);
- if (ptr) {
- if (!((uintptr_t)ptr & align_mask))
- return ptr;
- kfree(ptr);
- }
- return __kmem_vmalloc(size, flags);
-}
-
void *
kmem_alloc_large(size_t size, xfs_km_flags_t flags)
{
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 38007117697e..9ff20047f8b8 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -57,7 +57,6 @@ kmem_flags_convert(xfs_km_flags_t flags)
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags);
extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
{
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8ff42b3585e0..a5ef1f9eb622 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -315,7 +315,6 @@ xfs_buf_alloc_kmem(
struct xfs_buf *bp,
xfs_buf_flags_t flags)
{
- int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
xfs_km_flags_t kmflag_mask = KM_NOFS;
size_t size = BBTOB(bp->b_length);
@@ -323,7 +322,7 @@ xfs_buf_alloc_kmem(
if (!(flags & XBF_READ))
kmflag_mask |= KM_ZERO;
- bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
+ bp->b_addr = kmem_alloc(size, kmflag_mask);
if (!bp->b_addr)
return -ENOMEM;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 464dc548fa23..cfbe37d73293 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -355,12 +355,6 @@ extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
-static inline int
-xfs_buftarg_dma_alignment(struct xfs_buftarg *bt)
-{
- return queue_dma_alignment(bt->bt_bdev->bd_disk->queue);
-}
-
int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 36fa2650b081..826b3cf5fd72 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1451,7 +1451,6 @@ xlog_alloc_log(
*/
ASSERT(log->l_iclog_size >= 4096);
for (i = 0; i < log->l_iclog_bufs; i++) {
- int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp);
size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
sizeof(struct bio_vec);
@@ -1463,7 +1462,7 @@ xlog_alloc_log(
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
- iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
+ iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
KM_MAYFAIL | KM_ZERO);
if (!iclog->ic_data)
goto out_free_iclog;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 3c08d495844d..d55fc7caa227 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -79,8 +79,6 @@ xlog_alloc_buffer(
struct xlog *log,
int nbblks)
{
- int align_mask = xfs_buftarg_dma_alignment(log->l_targ);
-
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
@@ -108,7 +106,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO);
+ return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL | KM_ZERO);
}
/*
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f9d8d605f9b1..6865e838a71b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3689,7 +3689,6 @@ DEFINE_EVENT(xfs_kmem_class, name, \
TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \
TP_ARGS(size, flags, caller_ip))
DEFINE_KMEM_EVENT(kmem_alloc);
-DEFINE_KMEM_EVENT(kmem_alloc_io);
DEFINE_KMEM_EVENT(kmem_alloc_large);
TRACE_EVENT(xfs_check_new_dalign,
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/3] xfs: replace kmem_alloc_large() with kvmalloc()
2021-07-14 2:34 [PATCH 0/3 v3] xfs, mm: memory allocation improvements Dave Chinner
2021-07-14 2:34 ` [PATCH 1/3] mm: Add kvrealloc() Dave Chinner
2021-07-14 2:34 ` [PATCH 2/3] xfs: remove kmem_alloc_io() Dave Chinner
@ 2021-07-14 2:34 ` Dave Chinner
2 siblings, 0 replies; 6+ messages in thread
From: Dave Chinner @ 2021-07-14 2:34 UTC (permalink / raw)
To: linux-xfs; +Cc: linux-mm
From: Dave Chinner <dchinner@redhat.com>
There is no reason for this wrapper existing anymore. All the places
that use KM_NOFS allocation are within transaction contexts and
hence covered by memalloc_nofs_save/restore contexts. Hence we don't
need any special handling of vmalloc for large IOs anymore and
so special casing this code isn't necessary.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/kmem.c | 39 -----------------------------------
fs/xfs/kmem.h | 1 -
fs/xfs/libxfs/xfs_attr_leaf.c | 2 +-
fs/xfs/scrub/attr.c | 14 +++++++------
fs/xfs/scrub/attr.h | 3 ---
fs/xfs/xfs_log.c | 4 ++--
fs/xfs/xfs_log_cil.c | 10 ++++++++-
fs/xfs/xfs_log_recover.c | 2 +-
fs/xfs/xfs_trace.h | 1 -
9 files changed, 21 insertions(+), 55 deletions(-)
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 3f2979fd2f2b..6f49bf39183c 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -29,42 +29,3 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}
-
-
-/*
- * __vmalloc() will allocate data pages and auxiliary structures (e.g.
- * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context here. Hence
- * we need to tell memory reclaim that we are in such a context via
- * PF_MEMALLOC_NOFS to prevent memory reclaim re-entering the filesystem here
- * and potentially deadlocking.
- */
-static void *
-__kmem_vmalloc(size_t size, xfs_km_flags_t flags)
-{
- unsigned nofs_flag = 0;
- void *ptr;
- gfp_t lflags = kmem_flags_convert(flags);
-
- if (flags & KM_NOFS)
- nofs_flag = memalloc_nofs_save();
-
- ptr = __vmalloc(size, lflags);
-
- if (flags & KM_NOFS)
- memalloc_nofs_restore(nofs_flag);
-
- return ptr;
-}
-
-void *
-kmem_alloc_large(size_t size, xfs_km_flags_t flags)
-{
- void *ptr;
-
- trace_kmem_alloc_large(size, flags, _RET_IP_);
-
- ptr = kmem_alloc(size, flags | KM_MAYFAIL);
- if (ptr)
- return ptr;
- return __kmem_vmalloc(size, flags);
-}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 9ff20047f8b8..54da6d717a06 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -57,7 +57,6 @@ kmem_flags_convert(xfs_km_flags_t flags)
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
{
kvfree(ptr);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index b910bd209949..16d64872acc0 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -489,7 +489,7 @@ xfs_attr_copy_value(
}
if (!args->value) {
- args->value = kmem_alloc_large(valuelen, KM_NOLOCKDEP);
+ args->value = kvmalloc(valuelen, GFP_KERNEL | __GFP_NOLOCKDEP);
if (!args->value)
return -ENOMEM;
}
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 552af0cf8482..6c36af6dbd35 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -25,11 +25,11 @@
* reallocating the buffer if necessary. Buffer contents are not preserved
* across a reallocation.
*/
-int
+static int
xchk_setup_xattr_buf(
struct xfs_scrub *sc,
size_t value_size,
- xfs_km_flags_t flags)
+ gfp_t flags)
{
size_t sz;
struct xchk_xattr_buf *ab = sc->buf;
@@ -57,7 +57,7 @@ xchk_setup_xattr_buf(
* Don't zero the buffer upon allocation to avoid runtime overhead.
* All users must be careful never to read uninitialized contents.
*/
- ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+ ab = kvmalloc(sizeof(*ab) + sz, flags);
if (!ab)
return -ENOMEM;
@@ -79,7 +79,7 @@ xchk_setup_xattr(
* without the inode lock held, which means we can sleep.
*/
if (sc->flags & XCHK_TRY_HARDER) {
- error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, 0);
+ error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL);
if (error)
return error;
}
@@ -138,7 +138,8 @@ xchk_xattr_listent(
* doesn't work, we overload the seen_enough variable to convey
* the error message back to the main scrub function.
*/
- error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+ error = xchk_setup_xattr_buf(sx->sc, valuelen,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (error == -ENOMEM)
error = -EDEADLOCK;
if (error) {
@@ -323,7 +324,8 @@ xchk_xattr_block(
return 0;
/* Allocate memory for block usage checking. */
- error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+ error = xchk_setup_xattr_buf(ds->sc, 0,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (error == -ENOMEM)
return -EDEADLOCK;
if (error)
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
index 13a1d2e8424d..1719e1c4da59 100644
--- a/fs/xfs/scrub/attr.h
+++ b/fs/xfs/scrub/attr.h
@@ -65,7 +65,4 @@ xchk_xattr_dstmap(
BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
}
-int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
- xfs_km_flags_t flags);
-
#endif /* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 826b3cf5fd72..ec3668508acf 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1462,8 +1462,8 @@ xlog_alloc_log(
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
- iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
- KM_MAYFAIL | KM_ZERO);
+ iclog->ic_data = kvzalloc(log->l_iclog_size,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!iclog->ic_data)
goto out_free_iclog;
#ifdef DEBUG
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b128aaa9b870..d162e8b83e90 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -185,7 +185,15 @@ xlog_cil_alloc_shadow_bufs(
*/
kmem_free(lip->li_lv_shadow);
- lv = kmem_alloc_large(buf_size, KM_NOFS);
+ /*
+ * We are in transaction context, which means this
+ * allocation will pick up GFP_NOFS from the
+ * memalloc_nofs_save/restore context the transaction
+ * holds. This means we can use GFP_KERNEL here so the
+ * generic kvmalloc() code will run vmalloc on
+ * contiguous page allocation failure as we require.
+ */
+ lv = kvmalloc(buf_size, GFP_KERNEL);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d55fc7caa227..16df33f838cc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -106,7 +106,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL | KM_ZERO);
+ return kvzalloc(BBTOB(nbblks), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
}
/*
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6865e838a71b..38f2f67303f7 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3689,7 +3689,6 @@ DEFINE_EVENT(xfs_kmem_class, name, \
TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \
TP_ARGS(size, flags, caller_ip))
DEFINE_KMEM_EVENT(kmem_alloc);
-DEFINE_KMEM_EVENT(kmem_alloc_large);
TRACE_EVENT(xfs_check_new_dalign,
TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread