* [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
@ 2022-05-10 11:38 Qi Zheng
2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-10 11:38 UTC (permalink / raw)
To: akinobu.mita, akpm, vbabka, gregkh, jirislaby, rostedt
Cc: linux-kernel, linux-mm, Qi Zheng
We expect no warnings to be issued when we specify __GFP_NOWARN, but
currently in paths like alloc_pages() and kmalloc(), there are still
some warnings printed, fix it.
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
include/linux/fault-inject.h | 2 ++
lib/fault-inject.c | 3 +++
mm/failslab.c | 3 +++
mm/internal.h | 11 +++++++++++
mm/page_alloc.c | 22 ++++++++++++----------
5 files changed, 31 insertions(+), 10 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 2d04f6448cde..9f6e25467844 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,6 +20,7 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
+ bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -39,6 +40,7 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
+ .no_warn = false, \
}
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index ce12621b4275..423784d9c058 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr)
{
+ if (attr->no_warn)
+ return;
+
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
diff --git a/mm/failslab.c b/mm/failslab.c
index f92fed91ac23..58df9789f1d2 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;
+ if (gfpflags & __GFP_NOWARN)
+ failslab.attr.no_warn = true;
+
return should_fail(&failslab.attr, s->object_size);
}
diff --git a/mm/internal.h b/mm/internal.h
index cf16280ce132..7a268fac6559 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -35,6 +35,17 @@ struct folio_batch;
/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
+#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
void page_writeback_init(void);
static inline void *folio_raw_mapping(struct folio *folio)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0e42038382c1..2bf4ce4d0e2f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
* We most definitely don't want callers attempting to
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
- WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+ WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), gfp_flags);
do {
page = NULL;
@@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;
+ if (gfp_mask & __GFP_NOWARN)
+ fail_page_alloc.attr.no_warn = true;
+
return should_fail(&fail_page_alloc.attr, 1 << order);
}
@@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
*/
/* Exhausted what can be done so it's blame time */
- if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
+ if (out_of_memory(&oc) ||
+ WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
*did_some_progress = 1;
/*
@@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* We also sanity check to catch abuse of atomic reserves being used by
* callers that are not in atomic context.
*/
- if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
- (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+ if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+ (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
gfp_mask &= ~__GFP_ATOMIC;
retry_cpuset:
@@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* All existing users of the __GFP_NOFAIL are blockable, so warn
* of any new users that actually require GFP_NOWAIT
*/
- if (WARN_ON_ONCE(!can_direct_reclaim))
+ if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
goto fail;
/*
@@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* because we cannot reclaim anything and only can loop waiting
* for somebody to do a work for us
*/
- WARN_ON_ONCE(current->flags & PF_MEMALLOC);
+ WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
/*
* non failing costly orders are a hard requirement which we
@@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* so that we can identify them and convert them to something
* else.
*/
- WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+ WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
/*
* Help non-failing allocations by giving them access to memory
@@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
* There are several places where we assume that the order value is sane
* so bail out early if the request is out of bound.
*/
- if (unlikely(order >= MAX_ORDER)) {
- WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
+ if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
return NULL;
- }
gfp &= gfp_allowed_mask;
/*
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock
2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
@ 2022-05-10 11:38 ` Qi Zheng
2022-05-11 5:57 ` Jiri Slaby
2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
2022-05-11 5:12 ` Qi Zheng
2 siblings, 1 reply; 10+ messages in thread
From: Qi Zheng @ 2022-05-10 11:38 UTC (permalink / raw)
To: akinobu.mita, akpm, vbabka, gregkh, jirislaby, rostedt
Cc: linux-kernel, linux-mm, Qi Zheng
The pty_write() invokes kmalloc() which may invoke a normal printk() to
print failure message. This can cause a deadlock in the scenario reported
by syz-bot below:
CPU0 CPU1 CPU2
---- ---- ----
lock(console_owner);
lock(&port_lock_key);
lock(&port->lock);
lock(&port_lock_key);
lock(&port->lock);
lock(console_owner);
As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
load balance console writes") said, such deadlock can be prevented by
using printk_deferred() in kmalloc() (which is invoked in the section
guarded by the port->lock). But there are too many printk() on the
kmalloc() path, and kmalloc() can be called from anywhere, so changing
printk() to printk_deferred() is too complicated and inelegant.
Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
that printk() will not be called, and this deadlock problem can be avoided.
Syz-bot reported the following lockdep error:
======================================================
WARNING: possible circular locking dependency detected
5.4.143-00237-g08ccc19a-dirty #10 Not tainted
------------------------------------------------------
syz-executor.4/29420 is trying to acquire lock:
ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: console_trylock_spinning kernel/printk/printk.c:1752 [inline]
ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
but task is already holding lock:
ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 drivers/tty/pty.c:120
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&port->lock){-.-.}-{2:2}:
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
tty_port_tty_get drivers/tty/tty_port.c:288 [inline] <-- lock(&port->lock);
tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
serial8250_tx_chars+0x530/0xa80 drivers/tty/serial/8250/8250_port.c:1767
serial8250_handle_irq.part.0+0x31f/0x3d0 drivers/tty/serial/8250/8250_port.c:1854
serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 [inline] <-- lock(&port_lock_key);
serial8250_default_handle_irq+0xb2/0x220 drivers/tty/serial/8250/8250_port.c:1870
serial8250_interrupt+0xfd/0x200 drivers/tty/serial/8250/8250_core.c:126
__handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
ret_from_intr+0x0/0x19
native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
cpuidle_idle_call kernel/sched/idle.c:184 [inline]
do_idle+0x44c/0x590 kernel/sched/idle.c:294
cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
-> #1 (&port_lock_key){-.-.}-{2:2}:
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
serial8250_console_write+0x184/0xa40 drivers/tty/serial/8250/8250_port.c:3198
<-- lock(&port_lock_key);
call_console_drivers kernel/printk/printk.c:1819 [inline]
console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
vprintk_emit+0x1b5/0x470 kernel/printk/printk.c:2024 <-- lock(console_owner);
vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
printk+0xba/0xed kernel/printk/printk.c:2084
register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
univ8250_console_init+0x3a/0x46 drivers/tty/serial/8250/8250_core.c:681
console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
start_kernel+0x5e9/0x879 init/main.c:713
secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
-> #0 (console_owner){....}-{0:0}:
check_prev_add kernel/locking/lockdep.c:2600 [inline]
check_prevs_add kernel/locking/lockdep.c:2705 [inline]
validate_chain kernel/locking/lockdep.c:3095 [inline]
__lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
console_trylock_spinning kernel/printk/printk.c:1773 [inline] <-- lock(console_owner);
vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
printk+0xba/0xed kernel/printk/printk.c:2084
fail_dump lib/fault-inject.c:45 [inline]
should_fail+0x67b/0x7c0 lib/fault-inject.c:144
__should_failslab+0x152/0x1c0 mm/failslab.c:33
should_failslab+0x5/0x10 mm/slab_common.c:1224
slab_pre_alloc_hook mm/slab.h:468 [inline]
slab_alloc_node mm/slub.c:2723 [inline]
slab_alloc mm/slub.c:2807 [inline]
__kmalloc+0x72/0x300 mm/slub.c:3871
kmalloc include/linux/slab.h:582 [inline]
tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
__tty_buffer_request_room+0x156/0x2a0 drivers/tty/tty_buffer.c:273
tty_insert_flip_string_fixed_flag+0x93/0x250 drivers/tty/tty_buffer.c:318
tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
pty_write+0x126/0x1f0 drivers/tty/pty.c:122 <-- lock(&port->lock);
n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
do_tty_write drivers/tty/tty_io.c:961 [inline]
tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
__vfs_write+0x76/0x100 fs/read_write.c:494
vfs_write+0x268/0x5c0 fs/read_write.c:558
ksys_write+0x12d/0x250 fs/read_write.c:611
do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Chain exists of:
console_owner --> &port_lock_key --> &port->lock
Fixes: b6da31b2c07c ("tty: Fix data race in tty_insert_flip_string_fixed_flag")
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
drivers/tty/tty_buffer.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 646510476c30..bfa431a8e690 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
*/
if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
return NULL;
- p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
+ p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
+ GFP_ATOMIC | __GFP_NOWARN);
if (p == NULL)
return NULL;
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
@ 2022-05-10 18:59 ` Andrew Morton
2022-05-11 2:19 ` Qi Zheng
2022-05-11 5:12 ` Qi Zheng
2 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2022-05-10 18:59 UTC (permalink / raw)
To: Qi Zheng
Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm
On Tue, 10 May 2022 19:38:08 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> We expect no warnings to be issued when we specify __GFP_NOWARN, but
> currently in paths like alloc_pages() and kmalloc(), there are still
> some warnings printed, fix it.
Looks sane to me.
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -35,6 +35,17 @@ struct folio_batch;
> /* Do not use these with a slab allocator */
> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>
> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
> + static bool __section(".data.once") __warned; \
> + int __ret_warn_once = !!(cond); \
> + \
> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> + __warned = true; \
> + WARN_ON(1); \
> + } \
> + unlikely(__ret_warn_once); \
> +})
I don't think WARN_ON_ONCE_GFP is a good name for this. But
WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.
> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * We also sanity check to catch abuse of atomic reserves being used by
> * callers that are not in atomic context.
> */
> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> gfp_mask &= ~__GFP_ATOMIC;
>
> retry_cpuset:
I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
(https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
deleted this code.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
@ 2022-05-11 2:19 ` Qi Zheng
2022-05-11 2:32 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: Qi Zheng @ 2022-05-11 2:19 UTC (permalink / raw)
To: Andrew Morton
Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm
On 2022/5/11 2:59 AM, Andrew Morton wrote:
> On Tue, 10 May 2022 19:38:08 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>
>> We expect no warnings to be issued when we specify __GFP_NOWARN, but
>> currently in paths like alloc_pages() and kmalloc(), there are still
>> some warnings printed, fix it.
>
> Looks sane to me.
>
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -35,6 +35,17 @@ struct folio_batch;
>> /* Do not use these with a slab allocator */
>> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>>
>> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
>> + static bool __section(".data.once") __warned; \
>> + int __ret_warn_once = !!(cond); \
>> + \
>> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
>> + __warned = true; \
>> + WARN_ON(1); \
>> + } \
>> + unlikely(__ret_warn_once); \
>> +})
>
> I don't think WARN_ON_ONCE_GFP is a good name for this. But
> WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
>
> WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.
I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?
>
>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> * We also sanity check to catch abuse of atomic reserves being used by
>> * callers that are not in atomic context.
>> */
>> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>> gfp_mask &= ~__GFP_ATOMIC;
>>
>> retry_cpuset:
>
> I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
> (https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
> deleted this code.
>
This series is based on v5.18-rc5, I will rebase it to the latest next
branch and check if there are any missing WARN_ON_ONCEs that are not
being handled.
Thanks,
Qi
--
Thanks,
Qi
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
2022-05-11 2:19 ` Qi Zheng
@ 2022-05-11 2:32 ` Andrew Morton
2022-05-11 2:36 ` Qi Zheng
0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2022-05-11 2:32 UTC (permalink / raw)
To: Qi Zheng
Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm
On Wed, 11 May 2022 10:19:48 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>
> ,,,
> >> --- a/mm/internal.h
> >> +++ b/mm/internal.h
> >> @@ -35,6 +35,17 @@ struct folio_batch;
> >> /* Do not use these with a slab allocator */
> >> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
> >>
> >> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
> >> + static bool __section(".data.once") __warned; \
> >> + int __ret_warn_once = !!(cond); \
> >> + \
> >> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> >> + __warned = true; \
> >> + WARN_ON(1); \
> >> + } \
> >> + unlikely(__ret_warn_once); \
> >> +})
> >
> > I don't think WARN_ON_ONCE_GFP is a good name for this. But
> > WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
> >
> > WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.
>
> I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
> putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?
I dunno. WARN_ON_ONCE_GFP isn't too bad I suppose. Add a comment over
the definition explaining it?
> >
> >> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> >> * We also sanity check to catch abuse of atomic reserves being used by
> >> * callers that are not in atomic context.
> >> */
> >> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> >> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> >> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> >> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> >> gfp_mask &= ~__GFP_ATOMIC;
> >>
> >> retry_cpuset:
> >
> > I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
> > (https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
> > deleted this code.
> >
>
> This series is based on v5.18-rc5, I will rebase it to the latest next
> branch and check if there are any missing WARN_ON_ONCEs that are not
> being handled.
Against git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm branch
mm-unstable, please. That ends up in linux-next, with a delay.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
2022-05-11 2:32 ` Andrew Morton
@ 2022-05-11 2:36 ` Qi Zheng
0 siblings, 0 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-11 2:36 UTC (permalink / raw)
To: Andrew Morton
Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm
On 2022/5/11 10:32 AM, Andrew Morton wrote:
> On Wed, 11 May 2022 10:19:48 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>
>>
>> ,,,
>>>> --- a/mm/internal.h
>>>> +++ b/mm/internal.h
>>>> @@ -35,6 +35,17 @@ struct folio_batch;
>>>> /* Do not use these with a slab allocator */
>>>> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>>>>
>>>> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
>>>> + static bool __section(".data.once") __warned; \
>>>> + int __ret_warn_once = !!(cond); \
>>>> + \
>>>> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
>>>> + __warned = true; \
>>>> + WARN_ON(1); \
>>>> + } \
>>>> + unlikely(__ret_warn_once); \
>>>> +})
>>>
>>> I don't think WARN_ON_ONCE_GFP is a good name for this. But
>>> WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
>>>
>>> WARN_ON_ONCE_NOWARN might be better. No strong opinion here, really.
>>
>> I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
>> putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?
>
> I dunno. WARN_ON_ONCE_GFP isn't too bad I suppose. Add a comment over
> the definition explaining it?
OK, I will add a comment to it.
>
>>>
>>>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>>> * We also sanity check to catch abuse of atomic reserves being used by
>>>> * callers that are not in atomic context.
>>>> */
>>>> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>>>> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>>>> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>>>> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>>>> gfp_mask &= ~__GFP_ATOMIC;
>>>>
>>>> retry_cpuset:
>>>
>>> I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
>>> (https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
>>> deleted this code.
>>>
>>
>> This series is based on v5.18-rc5, I will rebase it to the latest next
>> branch and check if there are any missing WARN_ON_ONCEs that are not
>> being handled.
>
> Against git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm branch
> mm-unstable, please. That ends up in linux-next, with a delay.
OK, will do.
--
Thanks,
Qi
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
@ 2022-05-11 5:12 ` Qi Zheng
2022-05-11 5:28 ` Qi Zheng
2 siblings, 1 reply; 10+ messages in thread
From: Qi Zheng @ 2022-05-11 5:12 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, akinobu.mita, vbabka, gregkh, jirislaby, rostedt
On 2022/5/10 7:38 PM, Qi Zheng wrote:
> We expect no warnings to be issued when we specify __GFP_NOWARN, but
> currently in paths like alloc_pages() and kmalloc(), there are still
> some warnings printed, fix it.
Hi Andrew,
Maybe we only need to deal with memory allocation failures, such as
should_fail() (This leads to deadlock in PATCH[2/2]). These
WARN_ON_ONCE()s report usage problems and should not be printed. If they
are printed, we should fix these usage problems.
Thanks,
Qi
>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
> include/linux/fault-inject.h | 2 ++
> lib/fault-inject.c | 3 +++
> mm/failslab.c | 3 +++
> mm/internal.h | 11 +++++++++++
> mm/page_alloc.c | 22 ++++++++++++----------
> 5 files changed, 31 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index 2d04f6448cde..9f6e25467844 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -20,6 +20,7 @@ struct fault_attr {
> atomic_t space;
> unsigned long verbose;
> bool task_filter;
> + bool no_warn;
> unsigned long stacktrace_depth;
> unsigned long require_start;
> unsigned long require_end;
> @@ -39,6 +40,7 @@ struct fault_attr {
> .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
> .verbose = 2, \
> .dname = NULL, \
> + .no_warn = false, \
> }
>
> #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
> index ce12621b4275..423784d9c058 100644
> --- a/lib/fault-inject.c
> +++ b/lib/fault-inject.c
> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>
> static void fail_dump(struct fault_attr *attr)
> {
> + if (attr->no_warn)
> + return;
> +
> if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
> printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
> "name %pd, interval %lu, probability %lu, "
> diff --git a/mm/failslab.c b/mm/failslab.c
> index f92fed91ac23..58df9789f1d2 100644
> --- a/mm/failslab.c
> +++ b/mm/failslab.c
> @@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
> return false;
>
> + if (gfpflags & __GFP_NOWARN)
> + failslab.attr.no_warn = true;
> +
> return should_fail(&failslab.attr, s->object_size);
> }
>
> diff --git a/mm/internal.h b/mm/internal.h
> index cf16280ce132..7a268fac6559 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -35,6 +35,17 @@ struct folio_batch;
> /* Do not use these with a slab allocator */
> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>
> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
> + static bool __section(".data.once") __warned; \
> + int __ret_warn_once = !!(cond); \
> + \
> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> + __warned = true; \
> + WARN_ON(1); \
> + } \
> + unlikely(__ret_warn_once); \
> +})
> +
> void page_writeback_init(void);
>
> static inline void *folio_raw_mapping(struct folio *folio)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0e42038382c1..2bf4ce4d0e2f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
> * We most definitely don't want callers attempting to
> * allocate greater than order-1 page units with __GFP_NOFAIL.
> */
> - WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
> + WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), gfp_flags);
>
> do {
> page = NULL;
> @@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
> (gfp_mask & __GFP_DIRECT_RECLAIM))
> return false;
>
> + if (gfp_mask & __GFP_NOWARN)
> + fail_page_alloc.attr.no_warn = true;
> +
> return should_fail(&fail_page_alloc.attr, 1 << order);
> }
>
> @@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> */
>
> /* Exhausted what can be done so it's blame time */
> - if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
> + if (out_of_memory(&oc) ||
> + WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
> *did_some_progress = 1;
>
> /*
> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * We also sanity check to catch abuse of atomic reserves being used by
> * callers that are not in atomic context.
> */
> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> + if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> gfp_mask &= ~__GFP_ATOMIC;
>
> retry_cpuset:
> @@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * All existing users of the __GFP_NOFAIL are blockable, so warn
> * of any new users that actually require GFP_NOWAIT
> */
> - if (WARN_ON_ONCE(!can_direct_reclaim))
> + if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
> goto fail;
>
> /*
> @@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * because we cannot reclaim anything and only can loop waiting
> * for somebody to do a work for us
> */
> - WARN_ON_ONCE(current->flags & PF_MEMALLOC);
> + WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
>
> /*
> * non failing costly orders are a hard requirement which we
> @@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> * so that we can identify them and convert them to something
> * else.
> */
> - WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
> + WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
>
> /*
> * Help non-failing allocations by giving them access to memory
> @@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
> * There are several places where we assume that the order value is sane
> * so bail out early if the request is out of bound.
> */
> - if (unlikely(order >= MAX_ORDER)) {
> - WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
> + if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
> return NULL;
> - }
>
> gfp &= gfp_allowed_mask;
> /*
--
Thanks,
Qi
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
2022-05-11 5:12 ` Qi Zheng
@ 2022-05-11 5:28 ` Qi Zheng
0 siblings, 0 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-11 5:28 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, akinobu.mita, vbabka, gregkh, jirislaby, rostedt
On 2022/5/11 1:12 PM, Qi Zheng wrote:
>
>
> On 2022/5/10 7:38 PM, Qi Zheng wrote:
>> We expect no warnings to be issued when we specify __GFP_NOWARN, but
>> currently in paths like alloc_pages() and kmalloc(), there are still
>> some warnings printed, fix it.
>
> Hi Andrew,
>
> Maybe we only need to deal with memory allocation failures, such as
> should_fail() (This leads to deadlock in PATCH[2/2]). These
> WARN_ON_ONCE()s report usage problems and should not be printed. If they
such as WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1),
gfp_flags).
> are printed, we should fix these usage problems.
>
> Thanks,
> Qi
>
>
>>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>> include/linux/fault-inject.h | 2 ++
>> lib/fault-inject.c | 3 +++
>> mm/failslab.c | 3 +++
>> mm/internal.h | 11 +++++++++++
>> mm/page_alloc.c | 22 ++++++++++++----------
>> 5 files changed, 31 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>> index 2d04f6448cde..9f6e25467844 100644
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -20,6 +20,7 @@ struct fault_attr {
>> atomic_t space;
>> unsigned long verbose;
>> bool task_filter;
>> + bool no_warn;
>> unsigned long stacktrace_depth;
>> unsigned long require_start;
>> unsigned long require_end;
>> @@ -39,6 +40,7 @@ struct fault_attr {
>> .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
>> .verbose = 2, \
>> .dname = NULL, \
>> + .no_warn = false, \
>> }
>> #define DECLARE_FAULT_ATTR(name) struct fault_attr name =
>> FAULT_ATTR_INITIALIZER
>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>> index ce12621b4275..423784d9c058 100644
>> --- a/lib/fault-inject.c
>> +++ b/lib/fault-inject.c
>> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>> static void fail_dump(struct fault_attr *attr)
>> {
>> + if (attr->no_warn)
>> + return;
>> +
>> if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>> printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>> "name %pd, interval %lu, probability %lu, "
>> diff --git a/mm/failslab.c b/mm/failslab.c
>> index f92fed91ac23..58df9789f1d2 100644
>> --- a/mm/failslab.c
>> +++ b/mm/failslab.c
>> @@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t
>> gfpflags)
>> if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>> return false;
>> + if (gfpflags & __GFP_NOWARN)
>> + failslab.attr.no_warn = true;
>> +
>> return should_fail(&failslab.attr, s->object_size);
>> }
>> diff --git a/mm/internal.h b/mm/internal.h
>> index cf16280ce132..7a268fac6559 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -35,6 +35,17 @@ struct folio_batch;
>> /* Do not use these with a slab allocator */
>> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>> +#define WARN_ON_ONCE_GFP(cond, gfp) ({ \
>> + static bool __section(".data.once") __warned; \
>> + int __ret_warn_once = !!(cond); \
>> + \
>> + if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once &&
>> !__warned)) { \
>> + __warned = true; \
>> + WARN_ON(1); \
>> + } \
>> + unlikely(__ret_warn_once); \
>> +})
>> +
>> void page_writeback_init(void);
>> static inline void *folio_raw_mapping(struct folio *folio)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 0e42038382c1..2bf4ce4d0e2f 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
>> * We most definitely don't want callers attempting to
>> * allocate greater than order-1 page units with __GFP_NOFAIL.
>> */
>> - WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
>> + WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1),
>> gfp_flags);
>> do {
>> page = NULL;
>> @@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t
>> gfp_mask, unsigned int order)
>> (gfp_mask & __GFP_DIRECT_RECLAIM))
>> return false;
>> + if (gfp_mask & __GFP_NOWARN)
>> + fail_page_alloc.attr.no_warn = true;
>> +
>> return should_fail(&fail_page_alloc.attr, 1 << order);
>> }
>> @@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned
>> int order,
>> */
>> /* Exhausted what can be done so it's blame time */
>> - if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
>> + if (out_of_memory(&oc) ||
>> + WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
>> *did_some_progress = 1;
>> /*
>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>> * We also sanity check to catch abuse of atomic reserves being
>> used by
>> * callers that are not in atomic context.
>> */
>> - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>> + if (WARN_ON_ONCE_GFP((gfp_mask &
>> (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>> gfp_mask &= ~__GFP_ATOMIC;
>> retry_cpuset:
>> @@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>> * All existing users of the __GFP_NOFAIL are blockable, so
>> warn
>> * of any new users that actually require GFP_NOWAIT
>> */
>> - if (WARN_ON_ONCE(!can_direct_reclaim))
>> + if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
>> goto fail;
>> /*
>> @@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>> * because we cannot reclaim anything and only can loop waiting
>> * for somebody to do a work for us
>> */
>> - WARN_ON_ONCE(current->flags & PF_MEMALLOC);
>> + WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
>> /*
>> * non failing costly orders are a hard requirement which we
>> @@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned
>> int order,
>> * so that we can identify them and convert them to something
>> * else.
>> */
>> - WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
>> + WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
>> /*
>> * Help non-failing allocations by giving them access to memory
>> @@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned
>> int order, int preferred_nid,
>> * There are several places where we assume that the order value
>> is sane
>> * so bail out early if the request is out of bound.
>> */
>> - if (unlikely(order >= MAX_ORDER)) {
>> - WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
>> + if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
>> return NULL;
>> - }
>> gfp &= gfp_allowed_mask;
>> /*
>
--
Thanks,
Qi
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock
2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
@ 2022-05-11 5:57 ` Jiri Slaby
2022-05-11 6:11 ` Qi Zheng
0 siblings, 1 reply; 10+ messages in thread
From: Jiri Slaby @ 2022-05-11 5:57 UTC (permalink / raw)
To: Qi Zheng, akinobu.mita, akpm, vbabka, gregkh, rostedt
Cc: linux-kernel, linux-mm
Hi,
LGTM:
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Minor comments below.
On 10. 05. 22, 13:38, Qi Zheng wrote:
> The pty_write() invokes kmalloc() which may invoke a normal printk() to
> print failure message. This can cause a deadlock in the scenario reported
> by syz-bot below:
>
> CPU0 CPU1 CPU2
> ---- ---- ----
> lock(console_owner);
> lock(&port_lock_key);
> lock(&port->lock);
> lock(&port_lock_key);
> lock(&port->lock);
> lock(console_owner);
>
> As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
> load balance console writes") said, such deadlock can be prevented by
> using printk_deferred() in kmalloc() (which is invoked in the section
> guarded by the port->lock). But there are too many printk() on the
> kmalloc() path, and kmalloc() can be called from anywhere, so changing
> printk() to printk_deferred() is too complicated and inelegant.
>
> Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
> that printk() will not be called, and this deadlock problem can be avoided.
>
> Syz-bot reported the following lockdep error:
>
> ======================================================
> WARNING: possible circular locking dependency detected
> 5.4.143-00237-g08ccc19a-dirty #10 Not tainted
> ------------------------------------------------------
> syz-executor.4/29420 is trying to acquire lock:
> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: console_trylock_spinning kernel/printk/printk.c:1752 [inline]
> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
>
> but task is already holding lock:
> ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 drivers/tty/pty.c:120
>
> which lock already depends on the new lock.
>
> the existing dependency chain (in reverse order) is:
Maybe trim the stack traces a bit, the commit message is unnecessarily
long...
> -> #2 (&port->lock){-.-.}-{2:2}:
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
> tty_port_tty_get drivers/tty/tty_port.c:288 [inline] <-- lock(&port->lock);
> tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
> serial8250_tx_chars+0x530/0xa80 drivers/tty/serial/8250/8250_port.c:1767
> serial8250_handle_irq.part.0+0x31f/0x3d0 drivers/tty/serial/8250/8250_port.c:1854
> serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 [inline] <-- lock(&port_lock_key);
> serial8250_default_handle_irq+0xb2/0x220 drivers/tty/serial/8250/8250_port.c:1870
> serial8250_interrupt+0xfd/0x200 drivers/tty/serial/8250/8250_core.c:126
> __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
Stop this trace here and trim the rest?
> handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
> handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
> handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
> generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
> do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
> ret_from_intr+0x0/0x19
> native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
> arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
> default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
> cpuidle_idle_call kernel/sched/idle.c:184 [inline]
> do_idle+0x44c/0x590 kernel/sched/idle.c:294
> cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
> start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
> secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>
> -> #1 (&port_lock_key){-.-.}-{2:2}:
> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
> _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
> serial8250_console_write+0x184/0xa40 drivers/tty/serial/8250/8250_port.c:3198
> <-- lock(&port_lock_key);
> call_console_drivers kernel/printk/printk.c:1819 [inline]
> console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
> vprintk_emit+0x1b5/0x470 kernel/printk/printk.c:2024 <-- lock(console_owner);
> vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
> printk+0xba/0xed kernel/printk/printk.c:2084
> register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
> univ8250_console_init+0x3a/0x46 drivers/tty/serial/8250/8250_core.c:681
> console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
> start_kernel+0x5e9/0x879 init/main.c:713
> secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>
> -> #0 (console_owner){....}-{0:0}:
> check_prev_add kernel/locking/lockdep.c:2600 [inline]
> check_prevs_add kernel/locking/lockdep.c:2705 [inline]
> validate_chain kernel/locking/lockdep.c:3095 [inline]
> __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
Delete the above 4 lines?
> lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
> console_trylock_spinning kernel/printk/printk.c:1773 [inline] <-- lock(console_owner);
> vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
> vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
> printk+0xba/0xed kernel/printk/printk.c:2084
> fail_dump lib/fault-inject.c:45 [inline]
> should_fail+0x67b/0x7c0 lib/fault-inject.c:144
> __should_failslab+0x152/0x1c0 mm/failslab.c:33
> should_failslab+0x5/0x10 mm/slab_common.c:1224
> slab_pre_alloc_hook mm/slab.h:468 [inline]
> slab_alloc_node mm/slub.c:2723 [inline]
> slab_alloc mm/slub.c:2807 [inline]
> __kmalloc+0x72/0x300 mm/slub.c:3871
> kmalloc include/linux/slab.h:582 [inline]
> tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
> __tty_buffer_request_room+0x156/0x2a0 drivers/tty/tty_buffer.c:273
> tty_insert_flip_string_fixed_flag+0x93/0x250 drivers/tty/tty_buffer.c:318
> tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
> pty_write+0x126/0x1f0 drivers/tty/pty.c:122 <-- lock(&port->lock);
> n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
> do_tty_write drivers/tty/tty_io.c:961 [inline]
> tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
> __vfs_write+0x76/0x100 fs/read_write.c:494
And stop here?
> vfs_write+0x268/0x5c0 fs/read_write.c:558
> ksys_write+0x12d/0x250 fs/read_write.c:611
> do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> other info that might help us debug this:
>
> Chain exists of:
> console_owner --> &port_lock_key --> &port->lock
>
> Fixes: b6da31b2c07c ("tty: Fix data race in tty_insert_flip_string_fixed_flag")
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
> drivers/tty/tty_buffer.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
> index 646510476c30..bfa431a8e690 100644
> --- a/drivers/tty/tty_buffer.c
> +++ b/drivers/tty/tty_buffer.c
> @@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
> */
> if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
> return NULL;
> - p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
> + p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
> + GFP_ATOMIC | __GFP_NOWARN);
> if (p == NULL)
> return NULL;
>
--
js
suse labs
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock
2022-05-11 5:57 ` Jiri Slaby
@ 2022-05-11 6:11 ` Qi Zheng
0 siblings, 0 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-11 6:11 UTC (permalink / raw)
To: Jiri Slaby, akinobu.mita, akpm, vbabka, gregkh, rostedt
Cc: linux-kernel, linux-mm
On 2022/5/11 1:57 PM, Jiri Slaby wrote:
> Hi,
>
> LGTM:
> Acked-by: Jiri Slaby <jirislaby@kernel.org>
Thanks.
>
> Minor comments below.
>
> On 10. 05. 22, 13:38, Qi Zheng wrote:
>> The pty_write() invokes kmalloc() which may invoke a normal printk() to
>> print failure message. This can cause a deadlock in the scenario reported
>> by syz-bot below:
>>
>> CPU0 CPU1 CPU2
>> ---- ---- ----
>> lock(console_owner);
>> lock(&port_lock_key);
>> lock(&port->lock);
>> lock(&port_lock_key);
>> lock(&port->lock);
>> lock(console_owner);
>>
>> As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
>> load balance console writes") said, such deadlock can be prevented by
>> using printk_deferred() in kmalloc() (which is invoked in the section
>> guarded by the port->lock). But there are too many printk() on the
>> kmalloc() path, and kmalloc() can be called from anywhere, so changing
>> printk() to printk_deferred() is too complicated and inelegant.
>>
>> Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
>> that printk() will not be called, and this deadlock problem can be
>> avoided.
>>
>> Syz-bot reported the following lockdep error:
>>
>> ======================================================
>> WARNING: possible circular locking dependency detected
>> 5.4.143-00237-g08ccc19a-dirty #10 Not tainted
>> ------------------------------------------------------
>> syz-executor.4/29420 is trying to acquire lock:
>> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at:
>> console_trylock_spinning kernel/printk/printk.c:1752 [inline]
>> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at:
>> vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
>>
>> but task is already holding lock:
>> ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0
>> drivers/tty/pty.c:120
>>
>> which lock already depends on the new lock.
>>
>> the existing dependency chain (in reverse order) is:
>
> Maybe trim the stack traces a bit, the commit message is unnecessarily
> long...
Agree, will do.
>
>> -> #2 (&port->lock){-.-.}-{2:2}:
>> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110
>> [inline]
>> _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>> tty_port_tty_get drivers/tty/tty_port.c:288
>> [inline] <-- lock(&port->lock);
>> tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
>> serial8250_tx_chars+0x530/0xa80
>> drivers/tty/serial/8250/8250_port.c:1767
>> serial8250_handle_irq.part.0+0x31f/0x3d0
>> drivers/tty/serial/8250/8250_port.c:1854
>> serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827
>> [inline] <-- lock(&port_lock_key);
>> serial8250_default_handle_irq+0xb2/0x220
>> drivers/tty/serial/8250/8250_port.c:1870
>> serial8250_interrupt+0xfd/0x200
>> drivers/tty/serial/8250/8250_core.c:126
>> __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
>
> Stop this trace here and trim the rest?
Will do.
>
>> handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
>> handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
>> handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
>> generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
>> do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
>> ret_from_intr+0x0/0x19
>> native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
>> arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
>> default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
>> cpuidle_idle_call kernel/sched/idle.c:184 [inline]
>> do_idle+0x44c/0x590 kernel/sched/idle.c:294
>> cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
>> start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
>> secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>>
>> -> #1 (&port_lock_key){-.-.}-{2:2}:
>> __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110
>> [inline]
>> _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>> serial8250_console_write+0x184/0xa40
>> drivers/tty/serial/8250/8250_port.c:3198
>> <-- lock(&port_lock_key);
>> call_console_drivers kernel/printk/printk.c:1819 [inline]
>> console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
>> vprintk_emit+0x1b5/0x470
>> kernel/printk/printk.c:2024 <-- lock(console_owner);
>> vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>> printk+0xba/0xed kernel/printk/printk.c:2084
>> register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
>> univ8250_console_init+0x3a/0x46
>> drivers/tty/serial/8250/8250_core.c:681
>> console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
>> start_kernel+0x5e9/0x879 init/main.c:713
>> secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>>
>> -> #0 (console_owner){....}-{0:0}:
>> check_prev_add kernel/locking/lockdep.c:2600 [inline]
>> check_prevs_add kernel/locking/lockdep.c:2705 [inline]
>> validate_chain kernel/locking/lockdep.c:3095 [inline]
>> __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
>
> Delete the above 4 lines?
Will do.
>
>> lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
>> console_trylock_spinning kernel/printk/printk.c:1773
>> [inline] <-- lock(console_owner);
>> vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
>> vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>> printk+0xba/0xed kernel/printk/printk.c:2084
>> fail_dump lib/fault-inject.c:45 [inline]
>> should_fail+0x67b/0x7c0 lib/fault-inject.c:144
>> __should_failslab+0x152/0x1c0 mm/failslab.c:33
>> should_failslab+0x5/0x10 mm/slab_common.c:1224
>> slab_pre_alloc_hook mm/slab.h:468 [inline]
>> slab_alloc_node mm/slub.c:2723 [inline]
>> slab_alloc mm/slub.c:2807 [inline]
>> __kmalloc+0x72/0x300 mm/slub.c:3871
>> kmalloc include/linux/slab.h:582 [inline]
>> tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
>> __tty_buffer_request_room+0x156/0x2a0
>> drivers/tty/tty_buffer.c:273
>> tty_insert_flip_string_fixed_flag+0x93/0x250
>> drivers/tty/tty_buffer.c:318
>> tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
>> pty_write+0x126/0x1f0 drivers/tty/pty.c:122 <--
>> lock(&port->lock);
>> n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
>> do_tty_write drivers/tty/tty_io.c:961 [inline]
>> tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
>> __vfs_write+0x76/0x100 fs/read_write.c:494
>
> And stop here?
Will do.
>
>> vfs_write+0x268/0x5c0 fs/read_write.c:558
>> ksys_write+0x12d/0x250 fs/read_write.c:611
>> do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
>> entry_SYSCALL_64_after_hwframe+0x49/0xbe
>>
>> other info that might help us debug this:
>>
>> Chain exists of:
>> console_owner --> &port_lock_key --> &port->lock
>>
>> Fixes: b6da31b2c07c ("tty: Fix data race in
>> tty_insert_flip_string_fixed_flag")
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>> drivers/tty/tty_buffer.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
>> index 646510476c30..bfa431a8e690 100644
>> --- a/drivers/tty/tty_buffer.c
>> +++ b/drivers/tty/tty_buffer.c
>> @@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct
>> tty_port *port, size_t size)
>> */
>> if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
>> return NULL;
>> - p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
>> + p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
>> + GFP_ATOMIC | __GFP_NOWARN);
>> if (p == NULL)
>> return NULL;
>
>
--
Thanks,
Qi
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2022-05-11 6:11 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
2022-05-11 5:57 ` Jiri Slaby
2022-05-11 6:11 ` Qi Zheng
2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
2022-05-11 2:19 ` Qi Zheng
2022-05-11 2:32 ` Andrew Morton
2022-05-11 2:36 ` Qi Zheng
2022-05-11 5:12 ` Qi Zheng
2022-05-11 5:28 ` Qi Zheng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).