linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
@ 2022-05-10 11:38 Qi Zheng
  2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-10 11:38 UTC (permalink / raw)
  To: akinobu.mita, akpm, vbabka, gregkh, jirislaby, rostedt
  Cc: linux-kernel, linux-mm, Qi Zheng

We expect no warnings to be issued when we specify __GFP_NOWARN, but
currently in paths like alloc_pages() and kmalloc(), there are still
some warnings printed, fix it.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 include/linux/fault-inject.h |  2 ++
 lib/fault-inject.c           |  3 +++
 mm/failslab.c                |  3 +++
 mm/internal.h                | 11 +++++++++++
 mm/page_alloc.c              | 22 ++++++++++++----------
 5 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 2d04f6448cde..9f6e25467844 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,6 +20,7 @@ struct fault_attr {
 	atomic_t space;
 	unsigned long verbose;
 	bool task_filter;
+	bool no_warn;
 	unsigned long stacktrace_depth;
 	unsigned long require_start;
 	unsigned long require_end;
@@ -39,6 +40,7 @@ struct fault_attr {
 		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
 		.verbose = 2,						\
 		.dname = NULL,						\
+		.no_warn = false,					\
 	}
 
 #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index ce12621b4275..423784d9c058 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
 
 static void fail_dump(struct fault_attr *attr)
 {
+	if (attr->no_warn)
+		return;
+
 	if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
 		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
 		       "name %pd, interval %lu, probability %lu, "
diff --git a/mm/failslab.c b/mm/failslab.c
index f92fed91ac23..58df9789f1d2 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 	if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
 		return false;
 
+	if (gfpflags & __GFP_NOWARN)
+		failslab.attr.no_warn = true;
+
 	return should_fail(&failslab.attr, s->object_size);
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index cf16280ce132..7a268fac6559 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -35,6 +35,17 @@ struct folio_batch;
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
+#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
+	static bool __section(".data.once") __warned;			\
+	int __ret_warn_once = !!(cond);					\
+									\
+	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
+		__warned = true;					\
+		WARN_ON(1);						\
+	}								\
+	unlikely(__ret_warn_once);					\
+})
+
 void page_writeback_init(void);
 
 static inline void *folio_raw_mapping(struct folio *folio)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0e42038382c1..2bf4ce4d0e2f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 	 * We most definitely don't want callers attempting to
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
-	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+	WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), gfp_flags);
 
 	do {
 		page = NULL;
@@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 			(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
+	if (gfp_mask & __GFP_NOWARN)
+		fail_page_alloc.attr.no_warn = true;
+
 	return should_fail(&fail_page_alloc.attr, 1 << order);
 }
 
@@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	 */
 
 	/* Exhausted what can be done so it's blame time */
-	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
+	if (out_of_memory(&oc) ||
+	    WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
 		*did_some_progress = 1;
 
 		/*
@@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * We also sanity check to catch abuse of atomic reserves being used by
 	 * callers that are not in atomic context.
 	 */
-	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
-				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+	if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
 		gfp_mask &= ~__GFP_ATOMIC;
 
 retry_cpuset:
@@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		 * All existing users of the __GFP_NOFAIL are blockable, so warn
 		 * of any new users that actually require GFP_NOWAIT
 		 */
-		if (WARN_ON_ONCE(!can_direct_reclaim))
+		if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
 			goto fail;
 
 		/*
@@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		 * because we cannot reclaim anything and only can loop waiting
 		 * for somebody to do a work for us
 		 */
-		WARN_ON_ONCE(current->flags & PF_MEMALLOC);
+		WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
 
 		/*
 		 * non failing costly orders are a hard requirement which we
@@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		 * so that we can identify them and convert them to something
 		 * else.
 		 */
-		WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+		WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
 
 		/*
 		 * Help non-failing allocations by giving them access to memory
@@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 	 * There are several places where we assume that the order value is sane
 	 * so bail out early if the request is out of bound.
 	 */
-	if (unlikely(order >= MAX_ORDER)) {
-		WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
+	if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
 		return NULL;
-	}
 
 	gfp &= gfp_allowed_mask;
 	/*
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock
  2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
@ 2022-05-10 11:38 ` Qi Zheng
  2022-05-11  5:57   ` Jiri Slaby
  2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
  2022-05-11  5:12 ` Qi Zheng
  2 siblings, 1 reply; 10+ messages in thread
From: Qi Zheng @ 2022-05-10 11:38 UTC (permalink / raw)
  To: akinobu.mita, akpm, vbabka, gregkh, jirislaby, rostedt
  Cc: linux-kernel, linux-mm, Qi Zheng

The pty_write() invokes kmalloc() which may invoke a normal printk() to
print failure message. This can cause a deadlock in the scenario reported
by syz-bot below:

       CPU0              CPU1                    CPU2
       ----              ----                    ----
                         lock(console_owner);
                                                 lock(&port_lock_key);
  lock(&port->lock);
                         lock(&port_lock_key);
                                                 lock(&port->lock);
  lock(console_owner);

As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
load balance console writes") said, such deadlock can be prevented by
using printk_deferred() in kmalloc() (which is invoked in the section
guarded by the port->lock). But there are too many printk() on the
kmalloc() path, and kmalloc() can be called from anywhere, so changing
printk() to printk_deferred() is too complicated and inelegant.

Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
that printk() will not be called, and this deadlock problem can be avoided.

Syz-bot reported the following lockdep error:

======================================================
WARNING: possible circular locking dependency detected
5.4.143-00237-g08ccc19a-dirty #10 Not tainted
------------------------------------------------------
syz-executor.4/29420 is trying to acquire lock:
ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: console_trylock_spinning kernel/printk/printk.c:1752 [inline]
ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023

but task is already holding lock:
ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 drivers/tty/pty.c:120

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&port->lock){-.-.}-{2:2}:
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
       tty_port_tty_get drivers/tty/tty_port.c:288 [inline]          		<-- lock(&port->lock);
       tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
       serial8250_tx_chars+0x530/0xa80 drivers/tty/serial/8250/8250_port.c:1767
       serial8250_handle_irq.part.0+0x31f/0x3d0 drivers/tty/serial/8250/8250_port.c:1854
       serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 [inline] 	<-- lock(&port_lock_key);
       serial8250_default_handle_irq+0xb2/0x220 drivers/tty/serial/8250/8250_port.c:1870
       serial8250_interrupt+0xfd/0x200 drivers/tty/serial/8250/8250_core.c:126
       __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
       handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
       handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
       handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
       generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
       do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
       ret_from_intr+0x0/0x19
       native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
       arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
       default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
       cpuidle_idle_call kernel/sched/idle.c:184 [inline]
       do_idle+0x44c/0x590 kernel/sched/idle.c:294
       cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
       start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
       secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241

-> #1 (&port_lock_key){-.-.}-{2:2}:
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
       serial8250_console_write+0x184/0xa40 drivers/tty/serial/8250/8250_port.c:3198
										<-- lock(&port_lock_key);
       call_console_drivers kernel/printk/printk.c:1819 [inline]
       console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
       vprintk_emit+0x1b5/0x470 kernel/printk/printk.c:2024			<-- lock(console_owner);
       vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
       printk+0xba/0xed kernel/printk/printk.c:2084
       register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
       univ8250_console_init+0x3a/0x46 drivers/tty/serial/8250/8250_core.c:681
       console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
       start_kernel+0x5e9/0x879 init/main.c:713
       secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241

-> #0 (console_owner){....}-{0:0}:
       check_prev_add kernel/locking/lockdep.c:2600 [inline]
       check_prevs_add kernel/locking/lockdep.c:2705 [inline]
       validate_chain kernel/locking/lockdep.c:3095 [inline]
       __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
       lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
       console_trylock_spinning kernel/printk/printk.c:1773 [inline]		<-- lock(console_owner);
       vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
       vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
       printk+0xba/0xed kernel/printk/printk.c:2084
       fail_dump lib/fault-inject.c:45 [inline]
       should_fail+0x67b/0x7c0 lib/fault-inject.c:144
       __should_failslab+0x152/0x1c0 mm/failslab.c:33
       should_failslab+0x5/0x10 mm/slab_common.c:1224
       slab_pre_alloc_hook mm/slab.h:468 [inline]
       slab_alloc_node mm/slub.c:2723 [inline]
       slab_alloc mm/slub.c:2807 [inline]
       __kmalloc+0x72/0x300 mm/slub.c:3871
       kmalloc include/linux/slab.h:582 [inline]
       tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
       __tty_buffer_request_room+0x156/0x2a0 drivers/tty/tty_buffer.c:273
       tty_insert_flip_string_fixed_flag+0x93/0x250 drivers/tty/tty_buffer.c:318
       tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
       pty_write+0x126/0x1f0 drivers/tty/pty.c:122				<-- lock(&port->lock);
       n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
       do_tty_write drivers/tty/tty_io.c:961 [inline]
       tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
       __vfs_write+0x76/0x100 fs/read_write.c:494
       vfs_write+0x268/0x5c0 fs/read_write.c:558
       ksys_write+0x12d/0x250 fs/read_write.c:611
       do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
  console_owner --> &port_lock_key --> &port->lock

Fixes: b6da31b2c07c ("tty: Fix data race in tty_insert_flip_string_fixed_flag")
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 drivers/tty/tty_buffer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 646510476c30..bfa431a8e690 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
 	 */
 	if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
 		return NULL;
-	p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
+	p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
+		    GFP_ATOMIC | __GFP_NOWARN);
 	if (p == NULL)
 		return NULL;
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
  2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
  2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
@ 2022-05-10 18:59 ` Andrew Morton
  2022-05-11  2:19   ` Qi Zheng
  2022-05-11  5:12 ` Qi Zheng
  2 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2022-05-10 18:59 UTC (permalink / raw)
  To: Qi Zheng
  Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm

On Tue, 10 May 2022 19:38:08 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:

> We expect no warnings to be issued when we specify __GFP_NOWARN, but
> currently in paths like alloc_pages() and kmalloc(), there are still
> some warnings printed, fix it.

Looks sane to me.

> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -35,6 +35,17 @@ struct folio_batch;
>  /* Do not use these with a slab allocator */
>  #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>  
> +#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
> +	static bool __section(".data.once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})

I don't think WARN_ON_ONCE_GFP is a good name for this.  But
WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(

WARN_ON_ONCE_NOWARN might be better.  No strong opinion here, really.

> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 * We also sanity check to catch abuse of atomic reserves being used by
>  	 * callers that are not in atomic context.
>  	 */
> -	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> -				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> +	if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> +				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>  		gfp_mask &= ~__GFP_ATOMIC;
>  
>  retry_cpuset:

I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
(https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
deleted this code.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
  2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
@ 2022-05-11  2:19   ` Qi Zheng
  2022-05-11  2:32     ` Andrew Morton
  0 siblings, 1 reply; 10+ messages in thread
From: Qi Zheng @ 2022-05-11  2:19 UTC (permalink / raw)
  To: Andrew Morton
  Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm



On 2022/5/11 2:59 AM, Andrew Morton wrote:
> On Tue, 10 May 2022 19:38:08 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
>> We expect no warnings to be issued when we specify __GFP_NOWARN, but
>> currently in paths like alloc_pages() and kmalloc(), there are still
>> some warnings printed, fix it.
> 
> Looks sane to me.
> 
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -35,6 +35,17 @@ struct folio_batch;
>>   /* Do not use these with a slab allocator */
>>   #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>>   
>> +#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
>> +	static bool __section(".data.once") __warned;			\
>> +	int __ret_warn_once = !!(cond);					\
>> +									\
>> +	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
>> +		__warned = true;					\
>> +		WARN_ON(1);						\
>> +	}								\
>> +	unlikely(__ret_warn_once);					\
>> +})
> 
> I don't think WARN_ON_ONCE_GFP is a good name for this.  But
> WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
> 
> WARN_ON_ONCE_NOWARN might be better.  No strong opinion here, really.

I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird 
putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?

> 
>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>   	 * We also sanity check to catch abuse of atomic reserves being used by
>>   	 * callers that are not in atomic context.
>>   	 */
>> -	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> -				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>> +	if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> +				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>>   		gfp_mask &= ~__GFP_ATOMIC;
>>   
>>   retry_cpuset:
> 
> I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
> (https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
> deleted this code.
> 

This series is based on v5.18-rc5, I will rebase it to the latest next
branch and check if there are any missing WARN_ON_ONCEs that are not
being handled.

Thanks,
Qi

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
  2022-05-11  2:19   ` Qi Zheng
@ 2022-05-11  2:32     ` Andrew Morton
  2022-05-11  2:36       ` Qi Zheng
  0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2022-05-11  2:32 UTC (permalink / raw)
  To: Qi Zheng
  Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm

On Wed, 11 May 2022 10:19:48 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:

> 
> ,,,
> >> --- a/mm/internal.h
> >> +++ b/mm/internal.h
> >> @@ -35,6 +35,17 @@ struct folio_batch;
> >>   /* Do not use these with a slab allocator */
> >>   #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
> >>   
> >> +#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
> >> +	static bool __section(".data.once") __warned;			\
> >> +	int __ret_warn_once = !!(cond);					\
> >> +									\
> >> +	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> >> +		__warned = true;					\
> >> +		WARN_ON(1);						\
> >> +	}								\
> >> +	unlikely(__ret_warn_once);					\
> >> +})
> > 
> > I don't think WARN_ON_ONCE_GFP is a good name for this.  But
> > WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
> > 
> > WARN_ON_ONCE_NOWARN might be better.  No strong opinion here, really.
> 
> I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird 
> putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?

I dunno.  WARN_ON_ONCE_GFP isn't too bad I suppose.  Add a comment over
the definition explaining it?

> > 
> >> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> >>   	 * We also sanity check to catch abuse of atomic reserves being used by
> >>   	 * callers that are not in atomic context.
> >>   	 */
> >> -	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> >> -				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> >> +	if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> >> +				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
> >>   		gfp_mask &= ~__GFP_ATOMIC;
> >>   
> >>   retry_cpuset:
> > 
> > I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
> > (https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
> > deleted this code.
> > 
> 
> This series is based on v5.18-rc5, I will rebase it to the latest next
> branch and check if there are any missing WARN_ON_ONCEs that are not
> being handled.

Against git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm branch
mm-unstable, please.  That ends up in linux-next, with a delay.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
  2022-05-11  2:32     ` Andrew Morton
@ 2022-05-11  2:36       ` Qi Zheng
  0 siblings, 0 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-11  2:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: akinobu.mita, vbabka, gregkh, jirislaby, rostedt, linux-kernel, linux-mm



On 2022/5/11 10:32 AM, Andrew Morton wrote:
> On Wed, 11 May 2022 10:19:48 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
>>
>> ,,,
>>>> --- a/mm/internal.h
>>>> +++ b/mm/internal.h
>>>> @@ -35,6 +35,17 @@ struct folio_batch;
>>>>    /* Do not use these with a slab allocator */
>>>>    #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>>>>    
>>>> +#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
>>>> +	static bool __section(".data.once") __warned;			\
>>>> +	int __ret_warn_once = !!(cond);					\
>>>> +									\
>>>> +	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
>>>> +		__warned = true;					\
>>>> +		WARN_ON(1);						\
>>>> +	}								\
>>>> +	unlikely(__ret_warn_once);					\
>>>> +})
>>>
>>> I don't think WARN_ON_ONCE_GFP is a good name for this.  But
>>> WARN_ON_ONCE_IF_NOT_GFP_NOWARN is too long :(
>>>
>>> WARN_ON_ONCE_NOWARN might be better.  No strong opinion here, really.
>>
>> I've thought about WARN_ON_ONCE_NOWARN, but I feel a little weird
>> putting 'WARN' and 'NOWARN' together, how about WARN_ON_ONCE_IF_ALLOWED?
> 
> I dunno.  WARN_ON_ONCE_GFP isn't too bad I suppose.  Add a comment over
> the definition explaining it?

OK, I will add a comment to it.

> 
>>>
>>>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>>>    	 * We also sanity check to catch abuse of atomic reserves being used by
>>>>    	 * callers that are not in atomic context.
>>>>    	 */
>>>> -	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>>>> -				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>>>> +	if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>>>> +				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>>>>    		gfp_mask &= ~__GFP_ATOMIC;
>>>>    
>>>>    retry_cpuset:
>>>
>>> I dropped this hunk - Neil's "mm: discard __GFP_ATOMIC"
>>> (https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name)
>>> deleted this code.
>>>
>>
>> This series is based on v5.18-rc5, I will rebase it to the latest next
>> branch and check if there are any missing WARN_ON_ONCEs that are not
>> being handled.
> 
> Against git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm branch
> mm-unstable, please.  That ends up in linux-next, with a delay.

OK, will do.

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
  2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
  2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
  2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
@ 2022-05-11  5:12 ` Qi Zheng
  2022-05-11  5:28   ` Qi Zheng
  2 siblings, 1 reply; 10+ messages in thread
From: Qi Zheng @ 2022-05-11  5:12 UTC (permalink / raw)
  To: akpm
  Cc: linux-kernel, linux-mm, akinobu.mita, vbabka, gregkh, jirislaby, rostedt



On 2022/5/10 7:38 PM, Qi Zheng wrote:
> We expect no warnings to be issued when we specify __GFP_NOWARN, but
> currently in paths like alloc_pages() and kmalloc(), there are still
> some warnings printed, fix it.

Hi Andrew,

Maybe we only need to deal with memory allocation failures, such as
should_fail() (This leads to deadlock in PATCH[2/2]). These 
WARN_ON_ONCE()s report usage problems and should not be printed. If they
are printed, we should fix these usage problems.

Thanks,
Qi


> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>   include/linux/fault-inject.h |  2 ++
>   lib/fault-inject.c           |  3 +++
>   mm/failslab.c                |  3 +++
>   mm/internal.h                | 11 +++++++++++
>   mm/page_alloc.c              | 22 ++++++++++++----------
>   5 files changed, 31 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index 2d04f6448cde..9f6e25467844 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -20,6 +20,7 @@ struct fault_attr {
>   	atomic_t space;
>   	unsigned long verbose;
>   	bool task_filter;
> +	bool no_warn;
>   	unsigned long stacktrace_depth;
>   	unsigned long require_start;
>   	unsigned long require_end;
> @@ -39,6 +40,7 @@ struct fault_attr {
>   		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
>   		.verbose = 2,						\
>   		.dname = NULL,						\
> +		.no_warn = false,					\
>   	}
>   
>   #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
> index ce12621b4275..423784d9c058 100644
> --- a/lib/fault-inject.c
> +++ b/lib/fault-inject.c
> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>   
>   static void fail_dump(struct fault_attr *attr)
>   {
> +	if (attr->no_warn)
> +		return;
> +
>   	if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>   		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>   		       "name %pd, interval %lu, probability %lu, "
> diff --git a/mm/failslab.c b/mm/failslab.c
> index f92fed91ac23..58df9789f1d2 100644
> --- a/mm/failslab.c
> +++ b/mm/failslab.c
> @@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>   	if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>   		return false;
>   
> +	if (gfpflags & __GFP_NOWARN)
> +		failslab.attr.no_warn = true;
> +
>   	return should_fail(&failslab.attr, s->object_size);
>   }
>   
> diff --git a/mm/internal.h b/mm/internal.h
> index cf16280ce132..7a268fac6559 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -35,6 +35,17 @@ struct folio_batch;
>   /* Do not use these with a slab allocator */
>   #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>   
> +#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
> +	static bool __section(".data.once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})
> +
>   void page_writeback_init(void);
>   
>   static inline void *folio_raw_mapping(struct folio *folio)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0e42038382c1..2bf4ce4d0e2f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
>   	 * We most definitely don't want callers attempting to
>   	 * allocate greater than order-1 page units with __GFP_NOFAIL.
>   	 */
> -	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
> +	WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), gfp_flags);
>   
>   	do {
>   		page = NULL;
> @@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>   			(gfp_mask & __GFP_DIRECT_RECLAIM))
>   		return false;
>   
> +	if (gfp_mask & __GFP_NOWARN)
> +		fail_page_alloc.attr.no_warn = true;
> +
>   	return should_fail(&fail_page_alloc.attr, 1 << order);
>   }
>   
> @@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>   	 */
>   
>   	/* Exhausted what can be done so it's blame time */
> -	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
> +	if (out_of_memory(&oc) ||
> +	    WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
>   		*did_some_progress = 1;
>   
>   		/*
> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>   	 * We also sanity check to catch abuse of atomic reserves being used by
>   	 * callers that are not in atomic context.
>   	 */
> -	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> -				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
> +	if (WARN_ON_ONCE_GFP((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
> +				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>   		gfp_mask &= ~__GFP_ATOMIC;
>   
>   retry_cpuset:
> @@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>   		 * All existing users of the __GFP_NOFAIL are blockable, so warn
>   		 * of any new users that actually require GFP_NOWAIT
>   		 */
> -		if (WARN_ON_ONCE(!can_direct_reclaim))
> +		if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
>   			goto fail;
>   
>   		/*
> @@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>   		 * because we cannot reclaim anything and only can loop waiting
>   		 * for somebody to do a work for us
>   		 */
> -		WARN_ON_ONCE(current->flags & PF_MEMALLOC);
> +		WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
>   
>   		/*
>   		 * non failing costly orders are a hard requirement which we
> @@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>   		 * so that we can identify them and convert them to something
>   		 * else.
>   		 */
> -		WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
> +		WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
>   
>   		/*
>   		 * Help non-failing allocations by giving them access to memory
> @@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
>   	 * There are several places where we assume that the order value is sane
>   	 * so bail out early if the request is out of bound.
>   	 */
> -	if (unlikely(order >= MAX_ORDER)) {
> -		WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
> +	if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
>   		return NULL;
> -	}
>   
>   	gfp &= gfp_allowed_mask;
>   	/*

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN
  2022-05-11  5:12 ` Qi Zheng
@ 2022-05-11  5:28   ` Qi Zheng
  0 siblings, 0 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-11  5:28 UTC (permalink / raw)
  To: akpm
  Cc: linux-kernel, linux-mm, akinobu.mita, vbabka, gregkh, jirislaby, rostedt



On 2022/5/11 1:12 PM, Qi Zheng wrote:
> 
> 
> On 2022/5/10 7:38 PM, Qi Zheng wrote:
>> We expect no warnings to be issued when we specify __GFP_NOWARN, but
>> currently in paths like alloc_pages() and kmalloc(), there are still
>> some warnings printed, fix it.
> 
> Hi Andrew,
> 
> Maybe we only need to deal with memory allocation failures, such as
> should_fail() (This leads to deadlock in PATCH[2/2]). These 
> WARN_ON_ONCE()s report usage problems and should not be printed. If they

such as WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1),
gfp_flags).

> are printed, we should fix these usage problems.
> 
> Thanks,
> Qi
> 
> 
>>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   include/linux/fault-inject.h |  2 ++
>>   lib/fault-inject.c           |  3 +++
>>   mm/failslab.c                |  3 +++
>>   mm/internal.h                | 11 +++++++++++
>>   mm/page_alloc.c              | 22 ++++++++++++----------
>>   5 files changed, 31 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>> index 2d04f6448cde..9f6e25467844 100644
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -20,6 +20,7 @@ struct fault_attr {
>>       atomic_t space;
>>       unsigned long verbose;
>>       bool task_filter;
>> +    bool no_warn;
>>       unsigned long stacktrace_depth;
>>       unsigned long require_start;
>>       unsigned long require_end;
>> @@ -39,6 +40,7 @@ struct fault_attr {
>>           .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,    \
>>           .verbose = 2,                        \
>>           .dname = NULL,                        \
>> +        .no_warn = false,                    \
>>       }
>>   #define DECLARE_FAULT_ATTR(name) struct fault_attr name = 
>> FAULT_ATTR_INITIALIZER
>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>> index ce12621b4275..423784d9c058 100644
>> --- a/lib/fault-inject.c
>> +++ b/lib/fault-inject.c
>> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>>   static void fail_dump(struct fault_attr *attr)
>>   {
>> +    if (attr->no_warn)
>> +        return;
>> +
>>       if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>>           printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>>                  "name %pd, interval %lu, probability %lu, "
>> diff --git a/mm/failslab.c b/mm/failslab.c
>> index f92fed91ac23..58df9789f1d2 100644
>> --- a/mm/failslab.c
>> +++ b/mm/failslab.c
>> @@ -30,6 +30,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t 
>> gfpflags)
>>       if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>>           return false;
>> +    if (gfpflags & __GFP_NOWARN)
>> +        failslab.attr.no_warn = true;
>> +
>>       return should_fail(&failslab.attr, s->object_size);
>>   }
>> diff --git a/mm/internal.h b/mm/internal.h
>> index cf16280ce132..7a268fac6559 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -35,6 +35,17 @@ struct folio_batch;
>>   /* Do not use these with a slab allocator */
>>   #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>> +#define WARN_ON_ONCE_GFP(cond, gfp)    ({                \
>> +    static bool __section(".data.once") __warned;            \
>> +    int __ret_warn_once = !!(cond);                    \
>> +                                    \
>> +    if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && 
>> !__warned)) { \
>> +        __warned = true;                    \
>> +        WARN_ON(1);                        \
>> +    }                                \
>> +    unlikely(__ret_warn_once);                    \
>> +})
>> +
>>   void page_writeback_init(void);
>>   static inline void *folio_raw_mapping(struct folio *folio)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 0e42038382c1..2bf4ce4d0e2f 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -3722,7 +3722,7 @@ struct page *rmqueue(struct zone *preferred_zone,
>>        * We most definitely don't want callers attempting to
>>        * allocate greater than order-1 page units with __GFP_NOFAIL.
>>        */
>> -    WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
>> +    WARN_ON_ONCE_GFP((gfp_flags & __GFP_NOFAIL) && (order > 1), 
>> gfp_flags);
>>       do {
>>           page = NULL;
>> @@ -3799,6 +3799,9 @@ static bool __should_fail_alloc_page(gfp_t 
>> gfp_mask, unsigned int order)
>>               (gfp_mask & __GFP_DIRECT_RECLAIM))
>>           return false;
>> +    if (gfp_mask & __GFP_NOWARN)
>> +        fail_page_alloc.attr.no_warn = true;
>> +
>>       return should_fail(&fail_page_alloc.attr, 1 << order);
>>   }
>> @@ -4346,7 +4349,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned 
>> int order,
>>        */
>>       /* Exhausted what can be done so it's blame time */
>> -    if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
>> +    if (out_of_memory(&oc) ||
>> +        WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
>>           *did_some_progress = 1;
>>           /*
>> @@ -4902,8 +4906,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned 
>> int order,
>>        * We also sanity check to catch abuse of atomic reserves being 
>> used by
>>        * callers that are not in atomic context.
>>        */
>> -    if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> -                (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
>> +    if (WARN_ON_ONCE_GFP((gfp_mask & 
>> (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
>> +                (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM), gfp_mask))
>>           gfp_mask &= ~__GFP_ATOMIC;
>>   retry_cpuset:
>> @@ -5117,7 +5121,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned 
>> int order,
>>            * All existing users of the __GFP_NOFAIL are blockable, so 
>> warn
>>            * of any new users that actually require GFP_NOWAIT
>>            */
>> -        if (WARN_ON_ONCE(!can_direct_reclaim))
>> +        if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
>>               goto fail;
>>           /*
>> @@ -5125,7 +5129,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned 
>> int order,
>>            * because we cannot reclaim anything and only can loop waiting
>>            * for somebody to do a work for us
>>            */
>> -        WARN_ON_ONCE(current->flags & PF_MEMALLOC);
>> +        WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
>>           /*
>>            * non failing costly orders are a hard requirement which we
>> @@ -5133,7 +5137,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned 
>> int order,
>>            * so that we can identify them and convert them to something
>>            * else.
>>            */
>> -        WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
>> +        WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
>>           /*
>>            * Help non-failing allocations by giving them access to memory
>> @@ -5379,10 +5383,8 @@ struct page *__alloc_pages(gfp_t gfp, unsigned 
>> int order, int preferred_nid,
>>        * There are several places where we assume that the order value 
>> is sane
>>        * so bail out early if the request is out of bound.
>>        */
>> -    if (unlikely(order >= MAX_ORDER)) {
>> -        WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
>> +    if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
>>           return NULL;
>> -    }
>>       gfp &= gfp_allowed_mask;
>>       /*
> 

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock
  2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
@ 2022-05-11  5:57   ` Jiri Slaby
  2022-05-11  6:11     ` Qi Zheng
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Slaby @ 2022-05-11  5:57 UTC (permalink / raw)
  To: Qi Zheng, akinobu.mita, akpm, vbabka, gregkh, rostedt
  Cc: linux-kernel, linux-mm

Hi,

LGTM:
Acked-by: Jiri Slaby <jirislaby@kernel.org>

Minor comments below.

On 10. 05. 22, 13:38, Qi Zheng wrote:
> The pty_write() invokes kmalloc() which may invoke a normal printk() to
> print failure message. This can cause a deadlock in the scenario reported
> by syz-bot below:
> 
>         CPU0              CPU1                    CPU2
>         ----              ----                    ----
>                           lock(console_owner);
>                                                   lock(&port_lock_key);
>    lock(&port->lock);
>                           lock(&port_lock_key);
>                                                   lock(&port->lock);
>    lock(console_owner);
> 
> As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
> load balance console writes") said, such deadlock can be prevented by
> using printk_deferred() in kmalloc() (which is invoked in the section
> guarded by the port->lock). But there are too many printk() on the
> kmalloc() path, and kmalloc() can be called from anywhere, so changing
> printk() to printk_deferred() is too complicated and inelegant.
> 
> Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
> that printk() will not be called, and this deadlock problem can be avoided.
> 
> Syz-bot reported the following lockdep error:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 5.4.143-00237-g08ccc19a-dirty #10 Not tainted
> ------------------------------------------------------
> syz-executor.4/29420 is trying to acquire lock:
> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: console_trylock_spinning kernel/printk/printk.c:1752 [inline]
> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
> 
> but task is already holding lock:
> ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 drivers/tty/pty.c:120
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:

Maybe trim the stack traces a bit, the commit message is unnecessarily 
long...

> -> #2 (&port->lock){-.-.}-{2:2}:
>         __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
>         _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>         tty_port_tty_get drivers/tty/tty_port.c:288 [inline]          		<-- lock(&port->lock);
>         tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
>         serial8250_tx_chars+0x530/0xa80 drivers/tty/serial/8250/8250_port.c:1767
>         serial8250_handle_irq.part.0+0x31f/0x3d0 drivers/tty/serial/8250/8250_port.c:1854
>         serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 [inline] 	<-- lock(&port_lock_key);
>         serial8250_default_handle_irq+0xb2/0x220 drivers/tty/serial/8250/8250_port.c:1870
>         serial8250_interrupt+0xfd/0x200 drivers/tty/serial/8250/8250_core.c:126
>         __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156

Stop this trace here and trim the rest?

>         handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
>         handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
>         handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
>         generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
>         do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
>         ret_from_intr+0x0/0x19
>         native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
>         arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
>         default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
>         cpuidle_idle_call kernel/sched/idle.c:184 [inline]
>         do_idle+0x44c/0x590 kernel/sched/idle.c:294
>         cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
>         start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
>         secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
> 
> -> #1 (&port_lock_key){-.-.}-{2:2}:
>         __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
>         _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>         serial8250_console_write+0x184/0xa40 drivers/tty/serial/8250/8250_port.c:3198
> 										<-- lock(&port_lock_key);
>         call_console_drivers kernel/printk/printk.c:1819 [inline]
>         console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
>         vprintk_emit+0x1b5/0x470 kernel/printk/printk.c:2024			<-- lock(console_owner);
>         vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>         printk+0xba/0xed kernel/printk/printk.c:2084
>         register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
>         univ8250_console_init+0x3a/0x46 drivers/tty/serial/8250/8250_core.c:681
>         console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
>         start_kernel+0x5e9/0x879 init/main.c:713
>         secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
> 
> -> #0 (console_owner){....}-{0:0}:
>         check_prev_add kernel/locking/lockdep.c:2600 [inline]
>         check_prevs_add kernel/locking/lockdep.c:2705 [inline]
>         validate_chain kernel/locking/lockdep.c:3095 [inline]
>         __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200

Delete the above 4 lines?

>         lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
>         console_trylock_spinning kernel/printk/printk.c:1773 [inline]		<-- lock(console_owner);
>         vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
>         vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>         printk+0xba/0xed kernel/printk/printk.c:2084
>         fail_dump lib/fault-inject.c:45 [inline]
>         should_fail+0x67b/0x7c0 lib/fault-inject.c:144
>         __should_failslab+0x152/0x1c0 mm/failslab.c:33
>         should_failslab+0x5/0x10 mm/slab_common.c:1224
>         slab_pre_alloc_hook mm/slab.h:468 [inline]
>         slab_alloc_node mm/slub.c:2723 [inline]
>         slab_alloc mm/slub.c:2807 [inline]
>         __kmalloc+0x72/0x300 mm/slub.c:3871
>         kmalloc include/linux/slab.h:582 [inline]
>         tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
>         __tty_buffer_request_room+0x156/0x2a0 drivers/tty/tty_buffer.c:273
>         tty_insert_flip_string_fixed_flag+0x93/0x250 drivers/tty/tty_buffer.c:318
>         tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
>         pty_write+0x126/0x1f0 drivers/tty/pty.c:122				<-- lock(&port->lock);
>         n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
>         do_tty_write drivers/tty/tty_io.c:961 [inline]
>         tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
>         __vfs_write+0x76/0x100 fs/read_write.c:494

And stop here?

>         vfs_write+0x268/0x5c0 fs/read_write.c:558
>         ksys_write+0x12d/0x250 fs/read_write.c:611
>         do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
>         entry_SYSCALL_64_after_hwframe+0x49/0xbe
> 
> other info that might help us debug this:
> 
> Chain exists of:
>    console_owner --> &port_lock_key --> &port->lock
> 
> Fixes: b6da31b2c07c ("tty: Fix data race in tty_insert_flip_string_fixed_flag")
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>   drivers/tty/tty_buffer.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
> index 646510476c30..bfa431a8e690 100644
> --- a/drivers/tty/tty_buffer.c
> +++ b/drivers/tty/tty_buffer.c
> @@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
>   	 */
>   	if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
>   		return NULL;
> -	p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
> +	p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
> +		    GFP_ATOMIC | __GFP_NOWARN);
>   	if (p == NULL)
>   		return NULL;
>   


-- 
js
suse labs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock
  2022-05-11  5:57   ` Jiri Slaby
@ 2022-05-11  6:11     ` Qi Zheng
  0 siblings, 0 replies; 10+ messages in thread
From: Qi Zheng @ 2022-05-11  6:11 UTC (permalink / raw)
  To: Jiri Slaby, akinobu.mita, akpm, vbabka, gregkh, rostedt
  Cc: linux-kernel, linux-mm



On 2022/5/11 1:57 PM, Jiri Slaby wrote:
> Hi,
> 
> LGTM:
> Acked-by: Jiri Slaby <jirislaby@kernel.org>

Thanks.

> 
> Minor comments below.
> 
> On 10. 05. 22, 13:38, Qi Zheng wrote:
>> The pty_write() invokes kmalloc() which may invoke a normal printk() to
>> print failure message. This can cause a deadlock in the scenario reported
>> by syz-bot below:
>>
>>         CPU0              CPU1                    CPU2
>>         ----              ----                    ----
>>                           lock(console_owner);
>>                                                   lock(&port_lock_key);
>>    lock(&port->lock);
>>                           lock(&port_lock_key);
>>                                                   lock(&port->lock);
>>    lock(console_owner);
>>
>> As commit dbdda842fe96 ("printk: Add console owner and waiter logic to
>> load balance console writes") said, such deadlock can be prevented by
>> using printk_deferred() in kmalloc() (which is invoked in the section
>> guarded by the port->lock). But there are too many printk() on the
>> kmalloc() path, and kmalloc() can be called from anywhere, so changing
>> printk() to printk_deferred() is too complicated and inelegant.
>>
>> Therefore, this patch chooses to specify __GFP_NOWARN to kmalloc(), so
>> that printk() will not be called, and this deadlock problem can be 
>> avoided.
>>
>> Syz-bot reported the following lockdep error:
>>
>> ======================================================
>> WARNING: possible circular locking dependency detected
>> 5.4.143-00237-g08ccc19a-dirty #10 Not tainted
>> ------------------------------------------------------
>> syz-executor.4/29420 is trying to acquire lock:
>> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: 
>> console_trylock_spinning kernel/printk/printk.c:1752 [inline]
>> ffffffff8aedb2a0 (console_owner){....}-{0:0}, at: 
>> vprintk_emit+0x2ca/0x470 kernel/printk/printk.c:2023
>>
>> but task is already holding lock:
>> ffff8880119c9158 (&port->lock){-.-.}-{2:2}, at: pty_write+0xf4/0x1f0 
>> drivers/tty/pty.c:120
>>
>> which lock already depends on the new lock.
>>
>> the existing dependency chain (in reverse order) is:
> 
> Maybe trim the stack traces a bit, the commit message is unnecessarily 
> long...

Agree, will do.

> 
>> -> #2 (&port->lock){-.-.}-{2:2}:
>>         __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 
>> [inline]
>>         _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>>         tty_port_tty_get drivers/tty/tty_port.c:288 
>> [inline]                  <-- lock(&port->lock);
>>         tty_port_default_wakeup+0x1d/0xb0 drivers/tty/tty_port.c:47
>>         serial8250_tx_chars+0x530/0xa80 
>> drivers/tty/serial/8250/8250_port.c:1767
>>         serial8250_handle_irq.part.0+0x31f/0x3d0 
>> drivers/tty/serial/8250/8250_port.c:1854
>>         serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1827 
>> [inline]     <-- lock(&port_lock_key);
>>         serial8250_default_handle_irq+0xb2/0x220 
>> drivers/tty/serial/8250/8250_port.c:1870
>>         serial8250_interrupt+0xfd/0x200 
>> drivers/tty/serial/8250/8250_core.c:126
>>         __handle_irq_event_percpu+0x109/0xa50 kernel/irq/handle.c:156
> 
> Stop this trace here and trim the rest?

Will do.

> 
>>         handle_irq_event_percpu+0x76/0x170 kernel/irq/handle.c:196
>>         handle_irq_event+0xa1/0x130 kernel/irq/handle.c:213
>>         handle_edge_irq+0x261/0xd00 kernel/irq/chip.c:833
>>         generic_handle_irq_desc include/linux/irqdesc.h:156 [inline]
>>         do_IRQ+0xf2/0x2e0 arch/x86/kernel/irq.c:250
>>         ret_from_intr+0x0/0x19
>>         native_safe_halt arch/x86/include/asm/irqflags.h:60 [inline]
>>         arch_safe_halt arch/x86/include/asm/irqflags.h:103 [inline]
>>         default_idle+0x2c/0x1a0 arch/x86/kernel/process.c:572
>>         cpuidle_idle_call kernel/sched/idle.c:184 [inline]
>>         do_idle+0x44c/0x590 kernel/sched/idle.c:294
>>         cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:386
>>         start_secondary+0x2d1/0x3e0 arch/x86/kernel/smpboot.c:264
>>         secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>>
>> -> #1 (&port_lock_key){-.-.}-{2:2}:
>>         __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 
>> [inline]
>>         _raw_spin_lock_irqsave+0x35/0x50 kernel/locking/spinlock.c:159
>>         serial8250_console_write+0x184/0xa40 
>> drivers/tty/serial/8250/8250_port.c:3198
>>                                         <-- lock(&port_lock_key);
>>         call_console_drivers kernel/printk/printk.c:1819 [inline]
>>         console_unlock+0x8cb/0xd00 kernel/printk/printk.c:2504
>>         vprintk_emit+0x1b5/0x470 
>> kernel/printk/printk.c:2024            <-- lock(console_owner);
>>         vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>>         printk+0xba/0xed kernel/printk/printk.c:2084
>>         register_console+0x8b3/0xc10 kernel/printk/printk.c:2829
>>         univ8250_console_init+0x3a/0x46 
>> drivers/tty/serial/8250/8250_core.c:681
>>         console_init+0x49d/0x6d3 kernel/printk/printk.c:2915
>>         start_kernel+0x5e9/0x879 init/main.c:713
>>         secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241
>>
>> -> #0 (console_owner){....}-{0:0}:
>>         check_prev_add kernel/locking/lockdep.c:2600 [inline]
>>         check_prevs_add kernel/locking/lockdep.c:2705 [inline]
>>         validate_chain kernel/locking/lockdep.c:3095 [inline]
>>         __lock_acquire+0x27e6/0x4cc0 kernel/locking/lockdep.c:4200
> 
> Delete the above 4 lines?

Will do.

> 
>>         lock_acquire+0x127/0x340 kernel/locking/lockdep.c:4734
>>         console_trylock_spinning kernel/printk/printk.c:1773 
>> [inline]        <-- lock(console_owner);
>>         vprintk_emit+0x307/0x470 kernel/printk/printk.c:2023
>>         vprintk_func+0x8d/0x250 kernel/printk/printk_safe.c:394
>>         printk+0xba/0xed kernel/printk/printk.c:2084
>>         fail_dump lib/fault-inject.c:45 [inline]
>>         should_fail+0x67b/0x7c0 lib/fault-inject.c:144
>>         __should_failslab+0x152/0x1c0 mm/failslab.c:33
>>         should_failslab+0x5/0x10 mm/slab_common.c:1224
>>         slab_pre_alloc_hook mm/slab.h:468 [inline]
>>         slab_alloc_node mm/slub.c:2723 [inline]
>>         slab_alloc mm/slub.c:2807 [inline]
>>         __kmalloc+0x72/0x300 mm/slub.c:3871
>>         kmalloc include/linux/slab.h:582 [inline]
>>         tty_buffer_alloc+0x23f/0x2a0 drivers/tty/tty_buffer.c:175
>>         __tty_buffer_request_room+0x156/0x2a0 
>> drivers/tty/tty_buffer.c:273
>>         tty_insert_flip_string_fixed_flag+0x93/0x250 
>> drivers/tty/tty_buffer.c:318
>>         tty_insert_flip_string include/linux/tty_flip.h:37 [inline]
>>         pty_write+0x126/0x1f0 drivers/tty/pty.c:122                <-- 
>> lock(&port->lock);
>>         n_tty_write+0xa7a/0xfc0 drivers/tty/n_tty.c:2356
>>         do_tty_write drivers/tty/tty_io.c:961 [inline]
>>         tty_write+0x512/0x930 drivers/tty/tty_io.c:1045
>>         __vfs_write+0x76/0x100 fs/read_write.c:494
> 
> And stop here?

Will do.

> 
>>         vfs_write+0x268/0x5c0 fs/read_write.c:558
>>         ksys_write+0x12d/0x250 fs/read_write.c:611
>>         do_syscall_64+0xd7/0x380 arch/x86/entry/common.c:291
>>         entry_SYSCALL_64_after_hwframe+0x49/0xbe
>>
>> other info that might help us debug this:
>>
>> Chain exists of:
>>    console_owner --> &port_lock_key --> &port->lock
>>
>> Fixes: b6da31b2c07c ("tty: Fix data race in 
>> tty_insert_flip_string_fixed_flag")
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   drivers/tty/tty_buffer.c | 3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
>> index 646510476c30..bfa431a8e690 100644
>> --- a/drivers/tty/tty_buffer.c
>> +++ b/drivers/tty/tty_buffer.c
>> @@ -175,7 +175,8 @@ static struct tty_buffer *tty_buffer_alloc(struct 
>> tty_port *port, size_t size)
>>        */
>>       if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
>>           return NULL;
>> -    p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
>> +    p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
>> +            GFP_ATOMIC | __GFP_NOWARN);
>>       if (p == NULL)
>>           return NULL;
> 
> 

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-05-11  6:11 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-10 11:38 [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Qi Zheng
2022-05-10 11:38 ` [PATCH 2/2] tty: fix deadlock caused by calling printk() under tty_port->lock Qi Zheng
2022-05-11  5:57   ` Jiri Slaby
2022-05-11  6:11     ` Qi Zheng
2022-05-10 18:59 ` [PATCH 1/2] mm: fix missing handler for __GFP_NOWARN Andrew Morton
2022-05-11  2:19   ` Qi Zheng
2022-05-11  2:32     ` Andrew Morton
2022-05-11  2:36       ` Qi Zheng
2022-05-11  5:12 ` Qi Zheng
2022-05-11  5:28   ` Qi Zheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).