All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
@ 2023-05-02 16:08 Roman Gushchin
  2023-05-02 16:08 ` [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached Roman Gushchin
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Roman Gushchin @ 2023-05-02 16:08 UTC (permalink / raw)
  To: linux-mm, Andrew Morton
  Cc: Johannes Weiner, Michal Hocko, Shakeel Butt, Muchun Song,
	linux-kernel, Roman Gushchin, syzbot+774c29891415ab0fd29d,
	Dmitry Vyukov, Yosry Ahmed

KCSAN found an issue in obj_stock_flush_required():
stock->cached_objcg can be reset between the check and dereference:

==================================================================
BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock

write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0:
 drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306
 refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340
 obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408
 memcg_slab_free_hook mm/slab.h:587 [inline]
 __cache_free mm/slab.c:3373 [inline]
 __do_kmem_cache_free mm/slab.c:3577 [inline]
 kmem_cache_free+0x105/0x280 mm/slab.c:3602
 __d_free fs/dcache.c:298 [inline]
 dentry_free fs/dcache.c:375 [inline]
 __dentry_kill+0x422/0x4a0 fs/dcache.c:621
 dentry_kill+0x8d/0x1e0
 dput+0x118/0x1f0 fs/dcache.c:913
 __fput+0x3bf/0x570 fs/file_table.c:329
 ____fput+0x15/0x20 fs/file_table.c:349
 task_work_run+0x123/0x160 kernel/task_work.c:179
 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
 exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171
 exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203
 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
 syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296
 do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1:
 obj_stock_flush_required mm/memcontrol.c:3319 [inline]
 drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361
 try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703
 try_charge mm/memcontrol.c:2837 [inline]
 mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290
 sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025
 sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525
 udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692
 udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817
 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668
 __sys_setsockopt+0x1c3/0x230 net/socket.c:2271
 __do_sys_setsockopt net/socket.c:2282 [inline]
 __se_sys_setsockopt net/socket.c:2279 [inline]
 __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0xffff8881382d52c0 -> 0xffff888138893740

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 6.3.0-rc2-syzkaller-00387-g534293368afa #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023

Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to
stock->cached_objcg.

Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API")
Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Link:
https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
---
 mm/memcontrol.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b27e245a055..c823c35c2ed4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3208,12 +3208,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	 * accumulating over a page of vmstat data or when pgdat or idx
 	 * changes.
 	 */
-	if (stock->cached_objcg != objcg) {
+	if (READ_ONCE(stock->cached_objcg) != objcg) {
 		old = drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
 				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
-		stock->cached_objcg = objcg;
+		WRITE_ONCE(stock->cached_objcg, objcg);
 		stock->cached_pgdat = pgdat;
 	} else if (stock->cached_pgdat != pgdat) {
 		/* Flush the existing cached vmstat data */
@@ -3267,7 +3267,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
+	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
 		stock->nr_bytes -= nr_bytes;
 		ret = true;
 	}
@@ -3279,7 +3279,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 
 static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 {
-	struct obj_cgroup *old = stock->cached_objcg;
+	struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
 
 	if (!old)
 		return NULL;
@@ -3332,7 +3332,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 		stock->cached_pgdat = NULL;
 	}
 
-	stock->cached_objcg = NULL;
+	WRITE_ONCE(stock->cached_objcg, NULL);
 	/*
 	 * The `old' objects needs to be released by the caller via
 	 * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
@@ -3343,10 +3343,11 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg)
 {
+	struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
 	struct mem_cgroup *memcg;
 
-	if (stock->cached_objcg) {
-		memcg = obj_cgroup_memcg(stock->cached_objcg);
+	if (objcg) {
+		memcg = obj_cgroup_memcg(objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
 			return true;
 	}
@@ -3365,10 +3366,10 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (stock->cached_objcg != objcg) { /* reset if necessary */
+	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
 		old = drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
-		stock->cached_objcg = objcg;
+		WRITE_ONCE(stock->cached_objcg, objcg);
 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
 				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
 		allow_uncharge = true;	/* Allow uncharge when objcg changes */
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached
  2023-05-02 16:08 [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Roman Gushchin
@ 2023-05-02 16:08 ` Roman Gushchin
  2023-05-02 20:21   ` Yosry Ahmed
  2023-05-03 17:06   ` Shakeel Butt
  2023-05-02 20:15 ` [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Yosry Ahmed
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 10+ messages in thread
From: Roman Gushchin @ 2023-05-02 16:08 UTC (permalink / raw)
  To: linux-mm, Andrew Morton
  Cc: Johannes Weiner, Michal Hocko, Shakeel Butt, Muchun Song,
	linux-kernel, Roman Gushchin, Dmitry Vyukov, Yosry Ahmed

A memcg pointer in the percpu stock can be accessed by drain_all_stock()
from another cpu in a lockless way.
In theory it might lead to an issue, similar to the one which has been
discovered with stock->cached_objcg, where the pointer was zeroed
between the check for being NULL and dereferencing.
In this case the issue is unlikely a real problem, but to make it
bulletproof and similar to stock->cached_objcg, let's annotate all
accesses to stock->cached with READ_ONCE()/WTRITE_ONCE().

Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
---
 mm/memcontrol.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c823c35c2ed4..1e364ad495a3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2275,7 +2275,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
+	if (memcg == READ_ONCE(stock->cached) && stock->nr_pages >= nr_pages) {
 		stock->nr_pages -= nr_pages;
 		ret = true;
 	}
@@ -2290,7 +2290,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  */
 static void drain_stock(struct memcg_stock_pcp *stock)
 {
-	struct mem_cgroup *old = stock->cached;
+	struct mem_cgroup *old = READ_ONCE(stock->cached);
 
 	if (!old)
 		return;
@@ -2303,7 +2303,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 	}
 
 	css_put(&old->css);
-	stock->cached = NULL;
+	WRITE_ONCE(stock->cached, NULL);
 }
 
 static void drain_local_stock(struct work_struct *dummy)
@@ -2338,10 +2338,10 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	struct memcg_stock_pcp *stock;
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (stock->cached != memcg) { /* reset if necessary */
+	if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */
 		drain_stock(stock);
 		css_get(&memcg->css);
-		stock->cached = memcg;
+		WRITE_ONCE(stock->cached, memcg);
 	}
 	stock->nr_pages += nr_pages;
 
@@ -2383,7 +2383,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 		bool flush = false;
 
 		rcu_read_lock();
-		memcg = stock->cached;
+		memcg = READ_ONCE(stock->cached);
 		if (memcg && stock->nr_pages &&
 		    mem_cgroup_is_descendant(memcg, root_memcg))
 			flush = true;
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
  2023-05-02 16:08 [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Roman Gushchin
  2023-05-02 16:08 ` [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached Roman Gushchin
@ 2023-05-02 20:15 ` Yosry Ahmed
  2023-05-02 21:38   ` Roman Gushchin
  2023-05-03  8:05 ` Dmitry Vyukov
  2023-05-03 17:04 ` Shakeel Butt
  3 siblings, 1 reply; 10+ messages in thread
From: Yosry Ahmed @ 2023-05-02 20:15 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-kernel,
	syzbot+774c29891415ab0fd29d, Dmitry Vyukov

On Tue, May 2, 2023 at 9:09 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> KCSAN found an issue in obj_stock_flush_required():
> stock->cached_objcg can be reset between the check and dereference:
>
> ==================================================================
> BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock
>
> write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0:
>  drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306
>  refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340
>  obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408
>  memcg_slab_free_hook mm/slab.h:587 [inline]
>  __cache_free mm/slab.c:3373 [inline]
>  __do_kmem_cache_free mm/slab.c:3577 [inline]
>  kmem_cache_free+0x105/0x280 mm/slab.c:3602
>  __d_free fs/dcache.c:298 [inline]
>  dentry_free fs/dcache.c:375 [inline]
>  __dentry_kill+0x422/0x4a0 fs/dcache.c:621
>  dentry_kill+0x8d/0x1e0
>  dput+0x118/0x1f0 fs/dcache.c:913
>  __fput+0x3bf/0x570 fs/file_table.c:329
>  ____fput+0x15/0x20 fs/file_table.c:349
>  task_work_run+0x123/0x160 kernel/task_work.c:179
>  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
>  exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171
>  exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203
>  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
>  syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296
>  do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1:
>  obj_stock_flush_required mm/memcontrol.c:3319 [inline]
>  drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361
>  try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703
>  try_charge mm/memcontrol.c:2837 [inline]
>  mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290
>  sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025
>  sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525
>  udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692
>  udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817
>  sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668
>  __sys_setsockopt+0x1c3/0x230 net/socket.c:2271
>  __do_sys_setsockopt net/socket.c:2282 [inline]
>  __se_sys_setsockopt net/socket.c:2279 [inline]
>  __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> value changed: 0xffff8881382d52c0 -> 0xffff888138893740
>
> Reported by Kernel Concurrency Sanitizer on:
> CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 6.3.0-rc2-syzkaller-00387-g534293368afa #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
>
> Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to
> stock->cached_objcg.

I believe all read accesses other than obj_stock_flush_required() are
done under the lock, so READ_ONCE() wouldn't be needed AFAICT. Having
READ_ONCE() only around the racy read can be useful to document the
racy read and differentiate it from others.

With that said, it's also inconvenient to keep track moving forward of
which reading sites are racy, and it may be simpler to just annotate
all readers with READ_ONCE().

I am not sure which approach is better, just thinking out loud.

>
> Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API")
> Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Link:
> https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>

With the above said, I don't feel strongly either way, the patch looks
good AFAICT:
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>

> Cc: Yosry Ahmed <yosryahmed@google.com>
> Cc: Shakeel Butt <shakeelb@google.com>
> ---
>  mm/memcontrol.c | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4b27e245a055..c823c35c2ed4 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3208,12 +3208,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
>          * accumulating over a page of vmstat data or when pgdat or idx
>          * changes.
>          */
> -       if (stock->cached_objcg != objcg) {
> +       if (READ_ONCE(stock->cached_objcg) != objcg) {
>                 old = drain_obj_stock(stock);
>                 obj_cgroup_get(objcg);
>                 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
>                                 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
> -               stock->cached_objcg = objcg;
> +               WRITE_ONCE(stock->cached_objcg, objcg);
>                 stock->cached_pgdat = pgdat;
>         } else if (stock->cached_pgdat != pgdat) {
>                 /* Flush the existing cached vmstat data */
> @@ -3267,7 +3267,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>         local_lock_irqsave(&memcg_stock.stock_lock, flags);
>
>         stock = this_cpu_ptr(&memcg_stock);
> -       if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
> +       if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
>                 stock->nr_bytes -= nr_bytes;
>                 ret = true;
>         }
> @@ -3279,7 +3279,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>
>  static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>  {
> -       struct obj_cgroup *old = stock->cached_objcg;
> +       struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
>
>         if (!old)
>                 return NULL;
> @@ -3332,7 +3332,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>                 stock->cached_pgdat = NULL;
>         }
>
> -       stock->cached_objcg = NULL;
> +       WRITE_ONCE(stock->cached_objcg, NULL);
>         /*
>          * The `old' objects needs to be released by the caller via
>          * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
> @@ -3343,10 +3343,11 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
>                                      struct mem_cgroup *root_memcg)
>  {
> +       struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
>         struct mem_cgroup *memcg;
>
> -       if (stock->cached_objcg) {
> -               memcg = obj_cgroup_memcg(stock->cached_objcg);
> +       if (objcg) {
> +               memcg = obj_cgroup_memcg(objcg);
>                 if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
>                         return true;
>         }
> @@ -3365,10 +3366,10 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
>         local_lock_irqsave(&memcg_stock.stock_lock, flags);
>
>         stock = this_cpu_ptr(&memcg_stock);
> -       if (stock->cached_objcg != objcg) { /* reset if necessary */
> +       if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
>                 old = drain_obj_stock(stock);
>                 obj_cgroup_get(objcg);
> -               stock->cached_objcg = objcg;
> +               WRITE_ONCE(stock->cached_objcg, objcg);
>                 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
>                                 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
>                 allow_uncharge = true;  /* Allow uncharge when objcg changes */
> --
> 2.40.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached
  2023-05-02 16:08 ` [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached Roman Gushchin
@ 2023-05-02 20:21   ` Yosry Ahmed
  2023-05-03 17:06   ` Shakeel Butt
  1 sibling, 0 replies; 10+ messages in thread
From: Yosry Ahmed @ 2023-05-02 20:21 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-kernel, Dmitry Vyukov

On Tue, May 2, 2023 at 9:09 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> A memcg pointer in the percpu stock can be accessed by drain_all_stock()
> from another cpu in a lockless way.
> In theory it might lead to an issue, similar to the one which has been
> discovered with stock->cached_objcg, where the pointer was zeroed
> between the check for being NULL and dereferencing.
> In this case the issue is unlikely a real problem, but to make it
> bulletproof and similar to stock->cached_objcg, let's annotate all
> accesses to stock->cached with READ_ONCE()/WTRITE_ONCE().

Is it time to rename that to cached_memcg? :)

Anyway, same comment as patch 1 about annotating all reads with
READ_ONCE() vs. singling out the racy read.

>
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Yosry Ahmed <yosryahmed@google.com>
> Cc: Shakeel Butt <shakeelb@google.com>
> ---
>  mm/memcontrol.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c823c35c2ed4..1e364ad495a3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2275,7 +2275,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>         local_lock_irqsave(&memcg_stock.stock_lock, flags);
>
>         stock = this_cpu_ptr(&memcg_stock);
> -       if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
> +       if (memcg == READ_ONCE(stock->cached) && stock->nr_pages >= nr_pages) {
>                 stock->nr_pages -= nr_pages;
>                 ret = true;
>         }
> @@ -2290,7 +2290,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>   */
>  static void drain_stock(struct memcg_stock_pcp *stock)
>  {
> -       struct mem_cgroup *old = stock->cached;
> +       struct mem_cgroup *old = READ_ONCE(stock->cached);
>
>         if (!old)
>                 return;
> @@ -2303,7 +2303,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
>         }
>
>         css_put(&old->css);
> -       stock->cached = NULL;
> +       WRITE_ONCE(stock->cached, NULL);

Is it me or can we call drain_stock() from memcg_hotplug_cpu_dead()
without holding the lock, unlike all other callers. Is this a problem?

>  }
>
>  static void drain_local_stock(struct work_struct *dummy)
> @@ -2338,10 +2338,10 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>         struct memcg_stock_pcp *stock;
>
>         stock = this_cpu_ptr(&memcg_stock);
> -       if (stock->cached != memcg) { /* reset if necessary */
> +       if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */
>                 drain_stock(stock);
>                 css_get(&memcg->css);
> -               stock->cached = memcg;
> +               WRITE_ONCE(stock->cached, memcg);
>         }
>         stock->nr_pages += nr_pages;
>
> @@ -2383,7 +2383,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
>                 bool flush = false;
>
>                 rcu_read_lock();
> -               memcg = stock->cached;
> +               memcg = READ_ONCE(stock->cached);
>                 if (memcg && stock->nr_pages &&
>                     mem_cgroup_is_descendant(memcg, root_memcg))
>                         flush = true;
> --
> 2.40.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
  2023-05-02 20:15 ` [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Yosry Ahmed
@ 2023-05-02 21:38   ` Roman Gushchin
  2023-05-02 22:10     ` Yosry Ahmed
  2023-05-03 17:03     ` Shakeel Butt
  0 siblings, 2 replies; 10+ messages in thread
From: Roman Gushchin @ 2023-05-02 21:38 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-kernel,
	syzbot+774c29891415ab0fd29d, Dmitry Vyukov

On Tue, May 02, 2023 at 01:15:02PM -0700, Yosry Ahmed wrote:
> On Tue, May 2, 2023 at 9:09 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> >
> > KCSAN found an issue in obj_stock_flush_required():
> > stock->cached_objcg can be reset between the check and dereference:
> >
> > ==================================================================
> > BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock
> >
> > write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0:
> >  drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306
> >  refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340
> >  obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408
> >  memcg_slab_free_hook mm/slab.h:587 [inline]
> >  __cache_free mm/slab.c:3373 [inline]
> >  __do_kmem_cache_free mm/slab.c:3577 [inline]
> >  kmem_cache_free+0x105/0x280 mm/slab.c:3602
> >  __d_free fs/dcache.c:298 [inline]
> >  dentry_free fs/dcache.c:375 [inline]
> >  __dentry_kill+0x422/0x4a0 fs/dcache.c:621
> >  dentry_kill+0x8d/0x1e0
> >  dput+0x118/0x1f0 fs/dcache.c:913
> >  __fput+0x3bf/0x570 fs/file_table.c:329
> >  ____fput+0x15/0x20 fs/file_table.c:349
> >  task_work_run+0x123/0x160 kernel/task_work.c:179
> >  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
> >  exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171
> >  exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203
> >  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
> >  syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296
> >  do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86
> >  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> >
> > read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1:
> >  obj_stock_flush_required mm/memcontrol.c:3319 [inline]
> >  drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361
> >  try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703
> >  try_charge mm/memcontrol.c:2837 [inline]
> >  mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290
> >  sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025
> >  sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525
> >  udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692
> >  udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817
> >  sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668
> >  __sys_setsockopt+0x1c3/0x230 net/socket.c:2271
> >  __do_sys_setsockopt net/socket.c:2282 [inline]
> >  __se_sys_setsockopt net/socket.c:2279 [inline]
> >  __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279
> >  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> >  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
> >  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> >
> > value changed: 0xffff8881382d52c0 -> 0xffff888138893740
> >
> > Reported by Kernel Concurrency Sanitizer on:
> > CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 6.3.0-rc2-syzkaller-00387-g534293368afa #0
> > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
> >
> > Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to
> > stock->cached_objcg.
> 
> I believe all read accesses other than obj_stock_flush_required() are
> done under the lock, so READ_ONCE() wouldn't be needed AFAICT. Having
> READ_ONCE() only around the racy read can be useful to document the
> racy read and differentiate it from others.
> 
> With that said, it's also inconvenient to keep track moving forward of
> which reading sites are racy, and it may be simpler to just annotate
> all readers with READ_ONCE().
> 
> I am not sure which approach is better, just thinking out loud.

Yeah, I wasn't sure either. I believe that all changes except the original
READ_ONCE() are not leading to any meaningful asm changes, so it's a matter
of taste.

The reason why I went with the "change them all" approach:
reads without READ_ONCE() and subsequent writes with WRITE_ONCE()
inside a single function looked really weird.

> 
> >
> > Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API")
> > Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com
> > Reported-by: Dmitry Vyukov <dvyukov@google.com>
> > Link:
> > https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t
> > Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> 
> With the above said, I don't feel strongly either way, the patch looks
> good AFAICT:
> Reviewed-by: Yosry Ahmed <yosryahmed@google.com>

Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
  2023-05-02 21:38   ` Roman Gushchin
@ 2023-05-02 22:10     ` Yosry Ahmed
  2023-05-03 17:03     ` Shakeel Butt
  1 sibling, 0 replies; 10+ messages in thread
From: Yosry Ahmed @ 2023-05-02 22:10 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-kernel,
	syzbot+774c29891415ab0fd29d, Dmitry Vyukov

On Tue, May 2, 2023 at 2:38 PM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> On Tue, May 02, 2023 at 01:15:02PM -0700, Yosry Ahmed wrote:
> > On Tue, May 2, 2023 at 9:09 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> > >
> > > KCSAN found an issue in obj_stock_flush_required():
> > > stock->cached_objcg can be reset between the check and dereference:
> > >
> > > ==================================================================
> > > BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock
> > >
> > > write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0:
> > >  drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306
> > >  refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340
> > >  obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408
> > >  memcg_slab_free_hook mm/slab.h:587 [inline]
> > >  __cache_free mm/slab.c:3373 [inline]
> > >  __do_kmem_cache_free mm/slab.c:3577 [inline]
> > >  kmem_cache_free+0x105/0x280 mm/slab.c:3602
> > >  __d_free fs/dcache.c:298 [inline]
> > >  dentry_free fs/dcache.c:375 [inline]
> > >  __dentry_kill+0x422/0x4a0 fs/dcache.c:621
> > >  dentry_kill+0x8d/0x1e0
> > >  dput+0x118/0x1f0 fs/dcache.c:913
> > >  __fput+0x3bf/0x570 fs/file_table.c:329
> > >  ____fput+0x15/0x20 fs/file_table.c:349
> > >  task_work_run+0x123/0x160 kernel/task_work.c:179
> > >  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
> > >  exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171
> > >  exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203
> > >  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
> > >  syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296
> > >  do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86
> > >  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> > >
> > > read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1:
> > >  obj_stock_flush_required mm/memcontrol.c:3319 [inline]
> > >  drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361
> > >  try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703
> > >  try_charge mm/memcontrol.c:2837 [inline]
> > >  mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290
> > >  sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025
> > >  sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525
> > >  udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692
> > >  udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817
> > >  sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668
> > >  __sys_setsockopt+0x1c3/0x230 net/socket.c:2271
> > >  __do_sys_setsockopt net/socket.c:2282 [inline]
> > >  __se_sys_setsockopt net/socket.c:2279 [inline]
> > >  __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279
> > >  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > >  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
> > >  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> > >
> > > value changed: 0xffff8881382d52c0 -> 0xffff888138893740
> > >
> > > Reported by Kernel Concurrency Sanitizer on:
> > > CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 6.3.0-rc2-syzkaller-00387-g534293368afa #0
> > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
> > >
> > > Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to
> > > stock->cached_objcg.
> >
> > I believe all read accesses other than obj_stock_flush_required() are
> > done under the lock, so READ_ONCE() wouldn't be needed AFAICT. Having
> > READ_ONCE() only around the racy read can be useful to document the
> > racy read and differentiate it from others.
> >
> > With that said, it's also inconvenient to keep track moving forward of
> > which reading sites are racy, and it may be simpler to just annotate
> > all readers with READ_ONCE().
> >
> > I am not sure which approach is better, just thinking out loud.
>
> Yeah, I wasn't sure either. I believe that all changes except the original
> READ_ONCE() are not leading to any meaningful asm changes, so it's a matter
> of taste.
>
> The reason why I went with the "change them all" approach:
> reads without READ_ONCE() and subsequent writes with WRITE_ONCE()
> inside a single function looked really weird.
>

Agreed. It might be worth adding a comment somewhere documenting this.
It's not very hard to dig though, so whatever you think is best.

> >
> > >
> > > Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API")
> > > Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com
> > > Reported-by: Dmitry Vyukov <dvyukov@google.com>
> > > Link:
> > > https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t
> > > Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> >
> > With the above said, I don't feel strongly either way, the patch looks
> > good AFAICT:
> > Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
>
> Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
  2023-05-02 16:08 [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Roman Gushchin
  2023-05-02 16:08 ` [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached Roman Gushchin
  2023-05-02 20:15 ` [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Yosry Ahmed
@ 2023-05-03  8:05 ` Dmitry Vyukov
  2023-05-03 17:04 ` Shakeel Butt
  3 siblings, 0 replies; 10+ messages in thread
From: Dmitry Vyukov @ 2023-05-03  8:05 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-kernel,
	syzbot+774c29891415ab0fd29d, Yosry Ahmed

On Tue, 2 May 2023 at 18:09, Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> KCSAN found an issue in obj_stock_flush_required():
> stock->cached_objcg can be reset between the check and dereference:
>
> ==================================================================
> BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock
>
> write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0:
>  drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306
>  refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340
>  obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408
>  memcg_slab_free_hook mm/slab.h:587 [inline]
>  __cache_free mm/slab.c:3373 [inline]
>  __do_kmem_cache_free mm/slab.c:3577 [inline]
>  kmem_cache_free+0x105/0x280 mm/slab.c:3602
>  __d_free fs/dcache.c:298 [inline]
>  dentry_free fs/dcache.c:375 [inline]
>  __dentry_kill+0x422/0x4a0 fs/dcache.c:621
>  dentry_kill+0x8d/0x1e0
>  dput+0x118/0x1f0 fs/dcache.c:913
>  __fput+0x3bf/0x570 fs/file_table.c:329
>  ____fput+0x15/0x20 fs/file_table.c:349
>  task_work_run+0x123/0x160 kernel/task_work.c:179
>  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
>  exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171
>  exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203
>  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
>  syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296
>  do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1:
>  obj_stock_flush_required mm/memcontrol.c:3319 [inline]
>  drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361
>  try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703
>  try_charge mm/memcontrol.c:2837 [inline]
>  mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290
>  sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025
>  sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525
>  udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692
>  udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817
>  sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668
>  __sys_setsockopt+0x1c3/0x230 net/socket.c:2271
>  __do_sys_setsockopt net/socket.c:2282 [inline]
>  __se_sys_setsockopt net/socket.c:2279 [inline]
>  __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> value changed: 0xffff8881382d52c0 -> 0xffff888138893740
>
> Reported by Kernel Concurrency Sanitizer on:
> CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 6.3.0-rc2-syzkaller-00387-g534293368afa #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
>
> Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to
> stock->cached_objcg.
>
> Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API")
> Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Link:
> https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> Cc: Yosry Ahmed <yosryahmed@google.com>
> Cc: Shakeel Butt <shakeelb@google.com>
> ---
>  mm/memcontrol.c | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4b27e245a055..c823c35c2ed4 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3208,12 +3208,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
>          * accumulating over a page of vmstat data or when pgdat or idx
>          * changes.
>          */
> -       if (stock->cached_objcg != objcg) {
> +       if (READ_ONCE(stock->cached_objcg) != objcg) {
>                 old = drain_obj_stock(stock);
>                 obj_cgroup_get(objcg);
>                 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
>                                 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
> -               stock->cached_objcg = objcg;
> +               WRITE_ONCE(stock->cached_objcg, objcg);
>                 stock->cached_pgdat = pgdat;
>         } else if (stock->cached_pgdat != pgdat) {
>                 /* Flush the existing cached vmstat data */
> @@ -3267,7 +3267,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>         local_lock_irqsave(&memcg_stock.stock_lock, flags);
>
>         stock = this_cpu_ptr(&memcg_stock);
> -       if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
> +       if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
>                 stock->nr_bytes -= nr_bytes;
>                 ret = true;
>         }
> @@ -3279,7 +3279,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>
>  static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>  {
> -       struct obj_cgroup *old = stock->cached_objcg;
> +       struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
>
>         if (!old)
>                 return NULL;
> @@ -3332,7 +3332,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>                 stock->cached_pgdat = NULL;
>         }
>
> -       stock->cached_objcg = NULL;
> +       WRITE_ONCE(stock->cached_objcg, NULL);
>         /*
>          * The `old' objects needs to be released by the caller via
>          * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
> @@ -3343,10 +3343,11 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
>                                      struct mem_cgroup *root_memcg)
>  {
> +       struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
>         struct mem_cgroup *memcg;
>
> -       if (stock->cached_objcg) {
> -               memcg = obj_cgroup_memcg(stock->cached_objcg);
> +       if (objcg) {
> +               memcg = obj_cgroup_memcg(objcg);
>                 if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
>                         return true;
>         }
> @@ -3365,10 +3366,10 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
>         local_lock_irqsave(&memcg_stock.stock_lock, flags);
>
>         stock = this_cpu_ptr(&memcg_stock);
> -       if (stock->cached_objcg != objcg) { /* reset if necessary */
> +       if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
>                 old = drain_obj_stock(stock);
>                 obj_cgroup_get(objcg);
> -               stock->cached_objcg = objcg;
> +               WRITE_ONCE(stock->cached_objcg, objcg);
>                 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
>                                 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
>                 allow_uncharge = true;  /* Allow uncharge when objcg changes */


Reviewed-by: Dmitry Vyukov <dvyukov@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
  2023-05-02 21:38   ` Roman Gushchin
  2023-05-02 22:10     ` Yosry Ahmed
@ 2023-05-03 17:03     ` Shakeel Butt
  1 sibling, 0 replies; 10+ messages in thread
From: Shakeel Butt @ 2023-05-03 17:03 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Yosry Ahmed, linux-mm, Andrew Morton, Johannes Weiner,
	Michal Hocko, Muchun Song, linux-kernel,
	syzbot+774c29891415ab0fd29d, Dmitry Vyukov

On Tue, May 02, 2023 at 02:38:19PM -0700, Roman Gushchin wrote:
[...]
> > 
> > I believe all read accesses other than obj_stock_flush_required() are
> > done under the lock, so READ_ONCE() wouldn't be needed AFAICT. Having
> > READ_ONCE() only around the racy read can be useful to document the
> > racy read and differentiate it from others.
> > 
> > With that said, it's also inconvenient to keep track moving forward of
> > which reading sites are racy, and it may be simpler to just annotate
> > all readers with READ_ONCE().
> > 
> > I am not sure which approach is better, just thinking out loud.
> 
> Yeah, I wasn't sure either. I believe that all changes except the original
> READ_ONCE() are not leading to any meaningful asm changes, so it's a matter
> of taste.
> 
> The reason why I went with the "change them all" approach:
> reads without READ_ONCE() and subsequent writes with WRITE_ONCE()
> inside a single function looked really weird.
> 

Change them all is the right approach. This code will evolve in future
and having partial tagging will cause confusion or might be missed
altogether. Also the automated tools prefer change them all.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()
  2023-05-02 16:08 [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Roman Gushchin
                   ` (2 preceding siblings ...)
  2023-05-03  8:05 ` Dmitry Vyukov
@ 2023-05-03 17:04 ` Shakeel Butt
  3 siblings, 0 replies; 10+ messages in thread
From: Shakeel Butt @ 2023-05-03 17:04 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Muchun Song, linux-kernel, syzbot+774c29891415ab0fd29d,
	Dmitry Vyukov, Yosry Ahmed

On Tue, May 02, 2023 at 09:08:38AM -0700, Roman Gushchin wrote:
> KCSAN found an issue in obj_stock_flush_required():
> stock->cached_objcg can be reset between the check and dereference:
> 
> ==================================================================
> BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock
> 
> write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0:
>  drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306
>  refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340
>  obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408
>  memcg_slab_free_hook mm/slab.h:587 [inline]
>  __cache_free mm/slab.c:3373 [inline]
>  __do_kmem_cache_free mm/slab.c:3577 [inline]
>  kmem_cache_free+0x105/0x280 mm/slab.c:3602
>  __d_free fs/dcache.c:298 [inline]
>  dentry_free fs/dcache.c:375 [inline]
>  __dentry_kill+0x422/0x4a0 fs/dcache.c:621
>  dentry_kill+0x8d/0x1e0
>  dput+0x118/0x1f0 fs/dcache.c:913
>  __fput+0x3bf/0x570 fs/file_table.c:329
>  ____fput+0x15/0x20 fs/file_table.c:349
>  task_work_run+0x123/0x160 kernel/task_work.c:179
>  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
>  exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171
>  exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203
>  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
>  syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296
>  do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1:
>  obj_stock_flush_required mm/memcontrol.c:3319 [inline]
>  drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361
>  try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703
>  try_charge mm/memcontrol.c:2837 [inline]
>  mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290
>  sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025
>  sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525
>  udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692
>  udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817
>  sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668
>  __sys_setsockopt+0x1c3/0x230 net/socket.c:2271
>  __do_sys_setsockopt net/socket.c:2282 [inline]
>  __se_sys_setsockopt net/socket.c:2279 [inline]
>  __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> value changed: 0xffff8881382d52c0 -> 0xffff888138893740
> 
> Reported by Kernel Concurrency Sanitizer on:
> CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 6.3.0-rc2-syzkaller-00387-g534293368afa #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
> 
> Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to
> stock->cached_objcg.
> 
> Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API")
> Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Link:
> https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> Cc: Yosry Ahmed <yosryahmed@google.com>
> Cc: Shakeel Butt <shakeelb@google.com>

Acked-by: Shakeel Butt <shakeelb@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached
  2023-05-02 16:08 ` [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached Roman Gushchin
  2023-05-02 20:21   ` Yosry Ahmed
@ 2023-05-03 17:06   ` Shakeel Butt
  1 sibling, 0 replies; 10+ messages in thread
From: Shakeel Butt @ 2023-05-03 17:06 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: linux-mm, Andrew Morton, Johannes Weiner, Michal Hocko,
	Muchun Song, linux-kernel, Dmitry Vyukov, Yosry Ahmed

On Tue, May 02, 2023 at 09:08:39AM -0700, Roman Gushchin wrote:
> A memcg pointer in the percpu stock can be accessed by drain_all_stock()
> from another cpu in a lockless way.
> In theory it might lead to an issue, similar to the one which has been
> discovered with stock->cached_objcg, where the pointer was zeroed
> between the check for being NULL and dereferencing.
> In this case the issue is unlikely a real problem, but to make it
> bulletproof and similar to stock->cached_objcg, let's annotate all
> accesses to stock->cached with READ_ONCE()/WTRITE_ONCE().
> 
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Yosry Ahmed <yosryahmed@google.com>
> Cc: Shakeel Butt <shakeelb@google.com>

Acked-by: Shakeel Butt <shakeelb@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-05-03 17:07 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-02 16:08 [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Roman Gushchin
2023-05-02 16:08 ` [PATCH v2 2/2] mm: memcg: use READ_ONCE()/WRITE_ONCE() to access stock->cached Roman Gushchin
2023-05-02 20:21   ` Yosry Ahmed
2023-05-03 17:06   ` Shakeel Butt
2023-05-02 20:15 ` [PATCH v2 1/2] mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() Yosry Ahmed
2023-05-02 21:38   ` Roman Gushchin
2023-05-02 22:10     ` Yosry Ahmed
2023-05-03 17:03     ` Shakeel Butt
2023-05-03  8:05 ` Dmitry Vyukov
2023-05-03 17:04 ` Shakeel Butt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.