All of lore.kernel.org
 help / color / mirror / Atom feed
* [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot
@ 2019-10-24 15:03 zhong jiang
  2019-10-24 17:41 ` Matthew Wilcox
  2019-10-25 15:17 ` Matthew Wilcox
  0 siblings, 2 replies; 5+ messages in thread
From: zhong jiang @ 2019-10-24 15:03 UTC (permalink / raw)
  To: gregkh; +Cc: willy, stable, vbabka, mhocko, linux-mm, zhongjiang

Recently, We test an linux 4.19 stable and find the following issue.

kernel BUG at lib/radix-tree.c:1429!
invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
Call Trace:
 memfd_tag_pins mm/memfd.c:51 [inline]
 memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
 memfd_add_seals mm/memfd.c:215 [inline]
 memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
 do_fcntl+0x589/0xeb0 fs/fcntl.c:421
 __do_sys_fcntl fs/fcntl.c:463 [inline]
 __se_sys_fcntl fs/fcntl.c:448 [inline]
 __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293

By reviewing the code, I find that there is an race between iterate
the radix_tree and radix_tree_insert/delete. Because the former just
access its slot in rcu protected period. but it fails to prevent the
radix_tree from being changed.

Cc: stable@vger.kernel.org
Signed-off-by: zhong jiang <zhongjiang@huawei.com>
---
 mm/memfd.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/mm/memfd.c b/mm/memfd.c
index 2bb5e25..0b3fedc 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -37,8 +37,8 @@ static void memfd_tag_pins(struct address_space *mapping)
 
 	lru_add_drain();
 	start = 0;
-	rcu_read_lock();
 
+	xa_lock_irq(&mapping->i_pages);
 	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
@@ -47,18 +47,16 @@ static void memfd_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			xa_lock_irq(&mapping->i_pages);
 			radix_tree_tag_set(&mapping->i_pages, iter.index,
 					   MEMFD_TAG_PINNED);
-			xa_unlock_irq(&mapping->i_pages);
 		}
 
 		if (need_resched()) {
 			slot = radix_tree_iter_resume(slot, &iter);
-			cond_resched_rcu();
+			cond_resched_lock(&mapping->i_pages.xa_lock);
 		}
 	}
-	rcu_read_unlock();
+	xa_unlock_irq(&mapping->i_pages);
 }
 
 /*
-- 
1.7.12.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot
  2019-10-24 15:03 [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot zhong jiang
@ 2019-10-24 17:41 ` Matthew Wilcox
  2019-10-24 23:54   ` Sasha Levin
  2019-10-25 15:17 ` Matthew Wilcox
  1 sibling, 1 reply; 5+ messages in thread
From: Matthew Wilcox @ 2019-10-24 17:41 UTC (permalink / raw)
  To: zhong jiang; +Cc: gregkh, stable, vbabka, mhocko, linux-mm

On Thu, Oct 24, 2019 at 11:03:20PM +0800, zhong jiang wrote:
> By reviewing the code, I find that there is an race between iterate
> the radix_tree and radix_tree_insert/delete. Because the former just
> access its slot in rcu protected period. but it fails to prevent the
> radix_tree from being changed.

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>

The locking here now matches the locking in memfd_tag_pins() that
was changed in ef3038a573aa8bf2f3797b110f7244b55a0e519c (part of 4.20-rc1).
I didn't notice that I was fixing a bug when I changed the locking.
This bug has been present since 05f65b5c70909ef686f865f0a85406d74d75f70f
(part of 3.17) so backports will need to go further back.  This code has
moved around a bit (mm/shmem.c) and the APIs have changed, so it will
take some effort.

> Cc: stable@vger.kernel.org
> Signed-off-by: zhong jiang <zhongjiang@huawei.com>
> ---
>  mm/memfd.c | 8 +++-----
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/memfd.c b/mm/memfd.c
> index 2bb5e25..0b3fedc 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -37,8 +37,8 @@ static void memfd_tag_pins(struct address_space *mapping)
>  
>  	lru_add_drain();
>  	start = 0;
> -	rcu_read_lock();
>  
> +	xa_lock_irq(&mapping->i_pages);
>  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>  		page = radix_tree_deref_slot(slot);
>  		if (!page || radix_tree_exception(page)) {
> @@ -47,18 +47,16 @@ static void memfd_tag_pins(struct address_space *mapping)
>  				continue;
>  			}
>  		} else if (page_count(page) - page_mapcount(page) > 1) {
> -			xa_lock_irq(&mapping->i_pages);
>  			radix_tree_tag_set(&mapping->i_pages, iter.index,
>  					   MEMFD_TAG_PINNED);
> -			xa_unlock_irq(&mapping->i_pages);
>  		}
>  
>  		if (need_resched()) {
>  			slot = radix_tree_iter_resume(slot, &iter);
> -			cond_resched_rcu();
> +			cond_resched_lock(&mapping->i_pages.xa_lock);
>  		}
>  	}
> -	rcu_read_unlock();
> +	xa_unlock_irq(&mapping->i_pages);
>  }
>  
>  /*
> -- 
> 1.7.12.4
> 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot
  2019-10-24 17:41 ` Matthew Wilcox
@ 2019-10-24 23:54   ` Sasha Levin
  0 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2019-10-24 23:54 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: zhong jiang, gregkh, stable, vbabka, mhocko, linux-mm

On Thu, Oct 24, 2019 at 10:41:15AM -0700, Matthew Wilcox wrote:
>On Thu, Oct 24, 2019 at 11:03:20PM +0800, zhong jiang wrote:
>> By reviewing the code, I find that there is an race between iterate
>> the radix_tree and radix_tree_insert/delete. Because the former just
>> access its slot in rcu protected period. but it fails to prevent the
>> radix_tree from being changed.
>
>Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>
>The locking here now matches the locking in memfd_tag_pins() that
>was changed in ef3038a573aa8bf2f3797b110f7244b55a0e519c (part of 4.20-rc1).
>I didn't notice that I was fixing a bug when I changed the locking.
>This bug has been present since 05f65b5c70909ef686f865f0a85406d74d75f70f
>(part of 3.17) so backports will need to go further back.  This code has
>moved around a bit (mm/shmem.c) and the APIs have changed, so it will
>take some effort.

I've queued this up for 4.19. Patches for older branches are more than
welcome.

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot
  2019-10-24 15:03 [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot zhong jiang
  2019-10-24 17:41 ` Matthew Wilcox
@ 2019-10-25 15:17 ` Matthew Wilcox
  2019-10-26  1:54   ` zhong jiang
  1 sibling, 1 reply; 5+ messages in thread
From: Matthew Wilcox @ 2019-10-25 15:17 UTC (permalink / raw)
  To: zhong jiang; +Cc: gregkh, stable, vbabka, mhocko, linux-mm

On Thu, Oct 24, 2019 at 11:03:20PM +0800, zhong jiang wrote:
> +	xa_lock_irq(&mapping->i_pages);
...
>  		if (need_resched()) {
>  			slot = radix_tree_iter_resume(slot, &iter);
> -			cond_resched_rcu();
> +			cond_resched_lock(&mapping->i_pages.xa_lock);

Ooh, this isn't right.  We're taking the lock, disabling interrupts,
then dropping the lock and rescheduling without reenabling interrupts.
If this ever triggers then we'll get a scheduling-while-atomic error.

Fortunately (?) need_resched() can almost never be set while we're holding
a spinlock with interrupts disabled (thanks to peterz for telling me that
when I asked for a cond_resched_lock_irq() a few years ago).  So we need
to take this patch further towards the current code.

Here's a version for 4.14.y.  Compile tested only.

diff --git a/mm/shmem.c b/mm/shmem.c
index 6c10f1d92251..deaea74ec1b3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2657,11 +2657,12 @@ static void shmem_tag_pins(struct address_space *mapping)
 	void **slot;
 	pgoff_t start;
 	struct page *page;
+	unsigned int tagged = 0;
 
 	lru_add_drain();
 	start = 0;
-	rcu_read_lock();
 
+	spin_lock_irq(&mapping->tree_lock);
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
@@ -2670,18 +2671,19 @@ static void shmem_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			spin_lock_irq(&mapping->tree_lock);
 			radix_tree_tag_set(&mapping->page_tree, iter.index,
 					   SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
 		}
 
-		if (need_resched()) {
-			slot = radix_tree_iter_resume(slot, &iter);
-			cond_resched_rcu();
-		}
+		if (++tagged % 1024)
+			continue;
+
+		slot = radix_tree_iter_resume(slot, &iter);
+		spin_unlock_irq(&mapping->tree_lock);
+		cond_resched();
+		spin_lock_irq(&mapping->tree_lock);
 	}
-	rcu_read_unlock();
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 /*



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot
  2019-10-25 15:17 ` Matthew Wilcox
@ 2019-10-26  1:54   ` zhong jiang
  0 siblings, 0 replies; 5+ messages in thread
From: zhong jiang @ 2019-10-26  1:54 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: gregkh, stable, vbabka, mhocko, linux-mm

On 2019/10/25 23:17, Matthew Wilcox wrote:
> On Thu, Oct 24, 2019 at 11:03:20PM +0800, zhong jiang wrote:
>> +	xa_lock_irq(&mapping->i_pages);
> ...
>>  		if (need_resched()) {
>>  			slot = radix_tree_iter_resume(slot, &iter);
>> -			cond_resched_rcu();
>> +			cond_resched_lock(&mapping->i_pages.xa_lock);
> Ooh, this isn't right.  We're taking the lock, disabling interrupts,
> then dropping the lock and rescheduling without reenabling interrupts.
> If this ever triggers then we'll get a scheduling-while-atomic error.
>
> Fortunately (?) need_resched() can almost never be set while we're holding
> a spinlock with interrupts disabled (thanks to peterz for telling me that
> when I asked for a cond_resched_lock_irq() a few years ago).  So we need
> to take this patch further towards the current code.
I miss that.  Thanks you for pointing out.

Thanks,
zhong jiang
> Here's a version for 4.14.y.  Compile tested only.
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 6c10f1d92251..deaea74ec1b3 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2657,11 +2657,12 @@ static void shmem_tag_pins(struct address_space *mapping)
>  	void **slot;
>  	pgoff_t start;
>  	struct page *page;
> +	unsigned int tagged = 0;
>  
>  	lru_add_drain();
>  	start = 0;
> -	rcu_read_lock();
>  
> +	spin_lock_irq(&mapping->tree_lock);
>  	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
>  		page = radix_tree_deref_slot(slot);
>  		if (!page || radix_tree_exception(page)) {
> @@ -2670,18 +2671,19 @@ static void shmem_tag_pins(struct address_space *mapping)
>  				continue;
>  			}
>  		} else if (page_count(page) - page_mapcount(page) > 1) {
> -			spin_lock_irq(&mapping->tree_lock);
>  			radix_tree_tag_set(&mapping->page_tree, iter.index,
>  					   SHMEM_TAG_PINNED);
> -			spin_unlock_irq(&mapping->tree_lock);
>  		}
>  
> -		if (need_resched()) {
> -			slot = radix_tree_iter_resume(slot, &iter);
> -			cond_resched_rcu();
> -		}
> +		if (++tagged % 1024)
> +			continue;
> +
> +		slot = radix_tree_iter_resume(slot, &iter);
> +		spin_unlock_irq(&mapping->tree_lock);
> +		cond_resched();
> +		spin_lock_irq(&mapping->tree_lock);
>  	}
> -	rcu_read_unlock();
> +	spin_unlock_irq(&mapping->tree_lock);
>  }
>  

>  /*
>
>
>
> .
>



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-10-26  1:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-24 15:03 [RPF STABLE PATCH] mm/memfd: should be lock the radix_tree when iterating its slot zhong jiang
2019-10-24 17:41 ` Matthew Wilcox
2019-10-24 23:54   ` Sasha Levin
2019-10-25 15:17 ` Matthew Wilcox
2019-10-26  1:54   ` zhong jiang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.