linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 4.19] memfd: Fix locking when tagging pins
@ 2019-10-25 16:58 Matthew Wilcox
  2019-10-25 16:58 ` [PATCH 4.14] " Matthew Wilcox
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Matthew Wilcox @ 2019-10-25 16:58 UTC (permalink / raw)
  To: gregkh, stable, dh.herrmann, linux-mm, zhongjiang; +Cc: Matthew Wilcox (Oracle)

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

The RCU lock is insufficient to protect the radix tree iteration as
a deletion from the tree can occur before we take the spinlock to
tag the entry.  In 4.19, this has manifested as a bug with the following
trace:

kernel BUG at lib/radix-tree.c:1429!
invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
Call Trace:
 memfd_tag_pins mm/memfd.c:51 [inline]
 memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
 memfd_add_seals mm/memfd.c:215 [inline]
 memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
 do_fcntl+0x589/0xeb0 fs/fcntl.c:421
 __do_sys_fcntl fs/fcntl.c:463 [inline]
 __se_sys_fcntl fs/fcntl.c:448 [inline]
 __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293

The problem does not occur in mainline due to the XArray rewrite which
changed the locking to exclude modification of the tree during iteration.
At the time, nobody realised this was a bugfix.  Backport the locking
changes to stable.

Cc: stable@vger.kernel.org
Reported-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memfd.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mm/memfd.c b/mm/memfd.c
index 2bb5e257080e..5859705dafe1 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
 	void __rcu **slot;
 	pgoff_t start;
 	struct page *page;
+	unsigned int tagged = 0;
 
 	lru_add_drain();
 	start = 0;
-	rcu_read_lock();
 
+	xa_lock_irq(&mapping->i_pages);
 	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
@@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			xa_lock_irq(&mapping->i_pages);
 			radix_tree_tag_set(&mapping->i_pages, iter.index,
 					   MEMFD_TAG_PINNED);
-			xa_unlock_irq(&mapping->i_pages);
 		}
 
-		if (need_resched()) {
-			slot = radix_tree_iter_resume(slot, &iter);
-			cond_resched_rcu();
-		}
+		if (++tagged % 1024)
+			continue;
+
+		slot = radix_tree_iter_resume(slot, &iter);
+		xa_unlock_irq(&mapping->i_pages);
+		cond_resched();
+		xa_lock_irq(&mapping->i_pages);
 	}
-	rcu_read_unlock();
+	xa_unlock_irq(&mapping->i_pages);
 }
 
 /*
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4.14] memfd: Fix locking when tagging pins
  2019-10-25 16:58 [PATCH 4.19] memfd: Fix locking when tagging pins Matthew Wilcox
@ 2019-10-25 16:58 ` Matthew Wilcox
  2019-10-25 16:58 ` [PATCH 4.9] " Matthew Wilcox
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Matthew Wilcox @ 2019-10-25 16:58 UTC (permalink / raw)
  To: gregkh, stable, dh.herrmann, linux-mm, zhongjiang; +Cc: Matthew Wilcox (Oracle)

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

The RCU lock is insufficient to protect the radix tree iteration as
a deletion from the tree can occur before we take the spinlock to
tag the entry.  In 4.19, this has manifested as a bug with the following
trace:

kernel BUG at lib/radix-tree.c:1429!
invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
Call Trace:
 memfd_tag_pins mm/memfd.c:51 [inline]
 memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
 memfd_add_seals mm/memfd.c:215 [inline]
 memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
 do_fcntl+0x589/0xeb0 fs/fcntl.c:421
 __do_sys_fcntl fs/fcntl.c:463 [inline]
 __se_sys_fcntl fs/fcntl.c:448 [inline]
 __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293

The problem does not occur in mainline due to the XArray rewrite which
changed the locking to exclude modification of the tree during iteration.
At the time, nobody realised this was a bugfix.  Backport the locking
changes to stable.

Cc: stable@vger.kernel.org
Reported-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/shmem.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 037e2ee9ccac..5b2cc9f9b1f1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2657,11 +2657,12 @@ static void shmem_tag_pins(struct address_space *mapping)
 	void **slot;
 	pgoff_t start;
 	struct page *page;
+	unsigned int tagged = 0;
 
 	lru_add_drain();
 	start = 0;
-	rcu_read_lock();
 
+	spin_lock_irq(&mapping->tree_lock);
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
@@ -2670,18 +2671,19 @@ static void shmem_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			spin_lock_irq(&mapping->tree_lock);
 			radix_tree_tag_set(&mapping->page_tree, iter.index,
 					   SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
 		}
 
-		if (need_resched()) {
-			slot = radix_tree_iter_resume(slot, &iter);
-			cond_resched_rcu();
-		}
+		if (++tagged % 1024)
+			continue;
+
+		slot = radix_tree_iter_resume(slot, &iter);
+		spin_unlock_irq(&mapping->tree_lock);
+		cond_resched();
+		spin_lock_irq(&mapping->tree_lock);
 	}
-	rcu_read_unlock();
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 /*
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4.9] memfd: Fix locking when tagging pins
  2019-10-25 16:58 [PATCH 4.19] memfd: Fix locking when tagging pins Matthew Wilcox
  2019-10-25 16:58 ` [PATCH 4.14] " Matthew Wilcox
@ 2019-10-25 16:58 ` Matthew Wilcox
  2019-10-25 16:58 ` [PATCH 4.4] " Matthew Wilcox
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Matthew Wilcox @ 2019-10-25 16:58 UTC (permalink / raw)
  To: gregkh, stable, dh.herrmann, linux-mm, zhongjiang; +Cc: Matthew Wilcox (Oracle)

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

The RCU lock is insufficient to protect the radix tree iteration as
a deletion from the tree can occur before we take the spinlock to
tag the entry.  In 4.19, this has manifested as a bug with the following
trace:

kernel BUG at lib/radix-tree.c:1429!
invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
Call Trace:
 memfd_tag_pins mm/memfd.c:51 [inline]
 memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
 memfd_add_seals mm/memfd.c:215 [inline]
 memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
 do_fcntl+0x589/0xeb0 fs/fcntl.c:421
 __do_sys_fcntl fs/fcntl.c:463 [inline]
 __se_sys_fcntl fs/fcntl.c:448 [inline]
 __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293

The problem does not occur in mainline due to the XArray rewrite which
changed the locking to exclude modification of the tree during iteration.
At the time, nobody realised this was a bugfix.  Backport the locking
changes to stable.

Cc: stable@vger.kernel.org
Reported-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/shmem.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 944242491059..ac8a5fedc245 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2457,11 +2457,12 @@ static void shmem_tag_pins(struct address_space *mapping)
 	void **slot;
 	pgoff_t start;
 	struct page *page;
+	unsigned int tagged = 0;
 
 	lru_add_drain();
 	start = 0;
-	rcu_read_lock();
 
+	spin_lock_irq(&mapping->tree_lock);
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
@@ -2470,18 +2471,19 @@ static void shmem_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			spin_lock_irq(&mapping->tree_lock);
 			radix_tree_tag_set(&mapping->page_tree, iter.index,
 					   SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
 		}
 
-		if (need_resched()) {
-			cond_resched_rcu();
-			slot = radix_tree_iter_next(&iter);
-		}
+		if (++tagged % 1024)
+			continue;
+
+		slot = radix_tree_iter_next(&iter);
+		spin_unlock_irq(&mapping->tree_lock);
+		cond_resched();
+		spin_lock_irq(&mapping->tree_lock);
 	}
-	rcu_read_unlock();
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 /*
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4.4] memfd: Fix locking when tagging pins
  2019-10-25 16:58 [PATCH 4.19] memfd: Fix locking when tagging pins Matthew Wilcox
  2019-10-25 16:58 ` [PATCH 4.14] " Matthew Wilcox
  2019-10-25 16:58 ` [PATCH 4.9] " Matthew Wilcox
@ 2019-10-25 16:58 ` Matthew Wilcox
  2019-10-26  2:03 ` [PATCH 4.19] " zhong jiang
  2019-11-13  4:00 ` zhong jiang
  4 siblings, 0 replies; 10+ messages in thread
From: Matthew Wilcox @ 2019-10-25 16:58 UTC (permalink / raw)
  To: gregkh, stable, dh.herrmann, linux-mm, zhongjiang; +Cc: Matthew Wilcox (Oracle)

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

The RCU lock is insufficient to protect the radix tree iteration as
a deletion from the tree can occur before we take the spinlock to
tag the entry.  In 4.19, this has manifested as a bug with the following
trace:

kernel BUG at lib/radix-tree.c:1429!
invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
Call Trace:
 memfd_tag_pins mm/memfd.c:51 [inline]
 memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
 memfd_add_seals mm/memfd.c:215 [inline]
 memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
 do_fcntl+0x589/0xeb0 fs/fcntl.c:421
 __do_sys_fcntl fs/fcntl.c:463 [inline]
 __se_sys_fcntl fs/fcntl.c:448 [inline]
 __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293

The problem does not occur in mainline due to the XArray rewrite which
changed the locking to exclude modification of the tree during iteration.
At the time, nobody realised this was a bugfix.  Backport the locking
changes to stable.

Cc: stable@vger.kernel.org
Reported-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/shmem.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index f11aec40f2e1..62668379623b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1854,11 +1854,12 @@ static void shmem_tag_pins(struct address_space *mapping)
 	void **slot;
 	pgoff_t start;
 	struct page *page;
+	unsigned int tagged = 0;
 
 	lru_add_drain();
 	start = 0;
-	rcu_read_lock();
 
+	spin_lock_irq(&mapping->tree_lock);
 restart:
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 		page = radix_tree_deref_slot(slot);
@@ -1866,19 +1867,20 @@ restart:
 			if (radix_tree_deref_retry(page))
 				goto restart;
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			spin_lock_irq(&mapping->tree_lock);
 			radix_tree_tag_set(&mapping->page_tree, iter.index,
 					   SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
 		}
 
-		if (need_resched()) {
-			cond_resched_rcu();
-			start = iter.index + 1;
-			goto restart;
-		}
+		if (++tagged % 1024)
+			continue;
+
+		spin_unlock_irq(&mapping->tree_lock);
+		cond_resched();
+		start = iter.index + 1;
+		spin_lock_irq(&mapping->tree_lock);
+		goto restart;
 	}
-	rcu_read_unlock();
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 /*
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] memfd: Fix locking when tagging pins
  2019-10-25 16:58 [PATCH 4.19] memfd: Fix locking when tagging pins Matthew Wilcox
                   ` (2 preceding siblings ...)
  2019-10-25 16:58 ` [PATCH 4.4] " Matthew Wilcox
@ 2019-10-26  2:03 ` zhong jiang
  2019-10-26 15:34   ` Sasha Levin
  2019-11-13  4:00 ` zhong jiang
  4 siblings, 1 reply; 10+ messages in thread
From: zhong jiang @ 2019-10-26  2:03 UTC (permalink / raw)
  To: Matthew Wilcox, sashal; +Cc: gregkh, stable, dh.herrmann, linux-mm

On 2019/10/26 0:58, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>
> The RCU lock is insufficient to protect the radix tree iteration as
> a deletion from the tree can occur before we take the spinlock to
> tag the entry.  In 4.19, this has manifested as a bug with the following
> trace:
>
> kernel BUG at lib/radix-tree.c:1429!
> invalid opcode: 0000 [#1] SMP KASAN PTI
> CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
> RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
> Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
> RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
> RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
> RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
> RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
> R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
> R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
> FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
> Call Trace:
>  memfd_tag_pins mm/memfd.c:51 [inline]
>  memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
>  memfd_add_seals mm/memfd.c:215 [inline]
>  memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
>  do_fcntl+0x589/0xeb0 fs/fcntl.c:421
>  __do_sys_fcntl fs/fcntl.c:463 [inline]
>  __se_sys_fcntl fs/fcntl.c:448 [inline]
>  __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
>  do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293
>
> The problem does not occur in mainline due to the XArray rewrite which
> changed the locking to exclude modification of the tree during iteration.
> At the time, nobody realised this was a bugfix.  Backport the locking
> changes to stable.
>
> Cc: stable@vger.kernel.org
> Reported-by: zhong jiang <zhongjiang@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/memfd.c | 18 ++++++++++--------
>  1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/mm/memfd.c b/mm/memfd.c
> index 2bb5e257080e..5859705dafe1 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
>  	void __rcu **slot;
>  	pgoff_t start;
>  	struct page *page;
> +	unsigned int tagged = 0;
>  
>  	lru_add_drain();
>  	start = 0;
> -	rcu_read_lock();
>  
> +	xa_lock_irq(&mapping->i_pages);
>  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>  		page = radix_tree_deref_slot(slot);
>  		if (!page || radix_tree_exception(page)) {
> @@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
>  				continue;
>  			}
>  		} else if (page_count(page) - page_mapcount(page) > 1) {
> -			xa_lock_irq(&mapping->i_pages);
>  			radix_tree_tag_set(&mapping->i_pages, iter.index,
>  					   MEMFD_TAG_PINNED);
> -			xa_unlock_irq(&mapping->i_pages);
>  		}
>  
> -		if (need_resched()) {
> -			slot = radix_tree_iter_resume(slot, &iter);
> -			cond_resched_rcu();
> -		}
> +		if (++tagged % 1024)
> +			continue;
> +
> +		slot = radix_tree_iter_resume(slot, &iter);
> +		xa_unlock_irq(&mapping->i_pages);
> +		cond_resched();
> +		xa_lock_irq(&mapping->i_pages);
>  	}
> -	rcu_read_unlock();
> +	xa_unlock_irq(&mapping->i_pages);
>  }
>  
>  /*
The patch looks good to me.   thanks for your review and efforts.

Sasha,  The patch was correct,  It should go into stable instead of my patch.

Sincerely,
zhong jiang



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] memfd: Fix locking when tagging pins
  2019-10-26  2:03 ` [PATCH 4.19] " zhong jiang
@ 2019-10-26 15:34   ` Sasha Levin
  0 siblings, 0 replies; 10+ messages in thread
From: Sasha Levin @ 2019-10-26 15:34 UTC (permalink / raw)
  To: zhong jiang; +Cc: Matthew Wilcox, gregkh, stable, dh.herrmann, linux-mm

On Sat, Oct 26, 2019 at 10:03:49AM +0800, zhong jiang wrote:
>On 2019/10/26 0:58, Matthew Wilcox wrote:
>> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>>
>> The RCU lock is insufficient to protect the radix tree iteration as
>> a deletion from the tree can occur before we take the spinlock to
>> tag the entry.  In 4.19, this has manifested as a bug with the following
>> trace:
>>
>> kernel BUG at lib/radix-tree.c:1429!
>> invalid opcode: 0000 [#1] SMP KASAN PTI
>> CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
>> RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
>> Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
>> RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
>> RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
>> RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
>> RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
>> R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
>> R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
>> FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
>> Call Trace:
>>  memfd_tag_pins mm/memfd.c:51 [inline]
>>  memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
>>  memfd_add_seals mm/memfd.c:215 [inline]
>>  memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
>>  do_fcntl+0x589/0xeb0 fs/fcntl.c:421
>>  __do_sys_fcntl fs/fcntl.c:463 [inline]
>>  __se_sys_fcntl fs/fcntl.c:448 [inline]
>>  __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
>>  do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293
>>
>> The problem does not occur in mainline due to the XArray rewrite which
>> changed the locking to exclude modification of the tree during iteration.
>> At the time, nobody realised this was a bugfix.  Backport the locking
>> changes to stable.
>>
>> Cc: stable@vger.kernel.org
>> Reported-by: zhong jiang <zhongjiang@huawei.com>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>  mm/memfd.c | 18 ++++++++++--------
>>  1 file changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/memfd.c b/mm/memfd.c
>> index 2bb5e257080e..5859705dafe1 100644
>> --- a/mm/memfd.c
>> +++ b/mm/memfd.c
>> @@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
>>  	void __rcu **slot;
>>  	pgoff_t start;
>>  	struct page *page;
>> +	unsigned int tagged = 0;
>>
>>  	lru_add_drain();
>>  	start = 0;
>> -	rcu_read_lock();
>>
>> +	xa_lock_irq(&mapping->i_pages);
>>  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>>  		page = radix_tree_deref_slot(slot);
>>  		if (!page || radix_tree_exception(page)) {
>> @@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
>>  				continue;
>>  			}
>>  		} else if (page_count(page) - page_mapcount(page) > 1) {
>> -			xa_lock_irq(&mapping->i_pages);
>>  			radix_tree_tag_set(&mapping->i_pages, iter.index,
>>  					   MEMFD_TAG_PINNED);
>> -			xa_unlock_irq(&mapping->i_pages);
>>  		}
>>
>> -		if (need_resched()) {
>> -			slot = radix_tree_iter_resume(slot, &iter);
>> -			cond_resched_rcu();
>> -		}
>> +		if (++tagged % 1024)
>> +			continue;
>> +
>> +		slot = radix_tree_iter_resume(slot, &iter);
>> +		xa_unlock_irq(&mapping->i_pages);
>> +		cond_resched();
>> +		xa_lock_irq(&mapping->i_pages);
>>  	}
>> -	rcu_read_unlock();
>> +	xa_unlock_irq(&mapping->i_pages);
>>  }
>>
>>  /*
>The patch looks good to me.   thanks for your review and efforts.
>
>Sasha,  The patch was correct,  It should go into stable instead of my patch.

I've queued up this series for all respective branches (fixing up 4.19),
thanks!

-- 
Thanks,
Sasha


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] memfd: Fix locking when tagging pins
  2019-10-25 16:58 [PATCH 4.19] memfd: Fix locking when tagging pins Matthew Wilcox
                   ` (3 preceding siblings ...)
  2019-10-26  2:03 ` [PATCH 4.19] " zhong jiang
@ 2019-11-13  4:00 ` zhong jiang
  2019-11-14  1:53   ` zhong jiang
  2019-11-14  2:26   ` Matthew Wilcox
  4 siblings, 2 replies; 10+ messages in thread
From: zhong jiang @ 2019-11-13  4:00 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: gregkh, stable, dh.herrmann, linux-mm

[-- Attachment #1: Type: text/plain, Size: 4608 bytes --]

Hi,  Matthew

When appling the following patch,  I  hit the an warning.

*WARNING: suspicious RCU usage in memfd_wait_for_pins*

It is because we remove the rcu_read_lock/read_read_unlock. but We still use
radix_tree_deref_slot checking rcu_read_lock_held().

A simple fix as follows without test.

diff --git a/mm/memfd.c b/mm/memfd.c
index 5859705dafe1..af244c9c8b6f 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -40,6 +40,7 @@ static void memfd_tag_pins(struct address_space *mapping)
        start = 0;

        xa_lock_irq(&mapping->i_pages);
+       rcu_read_lock();
        radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                page = radix_tree_deref_slot(slot);
                if (!page || radix_tree_exception(page)) {
@@ -60,6 +61,7 @@ static void memfd_tag_pins(struct address_space *mapping)
                cond_resched();
                xa_lock_irq(&mapping->i_pages);
        }
+       rcu_read_unlock();
        xa_unlock_irq(&mapping->i_pages);
 }

Thanks,
zhong jiang

On 2019/10/26 0:58, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>
> The RCU lock is insufficient to protect the radix tree iteration as
> a deletion from the tree can occur before we take the spinlock to
> tag the entry.  In 4.19, this has manifested as a bug with the following
> trace:
>
> kernel BUG at lib/radix-tree.c:1429!
> invalid opcode: 0000 [#1] SMP KASAN PTI
> CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
> RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
> Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
> RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
> RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
> RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
> RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
> R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
> R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
> FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
> Call Trace:
>  memfd_tag_pins mm/memfd.c:51 [inline]
>  memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
>  memfd_add_seals mm/memfd.c:215 [inline]
>  memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
>  do_fcntl+0x589/0xeb0 fs/fcntl.c:421
>  __do_sys_fcntl fs/fcntl.c:463 [inline]
>  __se_sys_fcntl fs/fcntl.c:448 [inline]
>  __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
>  do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293
>
> The problem does not occur in mainline due to the XArray rewrite which
> changed the locking to exclude modification of the tree during iteration.
> At the time, nobody realised this was a bugfix.  Backport the locking
> changes to stable.
>
> Cc: stable@vger.kernel.org
> Reported-by: zhong jiang <zhongjiang@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/memfd.c | 18 ++++++++++--------
>  1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/mm/memfd.c b/mm/memfd.c
> index 2bb5e257080e..5859705dafe1 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
>  	void __rcu **slot;
>  	pgoff_t start;
>  	struct page *page;
> +	unsigned int tagged = 0;
>  
>  	lru_add_drain();
>  	start = 0;
> -	rcu_read_lock();
>  
> +	xa_lock_irq(&mapping->i_pages);
>  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>  		page = radix_tree_deref_slot(slot);
>  		if (!page || radix_tree_exception(page)) {
> @@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
>  				continue;
>  			}
>  		} else if (page_count(page) - page_mapcount(page) > 1) {
> -			xa_lock_irq(&mapping->i_pages);
>  			radix_tree_tag_set(&mapping->i_pages, iter.index,
>  					   MEMFD_TAG_PINNED);
> -			xa_unlock_irq(&mapping->i_pages);
>  		}
>  
> -		if (need_resched()) {
> -			slot = radix_tree_iter_resume(slot, &iter);
> -			cond_resched_rcu();
> -		}
> +		if (++tagged % 1024)
> +			continue;
> +
> +		slot = radix_tree_iter_resume(slot, &iter);
> +		xa_unlock_irq(&mapping->i_pages);
> +		cond_resched();
> +		xa_lock_irq(&mapping->i_pages);
>  	}
> -	rcu_read_unlock();
> +	xa_unlock_irq(&mapping->i_pages);
>  }
>  
>  /*


[-- Attachment #2: Type: text/html, Size: 6029 bytes --]

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] memfd: Fix locking when tagging pins
  2019-11-13  4:00 ` zhong jiang
@ 2019-11-14  1:53   ` zhong jiang
  2019-11-14  2:26   ` Matthew Wilcox
  1 sibling, 0 replies; 10+ messages in thread
From: zhong jiang @ 2019-11-14  1:53 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: gregkh, stable, dh.herrmann, linux-mm

[-- Attachment #1: Type: text/plain, Size: 4860 bytes --]

On 2019/11/13 12:00, zhong jiang wrote:
> Hi,  Matthew
>
> When appling the following patch,  I  hit the an warning.
>
> *WARNING: suspicious RCU usage in memfd_wait_for_pins*
>
> It is because we remove the rcu_read_lock/read_read_unlock. but We still use
> radix_tree_deref_slot checking rcu_read_lock_held().
>
> A simple fix as follows without test.
>
> diff --git a/mm/memfd.c b/mm/memfd.c
> index 5859705dafe1..af244c9c8b6f 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -40,6 +40,7 @@ static void memfd_tag_pins(struct address_space *mapping)
>         start = 0;
>
>         xa_lock_irq(&mapping->i_pages);
> +       rcu_read_lock();
>         radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>                 page = radix_tree_deref_slot(slot);
>                 if (!page || radix_tree_exception(page)) {
> @@ -60,6 +61,7 @@ static void memfd_tag_pins(struct address_space *mapping)
>                 cond_resched();
cond_resched() should be replaced by cond_resched_rcu(); 
>                 xa_lock_irq(&mapping->i_pages);
>         }
> +       rcu_read_unlock();
>         xa_unlock_irq(&mapping->i_pages);
>  }
>
> Thanks,
> zhong jiang
>
> On 2019/10/26 0:58, Matthew Wilcox wrote:
>> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>>
>> The RCU lock is insufficient to protect the radix tree iteration as
>> a deletion from the tree can occur before we take the spinlock to
>> tag the entry.  In 4.19, this has manifested as a bug with the following
>> trace:
>>
>> kernel BUG at lib/radix-tree.c:1429!
>> invalid opcode: 0000 [#1] SMP KASAN PTI
>> CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
>> RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
>> Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
>> RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
>> RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
>> RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
>> RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
>> R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
>> R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
>> FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
>> Call Trace:
>>  memfd_tag_pins mm/memfd.c:51 [inline]
>>  memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
>>  memfd_add_seals mm/memfd.c:215 [inline]
>>  memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
>>  do_fcntl+0x589/0xeb0 fs/fcntl.c:421
>>  __do_sys_fcntl fs/fcntl.c:463 [inline]
>>  __se_sys_fcntl fs/fcntl.c:448 [inline]
>>  __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
>>  do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293
>>
>> The problem does not occur in mainline due to the XArray rewrite which
>> changed the locking to exclude modification of the tree during iteration.
>> At the time, nobody realised this was a bugfix.  Backport the locking
>> changes to stable.
>>
>> Cc: stable@vger.kernel.org
>> Reported-by: zhong jiang <zhongjiang@huawei.com>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>  mm/memfd.c | 18 ++++++++++--------
>>  1 file changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/memfd.c b/mm/memfd.c
>> index 2bb5e257080e..5859705dafe1 100644
>> --- a/mm/memfd.c
>> +++ b/mm/memfd.c
>> @@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
>>  	void __rcu **slot;
>>  	pgoff_t start;
>>  	struct page *page;
>> +	unsigned int tagged = 0;
>>  
>>  	lru_add_drain();
>>  	start = 0;
>> -	rcu_read_lock();
>>  
>> +	xa_lock_irq(&mapping->i_pages);
>>  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>>  		page = radix_tree_deref_slot(slot);
>>  		if (!page || radix_tree_exception(page)) {
>> @@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
>>  				continue;
>>  			}
>>  		} else if (page_count(page) - page_mapcount(page) > 1) {
>> -			xa_lock_irq(&mapping->i_pages);
>>  			radix_tree_tag_set(&mapping->i_pages, iter.index,
>>  					   MEMFD_TAG_PINNED);
>> -			xa_unlock_irq(&mapping->i_pages);
>>  		}
>>  
>> -		if (need_resched()) {
>> -			slot = radix_tree_iter_resume(slot, &iter);
>> -			cond_resched_rcu();
>> -		}
>> +		if (++tagged % 1024)
>> +			continue;
>> +
>> +		slot = radix_tree_iter_resume(slot, &iter);
>> +		xa_unlock_irq(&mapping->i_pages);
>> +		cond_resched();
>> +		xa_lock_irq(&mapping->i_pages);
>>  	}
>> -	rcu_read_unlock();
>> +	xa_unlock_irq(&mapping->i_pages);
>>  }
>>  
>>  /*
>


[-- Attachment #2: Type: text/html, Size: 6591 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] memfd: Fix locking when tagging pins
  2019-11-13  4:00 ` zhong jiang
  2019-11-14  1:53   ` zhong jiang
@ 2019-11-14  2:26   ` Matthew Wilcox
  2019-11-15 10:46     ` zhong jiang
  1 sibling, 1 reply; 10+ messages in thread
From: Matthew Wilcox @ 2019-11-14  2:26 UTC (permalink / raw)
  To: zhong jiang; +Cc: gregkh, stable, dh.herrmann, linux-mm

On Wed, Nov 13, 2019 at 12:00:22PM +0800, zhong jiang wrote:
> Hi,  Matthew
> 
> When appling the following patch,  I  hit the an warning.
> 
> *WARNING: suspicious RCU usage in memfd_wait_for_pins*
> 
> It is because we remove the rcu_read_lock/read_read_unlock. but We still use
> radix_tree_deref_slot checking rcu_read_lock_held().

Ooh.  We should be using radix_tree_deref_slot_protected() instead.
Can I trouble you to send that patch?

> A simple fix as follows without test.
> 
> diff --git a/mm/memfd.c b/mm/memfd.c
> index 5859705dafe1..af244c9c8b6f 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -40,6 +40,7 @@ static void memfd_tag_pins(struct address_space *mapping)
>         start = 0;
> 
>         xa_lock_irq(&mapping->i_pages);
> +       rcu_read_lock();
>         radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>                 page = radix_tree_deref_slot(slot);
>                 if (!page || radix_tree_exception(page)) {
> @@ -60,6 +61,7 @@ static void memfd_tag_pins(struct address_space *mapping)
>                 cond_resched();
>                 xa_lock_irq(&mapping->i_pages);
>         }
> +       rcu_read_unlock();
>         xa_unlock_irq(&mapping->i_pages);
>  }
> 
> Thanks,
> zhong jiang
> 
> On 2019/10/26 0:58, Matthew Wilcox wrote:
> > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> >
> > The RCU lock is insufficient to protect the radix tree iteration as
> > a deletion from the tree can occur before we take the spinlock to
> > tag the entry.  In 4.19, this has manifested as a bug with the following
> > trace:
> >
> > kernel BUG at lib/radix-tree.c:1429!
> > invalid opcode: 0000 [#1] SMP KASAN PTI
> > CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
> > RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
> > Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
> > RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
> > RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
> > RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
> > RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
> > R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
> > R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
> > FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
> > Call Trace:
> >  memfd_tag_pins mm/memfd.c:51 [inline]
> >  memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
> >  memfd_add_seals mm/memfd.c:215 [inline]
> >  memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
> >  do_fcntl+0x589/0xeb0 fs/fcntl.c:421
> >  __do_sys_fcntl fs/fcntl.c:463 [inline]
> >  __se_sys_fcntl fs/fcntl.c:448 [inline]
> >  __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
> >  do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293
> >
> > The problem does not occur in mainline due to the XArray rewrite which
> > changed the locking to exclude modification of the tree during iteration.
> > At the time, nobody realised this was a bugfix.  Backport the locking
> > changes to stable.
> >
> > Cc: stable@vger.kernel.org
> > Reported-by: zhong jiang <zhongjiang@huawei.com>
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  mm/memfd.c | 18 ++++++++++--------
> >  1 file changed, 10 insertions(+), 8 deletions(-)
> >
> > diff --git a/mm/memfd.c b/mm/memfd.c
> > index 2bb5e257080e..5859705dafe1 100644
> > --- a/mm/memfd.c
> > +++ b/mm/memfd.c
> > @@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
> >  	void __rcu **slot;
> >  	pgoff_t start;
> >  	struct page *page;
> > +	unsigned int tagged = 0;
> >  
> >  	lru_add_drain();
> >  	start = 0;
> > -	rcu_read_lock();
> >  
> > +	xa_lock_irq(&mapping->i_pages);
> >  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
> >  		page = radix_tree_deref_slot(slot);
> >  		if (!page || radix_tree_exception(page)) {
> > @@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
> >  				continue;
> >  			}
> >  		} else if (page_count(page) - page_mapcount(page) > 1) {
> > -			xa_lock_irq(&mapping->i_pages);
> >  			radix_tree_tag_set(&mapping->i_pages, iter.index,
> >  					   MEMFD_TAG_PINNED);
> > -			xa_unlock_irq(&mapping->i_pages);
> >  		}
> >  
> > -		if (need_resched()) {
> > -			slot = radix_tree_iter_resume(slot, &iter);
> > -			cond_resched_rcu();
> > -		}
> > +		if (++tagged % 1024)
> > +			continue;
> > +
> > +		slot = radix_tree_iter_resume(slot, &iter);
> > +		xa_unlock_irq(&mapping->i_pages);
> > +		cond_resched();
> > +		xa_lock_irq(&mapping->i_pages);
> >  	}
> > -	rcu_read_unlock();
> > +	xa_unlock_irq(&mapping->i_pages);
> >  }
> >  
> >  /*
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] memfd: Fix locking when tagging pins
  2019-11-14  2:26   ` Matthew Wilcox
@ 2019-11-15 10:46     ` zhong jiang
  0 siblings, 0 replies; 10+ messages in thread
From: zhong jiang @ 2019-11-15 10:46 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: gregkh, stable, dh.herrmann, linux-mm

On 2019/11/14 10:26, Matthew Wilcox wrote:
> On Wed, Nov 13, 2019 at 12:00:22PM +0800, zhong jiang wrote:
>> Hi,  Matthew
>>
>> When appling the following patch,  I  hit the an warning.
>>
>> *WARNING: suspicious RCU usage in memfd_wait_for_pins*
>>
>> It is because we remove the rcu_read_lock/read_read_unlock. but We still use
>> radix_tree_deref_slot checking rcu_read_lock_held().
> Ooh.  We should be using radix_tree_deref_slot_protected() instead.
> Can I trouble you to send that patch?
Will do . Thanks,

Sincerely,
zhong jiang
>> A simple fix as follows without test.
>>
>> diff --git a/mm/memfd.c b/mm/memfd.c
>> index 5859705dafe1..af244c9c8b6f 100644
>> --- a/mm/memfd.c
>> +++ b/mm/memfd.c
>> @@ -40,6 +40,7 @@ static void memfd_tag_pins(struct address_space *mapping)
>>         start = 0;
>>
>>         xa_lock_irq(&mapping->i_pages);
>> +       rcu_read_lock();
>>         radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>>                 page = radix_tree_deref_slot(slot);
>>                 if (!page || radix_tree_exception(page)) {
>> @@ -60,6 +61,7 @@ static void memfd_tag_pins(struct address_space *mapping)
>>                 cond_resched();
>>                 xa_lock_irq(&mapping->i_pages);
>>         }
>> +       rcu_read_unlock();
>>         xa_unlock_irq(&mapping->i_pages);
>>  }
>>
>> Thanks,
>> zhong jiang
>>
>> On 2019/10/26 0:58, Matthew Wilcox wrote:
>>> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>>>
>>> The RCU lock is insufficient to protect the radix tree iteration as
>>> a deletion from the tree can occur before we take the spinlock to
>>> tag the entry.  In 4.19, this has manifested as a bug with the following
>>> trace:
>>>
>>> kernel BUG at lib/radix-tree.c:1429!
>>> invalid opcode: 0000 [#1] SMP KASAN PTI
>>> CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25
>>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
>>> RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429
>>> Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe
>>> RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016
>>> RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0
>>> RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98
>>> RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e
>>> R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004
>>> R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002
>>> FS:  00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000
>>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0
>>> Call Trace:
>>>  memfd_tag_pins mm/memfd.c:51 [inline]
>>>  memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81
>>>  memfd_add_seals mm/memfd.c:215 [inline]
>>>  memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247
>>>  do_fcntl+0x589/0xeb0 fs/fcntl.c:421
>>>  __do_sys_fcntl fs/fcntl.c:463 [inline]
>>>  __se_sys_fcntl fs/fcntl.c:448 [inline]
>>>  __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448
>>>  do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293
>>>
>>> The problem does not occur in mainline due to the XArray rewrite which
>>> changed the locking to exclude modification of the tree during iteration.
>>> At the time, nobody realised this was a bugfix.  Backport the locking
>>> changes to stable.
>>>
>>> Cc: stable@vger.kernel.org
>>> Reported-by: zhong jiang <zhongjiang@huawei.com>
>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>> ---
>>>  mm/memfd.c | 18 ++++++++++--------
>>>  1 file changed, 10 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/mm/memfd.c b/mm/memfd.c
>>> index 2bb5e257080e..5859705dafe1 100644
>>> --- a/mm/memfd.c
>>> +++ b/mm/memfd.c
>>> @@ -34,11 +34,12 @@ static void memfd_tag_pins(struct address_space *mapping)
>>>  	void __rcu **slot;
>>>  	pgoff_t start;
>>>  	struct page *page;
>>> +	unsigned int tagged = 0;
>>>  
>>>  	lru_add_drain();
>>>  	start = 0;
>>> -	rcu_read_lock();
>>>  
>>> +	xa_lock_irq(&mapping->i_pages);
>>>  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
>>>  		page = radix_tree_deref_slot(slot);
>>>  		if (!page || radix_tree_exception(page)) {
>>> @@ -47,18 +48,19 @@ static void memfd_tag_pins(struct address_space *mapping)
>>>  				continue;
>>>  			}
>>>  		} else if (page_count(page) - page_mapcount(page) > 1) {
>>> -			xa_lock_irq(&mapping->i_pages);
>>>  			radix_tree_tag_set(&mapping->i_pages, iter.index,
>>>  					   MEMFD_TAG_PINNED);
>>> -			xa_unlock_irq(&mapping->i_pages);
>>>  		}
>>>  
>>> -		if (need_resched()) {
>>> -			slot = radix_tree_iter_resume(slot, &iter);
>>> -			cond_resched_rcu();
>>> -		}
>>> +		if (++tagged % 1024)
>>> +			continue;
>>> +
>>> +		slot = radix_tree_iter_resume(slot, &iter);
>>> +		xa_unlock_irq(&mapping->i_pages);
>>> +		cond_resched();
>>> +		xa_lock_irq(&mapping->i_pages);
>>>  	}
>>> -	rcu_read_unlock();
>>> +	xa_unlock_irq(&mapping->i_pages);
>>>  }
>>>  
>>>  /*
> .
>




^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-11-15 10:46 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-25 16:58 [PATCH 4.19] memfd: Fix locking when tagging pins Matthew Wilcox
2019-10-25 16:58 ` [PATCH 4.14] " Matthew Wilcox
2019-10-25 16:58 ` [PATCH 4.9] " Matthew Wilcox
2019-10-25 16:58 ` [PATCH 4.4] " Matthew Wilcox
2019-10-26  2:03 ` [PATCH 4.19] " zhong jiang
2019-10-26 15:34   ` Sasha Levin
2019-11-13  4:00 ` zhong jiang
2019-11-14  1:53   ` zhong jiang
2019-11-14  2:26   ` Matthew Wilcox
2019-11-15 10:46     ` zhong jiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).