linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
@ 2020-09-08  8:17 Pradeep P V K
  2020-09-08 11:25 ` Miklos Szeredi
  2020-09-08 11:27 ` Matthew Wilcox
  0 siblings, 2 replies; 9+ messages in thread
From: Pradeep P V K @ 2020-09-08  8:17 UTC (permalink / raw)
  To: miklos, linux-fsdevel, willy; +Cc: stummala, sayalil, Pradeep P V K

From: Pradeep P V K <ppvk@codeaurora.org>

There is a potential race between fuse_abort_conn() and
fuse_copy_page() as shown below, due to which VM_BUG_ON_PAGE
crash is observed for accessing a free page.

context#1:                      context#2:
fuse_dev_do_read()              fuse_abort_conn()
->fuse_copy_args()               ->end_requests()
 ->fuse_copy_pages()              ->request_end()
    ->fuse_copy_page()               ->fuse_writepage_end)
      ->fuse_ref_page()                ->fuse_writepage_free()
                                          ->__free_page()
					    ->put_page_testzero()

      ->get_page()
      ->VM_BUG_ON_PAGE()

This results in below crash as when ->put_page_testzero() in context#2
decrease the page reference and get_page() in context#1 accessed it
with zero page reference count.

[  174.391095]  (1)[10406:Thread-6]page dumped because:
VM_BUG_ON_PAGE(((unsigned int) page_ref_count(page) + 127u <= 127u))
[  174.391113]  (1)[10406:Thread-6]page allocated via order 0,
migratetype Unmovable, gfp_mask
0x620042(GFP_NOFS|__GFP_HIGHMEM|__GFP_HARDWALL), pid 261, ts
174390946312 ns

[  174.391135]  (1)[10406:Thread-6] prep_new_page+0x13c/0x210
[  174.391148]  (1)[10406:Thread-6] get_page_from_freelist+0x21ac/0x2370
[  174.391161]  (1)[10406:Thread-6] __alloc_pages_nodemask+0x244/0x14a8
[  174.391176]  (1)[10406:Thread-6] fuse_writepages_fill+0x150/0x708
[  174.391190]  (1)[10406:Thread-6] write_cache_pages+0x3d8/0x550
[  174.391202]  (1)[10406:Thread-6] fuse_writepages+0x94/0x130
[  174.391214]  (1)[10406:Thread-6] do_writepages+0x74/0x140
[  174.391228]  (1)[10406:Thread-6] __writeback_single_inode+0x168/0x788
[  174.391239]  (1)[10406:Thread-6] writeback_sb_inodes+0x56c/0xab8
[  174.391251]  (1)[10406:Thread-6] __writeback_inodes_wb+0x94/0x180
[  174.391262]  (1)[10406:Thread-6] wb_writeback+0x318/0x618
[  174.391274]  (1)[10406:Thread-6] wb_workfn+0x468/0x828
[  174.391290]  (1)[10406:Thread-6] process_one_work+0x3d0/0x720
[  174.391302]  (1)[10406:Thread-6] worker_thread+0x234/0x4c0
[  174.391314]  (1)[10406:Thread-6] kthread+0x144/0x158
[  174.391327]  (1)[10406:Thread-6] ret_from_fork+0x10/0x1c
[  174.391363]  (1)[10406:Thread-6]------------[ cut here ]------------
[  174.391371]  (1)[10406:Thread-6]kernel BUG at include/linux/mm.h:980!
[  174.391381]  (1)[10406:Thread-6]Internal error: Oops - BUG: 0 [#1]
...
[  174.486928]  (1)[10406:Thread-6]pc : fuse_copy_page+0x750/0x790
[  174.493029]  (1)[10406:Thread-6]lr : fuse_copy_page+0x750/0x790
[  174.718831]  (1)[10406:Thread-6] fuse_copy_page+0x750/0x790
[  174.718838]  (1)[10406:Thread-6] fuse_copy_args+0xb4/0x1e8
[  174.718843]  (1)[10406:Thread-6] fuse_dev_do_read+0x424/0x888
[  174.718848]  (1)[10406:Thread-6] fuse_dev_splice_read+0x94/0x200
[  174.718856]  (1)[10406:Thread-6] __arm64_sys_splice+0x874/0xb20
[  174.718864]  (1)[10406:Thread-6] el0_svc_common+0xc8/0x240
[  174.718869]  (1)[10406:Thread-6] el0_svc_handler+0x6c/0x88
[  174.718875]  (1)[10406:Thread-6] el0_svc+0x8/0xc
[  174.778853]  (1)[10406:Thread-6]Kernel panic - not syncing: Fatal

Fix this by protecting fuse_ref_page() with the same fc->lock as in
fuse_abort_conn().

Changes since V3:
- Fix smatch warnings.

Changes since V2:
- Moved the spin lock from fuse_copy_pages() to fuse_ref_page().

Changes since V1:
- Modified the logic as per kernel v5.9-rc1.
- Added Reported by tag.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pradeep P V K <ppvk@codeaurora.org>
---
 fs/fuse/dev.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 02b3c36..25f2086 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -875,7 +875,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 }
 
 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
-			 unsigned offset, unsigned count)
+			 unsigned offset, unsigned count, struct fuse_conn *fc)
 {
 	struct pipe_buffer *buf;
 	int err;
@@ -883,15 +883,18 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 	if (cs->nr_segs >= cs->pipe->max_usage)
 		return -EIO;
 
+	spin_lock(&fc->lock);
 	err = unlock_request(cs->req);
-	if (err)
+	if (err) {
+		spin_unlock(&fc->lock);
 		return err;
-
+	}
 	fuse_copy_finish(cs);
 
 	buf = cs->pipebufs;
 	get_page(page);
 	buf->page = page;
+	spin_unlock(&fc->lock);
 	buf->offset = offset;
 	buf->len = count;
 
@@ -907,7 +910,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
  * done atomically
  */
 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
-			  unsigned offset, unsigned count, int zeroing)
+			  unsigned offset, unsigned count, int zeroing, struct fuse_conn *fc)
 {
 	int err;
 	struct page *page = *pagep;
@@ -917,7 +920,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 
 	while (count) {
 		if (cs->write && cs->pipebufs && page) {
-			return fuse_ref_page(cs, page, offset, count);
+			return fuse_ref_page(cs, page, offset, count, fc);
 		} else if (!cs->len) {
 			if (cs->move_pages && page &&
 			    offset == 0 && count == PAGE_SIZE) {
@@ -945,7 +948,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 
 /* Copy pages in the request to/from userspace buffer */
 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
-			   int zeroing)
+			   int zeroing, struct fuse_conn *fc)
 {
 	unsigned i;
 	struct fuse_req *req = cs->req;
@@ -957,7 +960,7 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 		unsigned int offset = ap->descs[i].offset;
 		unsigned int count = min(nbytes, ap->descs[i].length);
 
-		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
+		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing, fc);
 		if (err)
 			return err;
 
@@ -983,7 +986,7 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 /* Copy request arguments to/from userspace buffer */
 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 			  unsigned argpages, struct fuse_arg *args,
-			  int zeroing)
+			  int zeroing, struct fuse_conn *fc)
 {
 	int err = 0;
 	unsigned i;
@@ -991,7 +994,7 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 	for (i = 0; !err && i < numargs; i++)  {
 		struct fuse_arg *arg = &args[i];
 		if (i == numargs - 1 && argpages)
-			err = fuse_copy_pages(cs, arg->size, zeroing);
+			err = fuse_copy_pages(cs, arg->size, zeroing, fc);
 		else
 			err = fuse_copy_one(cs, arg->value, arg->size);
 	}
@@ -1260,7 +1263,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
 	if (!err)
 		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
-				     (struct fuse_arg *) args->in_args, 0);
+				     (struct fuse_arg *) args->in_args, 0, fc);
 	fuse_copy_finish(cs);
 	spin_lock(&fpq->lock);
 	clear_bit(FR_LOCKED, &req->flags);
@@ -1590,7 +1593,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 			goto out_iput;
 
 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
-		err = fuse_copy_page(cs, &page, offset, this_num, 0);
+		err = fuse_copy_page(cs, &page, offset, this_num, 0, fc);
 		if (!err && offset == 0 &&
 		    (this_num == PAGE_SIZE || file_size == end))
 			SetPageUptodate(page);
@@ -1792,7 +1795,7 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
 }
 
 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
-			 unsigned nbytes)
+			 unsigned nbytes, struct fuse_conn *fc)
 {
 	unsigned reqsize = sizeof(struct fuse_out_header);
 
@@ -1809,7 +1812,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
 		lastarg->size -= diffsize;
 	}
 	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
-			      args->out_args, args->page_zeroing);
+			      args->out_args, args->page_zeroing, fc);
 }
 
 /*
@@ -1894,7 +1897,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
 	if (oh.error)
 		err = nbytes != sizeof(oh) ? -EINVAL : 0;
 	else
-		err = copy_out_args(cs, req->args, nbytes);
+		err = copy_out_args(cs, req->args, nbytes, fc);
 	fuse_copy_finish(cs);
 
 	spin_lock(&fpq->lock);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-08  8:17 [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page Pradeep P V K
@ 2020-09-08 11:25 ` Miklos Szeredi
  2020-09-10 15:42   ` ppvk
       [not found]   ` <0101017478aef256-c8471520-26b1-4b87-a3b8-8266627b704f-000000@us-west-2.amazonses.com>
  2020-09-08 11:27 ` Matthew Wilcox
  1 sibling, 2 replies; 9+ messages in thread
From: Miklos Szeredi @ 2020-09-08 11:25 UTC (permalink / raw)
  To: Pradeep P V K
  Cc: linux-fsdevel, Matthew Wilcox, Sahitya Tummala, sayalil, Pradeep P V K

On Tue, Sep 8, 2020 at 10:17 AM Pradeep P V K <pragalla@qti.qualcomm.com> wrote:
>
> From: Pradeep P V K <ppvk@codeaurora.org>
>
> There is a potential race between fuse_abort_conn() and
> fuse_copy_page() as shown below, due to which VM_BUG_ON_PAGE
> crash is observed for accessing a free page.
>
> context#1:                      context#2:
> fuse_dev_do_read()              fuse_abort_conn()
> ->fuse_copy_args()               ->end_requests()

This shouldn't happen due to FR_LOCKED logic.   Are you seeing this on
an upstream kernel?  Which version?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-08  8:17 [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page Pradeep P V K
  2020-09-08 11:25 ` Miklos Szeredi
@ 2020-09-08 11:27 ` Matthew Wilcox
  2020-09-10 15:49   ` ppvk
  1 sibling, 1 reply; 9+ messages in thread
From: Matthew Wilcox @ 2020-09-08 11:27 UTC (permalink / raw)
  To: Pradeep P V K; +Cc: miklos, linux-fsdevel, stummala, sayalil, Pradeep P V K

On Tue, Sep 08, 2020 at 01:47:06PM +0530, Pradeep P V K wrote:
> Changes since V3:
> - Fix smatch warnings.
> 
> Changes since V2:
> - Moved the spin lock from fuse_copy_pages() to fuse_ref_page().
> 
> Changes since V1:
> - Modified the logic as per kernel v5.9-rc1.
> - Added Reported by tag.
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

Umm, the way this is written, it looks like Dan reported the original
bug rather than a bug in v3.  The usual way is to credit Dan in the
'Changes since' rather than putting in a 'Reported-by'.

>  static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
> -			 unsigned offset, unsigned count)
> +			 unsigned offset, unsigned count, struct fuse_conn *fc)

I'm no expert on fuse, but it looks to me like you should put a pointer
to the fuse_conn in struct fuse_copy_state rather than passing it down
through all these callers.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-08 11:25 ` Miklos Szeredi
@ 2020-09-10 15:42   ` ppvk
       [not found]   ` <0101017478aef256-c8471520-26b1-4b87-a3b8-8266627b704f-000000@us-west-2.amazonses.com>
  1 sibling, 0 replies; 9+ messages in thread
From: ppvk @ 2020-09-10 15:42 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Pradeep P V K, linux-fsdevel, Matthew Wilcox, Sahitya Tummala, sayalil

On 2020-09-08 16:55, Miklos Szeredi wrote:
> On Tue, Sep 8, 2020 at 10:17 AM Pradeep P V K 
> <pragalla@qti.qualcomm.com> wrote:
>> 
>> From: Pradeep P V K <ppvk@codeaurora.org>
>> 
>> There is a potential race between fuse_abort_conn() and
>> fuse_copy_page() as shown below, due to which VM_BUG_ON_PAGE
>> crash is observed for accessing a free page.
>> 
>> context#1:                      context#2:
>> fuse_dev_do_read()              fuse_abort_conn()
>> ->fuse_copy_args()               ->end_requests()
> 
> This shouldn't happen due to FR_LOCKED logic.   Are you seeing this on
> an upstream kernel?  Which version?
> 
> Thanks,
> Miklos

This is happen just after unlock_request() in fuse_ref_page(). In 
unlock_request(), it will clear the FR_LOCKED bit.
As there is no protection between context#1 & context#2 during 
unlock_request(), there are chances that it could happen.

The value of request flags under "fuse_req" DS is "1561" and this tells 
FR_PRIVATE bit is set and there by, it adds the request to end_io list 
and free.
This was seen on upstream kernel - v4.19 stable.

Thanks and Regards,
Pradeep

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-08 11:27 ` Matthew Wilcox
@ 2020-09-10 15:49   ` ppvk
  0 siblings, 0 replies; 9+ messages in thread
From: ppvk @ 2020-09-10 15:49 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Pradeep P V K, miklos, linux-fsdevel, stummala, sayalil

On 2020-09-08 16:57, Matthew Wilcox wrote:
> On Tue, Sep 08, 2020 at 01:47:06PM +0530, Pradeep P V K wrote:
>> Changes since V3:
>> - Fix smatch warnings.
>> 
>> Changes since V2:
>> - Moved the spin lock from fuse_copy_pages() to fuse_ref_page().
>> 
>> Changes since V1:
>> - Modified the logic as per kernel v5.9-rc1.
>> - Added Reported by tag.
>> 
>> Reported-by: kernel test robot <lkp@intel.com>
>> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> 
> Umm, the way this is written, it looks like Dan reported the original
> bug rather than a bug in v3.  The usual way is to credit Dan in the
> 'Changes since' rather than putting in a 'Reported-by'.
> 
sure thanks. i will note this for my next patches to upstream.

>>  static int fuse_ref_page(struct fuse_copy_state *cs, struct page 
>> *page,
>> -			 unsigned offset, unsigned count)
>> +			 unsigned offset, unsigned count, struct fuse_conn *fc)
> 
> I'm no expert on fuse, but it looks to me like you should put a pointer
> to the fuse_conn in struct fuse_copy_state rather than passing it down
> through all these callers.

True but will wait for other expert suggestions and comments too.

Thanks and Regards,
Pradeep

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
       [not found]   ` <0101017478aef256-c8471520-26b1-4b87-a3b8-8266627b704f-000000@us-west-2.amazonses.com>
@ 2020-09-14  8:11     ` Miklos Szeredi
  2020-09-14 13:32       ` ppvk
  0 siblings, 1 reply; 9+ messages in thread
From: Miklos Szeredi @ 2020-09-14  8:11 UTC (permalink / raw)
  To: Pradeep P V K
  Cc: Pradeep P V K, linux-fsdevel, Matthew Wilcox, Sahitya Tummala, sayalil

[-- Attachment #1: Type: text/plain, Size: 1309 bytes --]

On Thu, Sep 10, 2020 at 5:42 PM <ppvk@codeaurora.org> wrote:
>
> On 2020-09-08 16:55, Miklos Szeredi wrote:
> > On Tue, Sep 8, 2020 at 10:17 AM Pradeep P V K
> > <pragalla@qti.qualcomm.com> wrote:
> >>
> >> From: Pradeep P V K <ppvk@codeaurora.org>
> >>
> >> There is a potential race between fuse_abort_conn() and
> >> fuse_copy_page() as shown below, due to which VM_BUG_ON_PAGE
> >> crash is observed for accessing a free page.
> >>
> >> context#1:                      context#2:
> >> fuse_dev_do_read()              fuse_abort_conn()
> >> ->fuse_copy_args()               ->end_requests()
> >
> > This shouldn't happen due to FR_LOCKED logic.   Are you seeing this on
> > an upstream kernel?  Which version?
> >
> > Thanks,
> > Miklos
>
> This is happen just after unlock_request() in fuse_ref_page(). In
> unlock_request(), it will clear the FR_LOCKED bit.
> As there is no protection between context#1 & context#2 during
> unlock_request(), there are chances that it could happen.

Ah, indeed, I missed that one.

Similar issue in fuse_try_move_page(), which dereferences oldpage
after unlock_request().

Fix for both is to grab a reference to the page from ap->pages[] array
*before* calling unlock_request().

Attached untested patch.   Could you please verify that it fixes the bug?

Thanks,
Miklos

[-- Attachment #2: fuse-fix-page-dereference-after-free.patch --]
[-- Type: text/x-patch, Size: 2325 bytes --]

From: Miklos Szeredi <mszeredi@redhat.com>
Subject: fuse: fix page dereference after free

After unlock_request() pages from the ap->pages[] array may be put and can
be freed.

Prevent use after free by grabbing a reference to the page before calling
unlock_request().

This was originally reported by kernel test robot and the original patch
was created by Pradeep P V K.

Reported-by: Pradeep P V K <ppvk@codeaurora.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c |   28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -785,15 +785,16 @@ static int fuse_try_move_page(struct fus
 	struct page *newpage;
 	struct pipe_buffer *buf = cs->pipebufs;
 
+	get_page(oldpage);
 	err = unlock_request(cs->req);
 	if (err)
-		return err;
+		goto out_put_old;
 
 	fuse_copy_finish(cs);
 
 	err = pipe_buf_confirm(cs->pipe, buf);
 	if (err)
-		return err;
+		goto out_put_old;
 
 	BUG_ON(!cs->nr_segs);
 	cs->currbuf = buf;
@@ -833,7 +834,7 @@ static int fuse_try_move_page(struct fus
 	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 	if (err) {
 		unlock_page(newpage);
-		return err;
+		goto out_put_old;
 	}
 
 	get_page(newpage);
@@ -852,14 +853,19 @@ static int fuse_try_move_page(struct fus
 	if (err) {
 		unlock_page(newpage);
 		put_page(newpage);
-		return err;
+		goto out_put_old;
 	}
 
 	unlock_page(oldpage);
+	/* Drop ref for ap->pages[] array */
 	put_page(oldpage);
 	cs->len = 0;
 
-	return 0;
+	err = 0;
+out_put_old:
+	/* Drop ref obtained in this function */
+	put_page(oldpage);
+	return err;
 
 out_fallback_unlock:
 	unlock_page(newpage);
@@ -868,10 +874,10 @@ static int fuse_try_move_page(struct fus
 	cs->offset = buf->offset;
 
 	err = lock_request(cs->req);
-	if (err)
-		return err;
+	if (!err)
+		err = 1;
 
-	return 1;
+	goto out_put_old;
 }
 
 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
@@ -883,14 +889,16 @@ static int fuse_ref_page(struct fuse_cop
 	if (cs->nr_segs >= cs->pipe->max_usage)
 		return -EIO;
 
+	get_page(page);
 	err = unlock_request(cs->req);
-	if (err)
+	if (err) {
+		put_page(page);
 		return err;
+	}
 
 	fuse_copy_finish(cs);
 
 	buf = cs->pipebufs;
-	get_page(page);
 	buf->page = page;
 	buf->offset = offset;
 	buf->len = count;

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-14  8:11     ` Miklos Szeredi
@ 2020-09-14 13:32       ` ppvk
  2020-09-16 15:31         ` ppvk
  0 siblings, 1 reply; 9+ messages in thread
From: ppvk @ 2020-09-14 13:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Pradeep P V K, linux-fsdevel, Matthew Wilcox, Sahitya Tummala, sayalil

On 2020-09-14 13:41, Miklos Szeredi wrote:
> On Thu, Sep 10, 2020 at 5:42 PM <ppvk@codeaurora.org> wrote:
>> 
>> On 2020-09-08 16:55, Miklos Szeredi wrote:
>> > On Tue, Sep 8, 2020 at 10:17 AM Pradeep P V K
>> > <pragalla@qti.qualcomm.com> wrote:
>> >>
>> >> From: Pradeep P V K <ppvk@codeaurora.org>
>> >>
>> >> There is a potential race between fuse_abort_conn() and
>> >> fuse_copy_page() as shown below, due to which VM_BUG_ON_PAGE
>> >> crash is observed for accessing a free page.
>> >>
>> >> context#1:                      context#2:
>> >> fuse_dev_do_read()              fuse_abort_conn()
>> >> ->fuse_copy_args()               ->end_requests()
>> >
>> > This shouldn't happen due to FR_LOCKED logic.   Are you seeing this on
>> > an upstream kernel?  Which version?
>> >
>> > Thanks,
>> > Miklos
>> 
>> This is happen just after unlock_request() in fuse_ref_page(). In
>> unlock_request(), it will clear the FR_LOCKED bit.
>> As there is no protection between context#1 & context#2 during
>> unlock_request(), there are chances that it could happen.
> 
> Ah, indeed, I missed that one.
> 
> Similar issue in fuse_try_move_page(), which dereferences oldpage
> after unlock_request().
> 
> Fix for both is to grab a reference to the page from ap->pages[] array
> *before* calling unlock_request().
> 
> Attached untested patch.   Could you please verify that it fixes the 
> bug?
> 
Thanks for the patch. It is an one time issue and bit hard to reproduce 
but still we
will verify the above proposed patch and update the test results here.

Minor comments on the commit text of the proposed patch : This issue was 
originally reported by me and kernel test robot
identified compilation errors on the patch that i submitted.
This confusion might be due to un proper commit text note on "changes 
since v1"

> Thanks,
> Miklos

Thanks and Regards,
Pradeep

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-14 13:32       ` ppvk
@ 2020-09-16 15:31         ` ppvk
  2020-09-18  8:40           ` Miklos Szeredi
  0 siblings, 1 reply; 9+ messages in thread
From: ppvk @ 2020-09-16 15:31 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Pradeep P V K, linux-fsdevel, Matthew Wilcox, Sahitya Tummala, sayalil

[-- Attachment #1: Type: text/plain, Size: 2131 bytes --]

On 2020-09-14 19:02, ppvk@codeaurora.org wrote:
> On 2020-09-14 13:41, Miklos Szeredi wrote:
>> On Thu, Sep 10, 2020 at 5:42 PM <ppvk@codeaurora.org> wrote:
>>> 
>>> On 2020-09-08 16:55, Miklos Szeredi wrote:
>>> > On Tue, Sep 8, 2020 at 10:17 AM Pradeep P V K
>>> > <pragalla@qti.qualcomm.com> wrote:
>>> >>
>>> >> From: Pradeep P V K <ppvk@codeaurora.org>
>>> >>
>>> >> There is a potential race between fuse_abort_conn() and
>>> >> fuse_copy_page() as shown below, due to which VM_BUG_ON_PAGE
>>> >> crash is observed for accessing a free page.
>>> >>
>>> >> context#1:                      context#2:
>>> >> fuse_dev_do_read()              fuse_abort_conn()
>>> >> ->fuse_copy_args()               ->end_requests()
>>> >
>>> > This shouldn't happen due to FR_LOCKED logic.   Are you seeing this on
>>> > an upstream kernel?  Which version?
>>> >
>>> > Thanks,
>>> > Miklos
>>> 
>>> This is happen just after unlock_request() in fuse_ref_page(). In
>>> unlock_request(), it will clear the FR_LOCKED bit.
>>> As there is no protection between context#1 & context#2 during
>>> unlock_request(), there are chances that it could happen.
>> 
>> Ah, indeed, I missed that one.
>> 
>> Similar issue in fuse_try_move_page(), which dereferences oldpage
>> after unlock_request().
>> 
>> Fix for both is to grab a reference to the page from ap->pages[] array
>> *before* calling unlock_request().
>> 
>> Attached untested patch.   Could you please verify that it fixes the 
>> bug?
>> 
> Thanks for the patch. It is an one time issue and bit hard to
> reproduce but still we
> will verify the above proposed patch and update the test results here.
> 
Not seen any issue during 24 hours(+) of stability run with your 
proposed patch.
This covers reads/writes on fuse paths + reboots + other concurrency's.

> Minor comments on the commit text of the proposed patch : This issue
> was originally reported by me and kernel test robot
> identified compilation errors on the patch that i submitted.
> This confusion might be due to un proper commit text note on "changes 
> since v1"
> 
>> Thanks,
>> Miklos
> 
> Thanks and Regards,
> Pradeep

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: fuse-fix-page-dereference-after-free.patch --]
[-- Type: text/x-diff; name=fuse-fix-page-dereference-after-free.patch, Size: 2325 bytes --]

From: Miklos Szeredi <mszeredi@redhat.com>
Subject: fuse: fix page dereference after free

After unlock_request() pages from the ap->pages[] array may be put and can
be freed.

Prevent use after free by grabbing a reference to the page before calling
unlock_request().

This was originally reported by kernel test robot and the original patch
was created by Pradeep P V K.

Reported-by: Pradeep P V K <ppvk@codeaurora.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c |   28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -785,15 +785,16 @@ static int fuse_try_move_page(struct fus
 	struct page *newpage;
 	struct pipe_buffer *buf = cs->pipebufs;
 
+	get_page(oldpage);
 	err = unlock_request(cs->req);
 	if (err)
-		return err;
+		goto out_put_old;
 
 	fuse_copy_finish(cs);
 
 	err = pipe_buf_confirm(cs->pipe, buf);
 	if (err)
-		return err;
+		goto out_put_old;
 
 	BUG_ON(!cs->nr_segs);
 	cs->currbuf = buf;
@@ -833,7 +834,7 @@ static int fuse_try_move_page(struct fus
 	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 	if (err) {
 		unlock_page(newpage);
-		return err;
+		goto out_put_old;
 	}
 
 	get_page(newpage);
@@ -852,14 +853,19 @@ static int fuse_try_move_page(struct fus
 	if (err) {
 		unlock_page(newpage);
 		put_page(newpage);
-		return err;
+		goto out_put_old;
 	}
 
 	unlock_page(oldpage);
+	/* Drop ref for ap->pages[] array */
 	put_page(oldpage);
 	cs->len = 0;
 
-	return 0;
+	err = 0;
+out_put_old:
+	/* Drop ref obtained in this function */
+	put_page(oldpage);
+	return err;
 
 out_fallback_unlock:
 	unlock_page(newpage);
@@ -868,10 +874,10 @@ static int fuse_try_move_page(struct fus
 	cs->offset = buf->offset;
 
 	err = lock_request(cs->req);
-	if (err)
-		return err;
+	if (!err)
+		err = 1;
 
-	return 1;
+	goto out_put_old;
 }
 
 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
@@ -883,14 +889,16 @@ static int fuse_ref_page(struct fuse_cop
 	if (cs->nr_segs >= cs->pipe->max_usage)
 		return -EIO;
 
+	get_page(page);
 	err = unlock_request(cs->req);
-	if (err)
+	if (err) {
+		put_page(page);
 		return err;
+	}
 
 	fuse_copy_finish(cs);
 
 	buf = cs->pipebufs;
-	get_page(page);
 	buf->page = page;
 	buf->offset = offset;
 	buf->len = count;

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page
  2020-09-16 15:31         ` ppvk
@ 2020-09-18  8:40           ` Miklos Szeredi
  0 siblings, 0 replies; 9+ messages in thread
From: Miklos Szeredi @ 2020-09-18  8:40 UTC (permalink / raw)
  To: Pradeep P V K
  Cc: Pradeep P V K, linux-fsdevel, Matthew Wilcox, Sahitya Tummala, sayalil

On Wed, Sep 16, 2020 at 5:31 PM <ppvk@codeaurora.org> wrote:
>
> On 2020-09-14 19:02, ppvk@codeaurora.org wrote:

> > Thanks for the patch. It is an one time issue and bit hard to
> > reproduce but still we
> > will verify the above proposed patch and update the test results here.
> >
> Not seen any issue during 24 hours(+) of stability run with your
> proposed patch.
> This covers reads/writes on fuse paths + reboots + other concurrency's.

Great, thanks for verifying.

Pushed to fuse.git#for-next.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-09-18  8:40 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-08  8:17 [PATCH V4] fuse: Fix VM_BUG_ON_PAGE issue while accessing zero ref count page Pradeep P V K
2020-09-08 11:25 ` Miklos Szeredi
2020-09-10 15:42   ` ppvk
     [not found]   ` <0101017478aef256-c8471520-26b1-4b87-a3b8-8266627b704f-000000@us-west-2.amazonses.com>
2020-09-14  8:11     ` Miklos Szeredi
2020-09-14 13:32       ` ppvk
2020-09-16 15:31         ` ppvk
2020-09-18  8:40           ` Miklos Szeredi
2020-09-08 11:27 ` Matthew Wilcox
2020-09-10 15:49   ` ppvk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).