All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] block: cache index instead of part self to avoid use-after-free
@ 2020-01-06  7:35 Yufen Yu
  2020-01-08 15:01 ` Christoph Hellwig
  2020-01-09  1:35 ` Ming Lei
  0 siblings, 2 replies; 3+ messages in thread
From: Yufen Yu @ 2020-01-06  7:35 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, ming.lei, houtao1, hch, yi.zhang, zhengchuan

When delete partition executes concurrently with IOs issue,
it may cause use-after-free on part in disk_map_sector_rcu()
as following:

blk_account_io_start(req1)  delete_partition  blk_account_io_start(req2)

rcu_read_lock()
disk_map_sector_rcu
part = rcu_dereference(ptbl->part[4])
                           rcu_assign_pointer(ptbl->part[4], NULL);
                           rcu_assign_pointer(ptbl->last_lookup, NULL);
rcu_assign_pointer(ptbl->last_lookup, part);

                           hd_struct_kill(part)
!hd_struct_try_get
  part = &rq->rq_disk->part0;
rcu_read_unlock()
                           __delete_partition
                           call_rcu
                                            rcu_read_lock
                                            disk_map_sector_rcu
                                            part = rcu_dereference(ptbl->last_lookup);

                           delete_partition_work_fn
                           free(part)
                                            hd_struct_try_get(part)
                                            BUG_ON use-after-free

req1 try to get 'ptbl->part[4]', while the part is beening
deleted. Although the delete_partition() will set last_lookup
as NULL, req1 can overwrite it as 'part[4]' again.

After calling call_rcu() and free() for the part, req2 can
access the part by last_lookup, resulting in use after free.

In fact, this bug has been reported by syzbot:
    https://lkml.org/lkml/2019/1/4/357

To fix the bug, we try to cache index of part[] instead of
part[i] itself in last_lookup. Even if the index may been
re-assign, others can either get part[i] as value of NULL,
or get the new allocated part[i] after call_rcu. Both of
them is okay.

Signed-off-by: Yufen Yu <yuyufen@huawei.com>
---
 block/genhd.c             | 15 +++++++++------
 block/partition-generic.c |  2 +-
 include/linux/genhd.h     |  3 ++-
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index ff6268970ddc..97447281a4f5 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -282,18 +282,21 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 	struct disk_part_tbl *ptbl;
 	struct hd_struct *part;
 	int i;
+	int last_lookup;
 
 	ptbl = rcu_dereference(disk->part_tbl);
-
-	part = rcu_dereference(ptbl->last_lookup);
-	if (part && sector_in_part(part, sector))
-		return part;
+	last_lookup = READ_ONCE(ptbl->last_lookup);
+	if (last_lookup > 0 && last_lookup < ptbl->len) {
+		part = rcu_dereference(ptbl->part[last_lookup]);
+		if (part && sector_in_part(part, sector))
+			return part;
+	}
 
 	for (i = 1; i < ptbl->len; i++) {
 		part = rcu_dereference(ptbl->part[i]);
 
 		if (part && sector_in_part(part, sector)) {
-			rcu_assign_pointer(ptbl->last_lookup, part);
+			WRITE_ONCE(ptbl->last_lookup, i);
 			return part;
 		}
 	}
@@ -1263,7 +1266,7 @@ static void disk_replace_part_tbl(struct gendisk *disk,
 	rcu_assign_pointer(disk->part_tbl, new_ptbl);
 
 	if (old_ptbl) {
-		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
+		WRITE_ONCE(old_ptbl->last_lookup, 0);
 		kfree_rcu(old_ptbl, rcu_head);
 	}
 }
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 1d20c9cf213f..a9fd24ae3acb 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -284,7 +284,7 @@ void delete_partition(struct gendisk *disk, int partno)
 		return;
 
 	rcu_assign_pointer(ptbl->part[partno], NULL);
-	rcu_assign_pointer(ptbl->last_lookup, NULL);
+	WRITE_ONCE(ptbl->last_lookup, 0);
 	kobject_put(part->holder_dir);
 	device_del(part_to_dev(part));
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8bb63027e4d6..9be4fb8f8b8b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -160,7 +160,8 @@ enum {
 struct disk_part_tbl {
 	struct rcu_head rcu_head;
 	int len;
-	struct hd_struct __rcu *last_lookup;
+	/* Cache last lookup part[] index */
+	int last_lookup;
 	struct hd_struct __rcu *part[];
 };
 
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] block: cache index instead of part self to avoid use-after-free
  2020-01-06  7:35 [PATCH] block: cache index instead of part self to avoid use-after-free Yufen Yu
@ 2020-01-08 15:01 ` Christoph Hellwig
  2020-01-09  1:35 ` Ming Lei
  1 sibling, 0 replies; 3+ messages in thread
From: Christoph Hellwig @ 2020-01-08 15:01 UTC (permalink / raw)
  To: Yufen Yu; +Cc: axboe, linux-block, ming.lei, houtao1, hch, yi.zhang, zhengchuan

On Mon, Jan 06, 2020 at 03:35:10PM +0800, Yufen Yu wrote:
> When delete partition executes concurrently with IOs issue,
> it may cause use-after-free on part in disk_map_sector_rcu()
> as following:
> 
> blk_account_io_start(req1)  delete_partition  blk_account_io_start(req2)
> 
> rcu_read_lock()
> disk_map_sector_rcu
> part = rcu_dereference(ptbl->part[4])
>                            rcu_assign_pointer(ptbl->part[4], NULL);
>                            rcu_assign_pointer(ptbl->last_lookup, NULL);
> rcu_assign_pointer(ptbl->last_lookup, part);
> 
>                            hd_struct_kill(part)
> !hd_struct_try_get
>   part = &rq->rq_disk->part0;
> rcu_read_unlock()
>                            __delete_partition
>                            call_rcu
>                                             rcu_read_lock
>                                             disk_map_sector_rcu
>                                             part = rcu_dereference(ptbl->last_lookup);
> 
>                            delete_partition_work_fn
>                            free(part)
>                                             hd_struct_try_get(part)
>                                             BUG_ON use-after-free
> 
> req1 try to get 'ptbl->part[4]', while the part is beening
> deleted. Although the delete_partition() will set last_lookup
> as NULL, req1 can overwrite it as 'part[4]' again.
> 
> After calling call_rcu() and free() for the part, req2 can
> access the part by last_lookup, resulting in use after free.
> 
> In fact, this bug has been reported by syzbot:
>     https://lkml.org/lkml/2019/1/4/357
> 
> To fix the bug, we try to cache index of part[] instead of
> part[i] itself in last_lookup. Even if the index may been
> re-assign, others can either get part[i] as value of NULL,
> or get the new allocated part[i] after call_rcu. Both of
> them is okay.
> 
> Signed-off-by: Yufen Yu <yuyufen@huawei.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] block: cache index instead of part self to avoid use-after-free
  2020-01-06  7:35 [PATCH] block: cache index instead of part self to avoid use-after-free Yufen Yu
  2020-01-08 15:01 ` Christoph Hellwig
@ 2020-01-09  1:35 ` Ming Lei
  1 sibling, 0 replies; 3+ messages in thread
From: Ming Lei @ 2020-01-09  1:35 UTC (permalink / raw)
  To: Yufen Yu; +Cc: axboe, linux-block, houtao1, hch, yi.zhang, zhengchuan

On Mon, Jan 06, 2020 at 03:35:10PM +0800, Yufen Yu wrote:
> When delete partition executes concurrently with IOs issue,
> it may cause use-after-free on part in disk_map_sector_rcu()
> as following:
> 
> blk_account_io_start(req1)  delete_partition  blk_account_io_start(req2)
> 
> rcu_read_lock()
> disk_map_sector_rcu
> part = rcu_dereference(ptbl->part[4])
>                            rcu_assign_pointer(ptbl->part[4], NULL);
>                            rcu_assign_pointer(ptbl->last_lookup, NULL);
> rcu_assign_pointer(ptbl->last_lookup, part);
> 
>                            hd_struct_kill(part)
> !hd_struct_try_get
>   part = &rq->rq_disk->part0;
> rcu_read_unlock()
>                            __delete_partition
>                            call_rcu
>                                             rcu_read_lock
>                                             disk_map_sector_rcu
>                                             part = rcu_dereference(ptbl->last_lookup);
> 
>                            delete_partition_work_fn
>                            free(part)
>                                             hd_struct_try_get(part)
>                                             BUG_ON use-after-free
> 
> req1 try to get 'ptbl->part[4]', while the part is beening
> deleted. Although the delete_partition() will set last_lookup
> as NULL, req1 can overwrite it as 'part[4]' again.
> 
> After calling call_rcu() and free() for the part, req2 can
> access the part by last_lookup, resulting in use after free.
> 
> In fact, this bug has been reported by syzbot:
>     https://lkml.org/lkml/2019/1/4/357
> 
> To fix the bug, we try to cache index of part[] instead of
> part[i] itself in last_lookup. Even if the index may been
> re-assign, others can either get part[i] as value of NULL,
> or get the new allocated part[i] after call_rcu. Both of
> them is okay.
> 
> Signed-off-by: Yufen Yu <yuyufen@huawei.com>
> ---
>  block/genhd.c             | 15 +++++++++------
>  block/partition-generic.c |  2 +-
>  include/linux/genhd.h     |  3 ++-
>  3 files changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/block/genhd.c b/block/genhd.c
> index ff6268970ddc..97447281a4f5 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -282,18 +282,21 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
>  	struct disk_part_tbl *ptbl;
>  	struct hd_struct *part;
>  	int i;
> +	int last_lookup;
>  
>  	ptbl = rcu_dereference(disk->part_tbl);
> -
> -	part = rcu_dereference(ptbl->last_lookup);
> -	if (part && sector_in_part(part, sector))
> -		return part;
> +	last_lookup = READ_ONCE(ptbl->last_lookup);
> +	if (last_lookup > 0 && last_lookup < ptbl->len) {
> +		part = rcu_dereference(ptbl->part[last_lookup]);
> +		if (part && sector_in_part(part, sector))
> +			return part;
> +	}
>  
>  	for (i = 1; i < ptbl->len; i++) {
>  		part = rcu_dereference(ptbl->part[i]);
>  
>  		if (part && sector_in_part(part, sector)) {
> -			rcu_assign_pointer(ptbl->last_lookup, part);
> +			WRITE_ONCE(ptbl->last_lookup, i);
>  			return part;
>  		}
>  	}
> @@ -1263,7 +1266,7 @@ static void disk_replace_part_tbl(struct gendisk *disk,
>  	rcu_assign_pointer(disk->part_tbl, new_ptbl);
>  
>  	if (old_ptbl) {
> -		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
> +		WRITE_ONCE(old_ptbl->last_lookup, 0);
>  		kfree_rcu(old_ptbl, rcu_head);
>  	}
>  }
> diff --git a/block/partition-generic.c b/block/partition-generic.c
> index 1d20c9cf213f..a9fd24ae3acb 100644
> --- a/block/partition-generic.c
> +++ b/block/partition-generic.c
> @@ -284,7 +284,7 @@ void delete_partition(struct gendisk *disk, int partno)
>  		return;
>  
>  	rcu_assign_pointer(ptbl->part[partno], NULL);
> -	rcu_assign_pointer(ptbl->last_lookup, NULL);
> +	WRITE_ONCE(ptbl->last_lookup, 0);
>  	kobject_put(part->holder_dir);
>  	device_del(part_to_dev(part));
>  
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 8bb63027e4d6..9be4fb8f8b8b 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -160,7 +160,8 @@ enum {
>  struct disk_part_tbl {
>  	struct rcu_head rcu_head;
>  	int len;
> -	struct hd_struct __rcu *last_lookup;
> +	/* Cache last lookup part[] index */
> +	int last_lookup;
>  	struct hd_struct __rcu *part[];
>  };

As we discussed in the following link:

https://lore.kernel.org/linux-block/5cc465cc-d68c-088e-0729-2695279c7853@huawei.com/T/#m9a959cc91ff8c6387f83aa5c505581159b5b6571

This way works, but adding a little overhead to the fast path, one indirect
memory reference, especially ->part[->last_lookup] may take one extra cacheline.

I will post one patch to fix the issue without adding the extra overhead.


Thanks,
Ming


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-01-09  1:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-06  7:35 [PATCH] block: cache index instead of part self to avoid use-after-free Yufen Yu
2020-01-08 15:01 ` Christoph Hellwig
2020-01-09  1:35 ` Ming Lei

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.