linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] blktrace: Protect q->blk_trace with RCU
@ 2020-02-06 14:28 Jan Kara
  2020-02-06 18:46 ` Chaitanya Kulkarni
                   ` (5 more replies)
  0 siblings, 6 replies; 18+ messages in thread
From: Jan Kara @ 2020-02-06 14:28 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, tristmd, Jan Kara, stable

KASAN is reporting that __blk_add_trace() has a use-after-free issue
when accessing q->blk_trace. Indeed the switching of block tracing (and
thus eventual freeing of q->blk_trace) is completely unsynchronized with
the currently running tracing and thus it can happen that the blk_trace
structure is being freed just while __blk_add_trace() works on it.
Protect accesses to q->blk_trace by RCU during tracing and make sure we
wait for the end of RCU grace period when shutting down tracing. Luckily
that is a rare enough event that we can afford that. Note that postponing
the freeing of blk_trace to an RCU callback should better be avoided as
it could have unexpected user visible side-effects as debugfs files
would be still existing for a short while after block tracing has been
shut down.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
CC: stable@vger.kernel.org
Reported-by: Tristan <tristmd@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/blkdev.h       |   2 +-
 include/linux/blktrace_api.h |  18 +++++--
 kernel/trace/blktrace.c      | 114 +++++++++++++++++++++++++++++++------------
 3 files changed, 97 insertions(+), 37 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4c636c42ad68..1cb5afed5515 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -524,7 +524,7 @@ struct request_queue {
 	unsigned int		sg_reserved_size;
 	int			node;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
-	struct blk_trace	*blk_trace;
+	struct blk_trace __rcu	*blk_trace;
 	struct mutex		blk_trace_mutex;
 #endif
 	/*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7bb2d8de9f30..3b6ff5902edc 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
  **/
 #define blk_add_cgroup_trace_msg(q, cg, fmt, ...)			\
 	do {								\
-		struct blk_trace *bt = (q)->blk_trace;			\
+		struct blk_trace *bt;					\
+									\
+		rcu_read_lock();					\
+		bt = rcu_dereference((q)->blk_trace);			\
 		if (unlikely(bt))					\
 			__trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
+		rcu_read_unlock();					\
 	} while (0)
 #define blk_add_trace_msg(q, fmt, ...)					\
 	blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
@@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
 
 static inline bool blk_trace_note_message_enabled(struct request_queue *q)
 {
-	struct blk_trace *bt = q->blk_trace;
-	if (likely(!bt))
-		return false;
-	return bt->act_mask & BLK_TC_NOTIFY;
+	struct blk_trace *bt;
+	bool ret;
+
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
+	rcu_read_unlock();
+	return ret;
 }
 
 extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 475e29498bca..a6d3016410eb 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -335,6 +335,7 @@ static void put_probe_ref(void)
 
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
+	synchronize_rcu();
 	blk_trace_free(bt);
 	put_probe_ref();
 }
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
 static int __blk_trace_startstop(struct request_queue *q, int start)
 {
 	int ret;
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	bt = rcu_dereference_protected(q->blk_trace,
+				       lockdep_is_held(&q->blk_trace_mutex));
 	if (bt == NULL)
 		return -EINVAL;
 
@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 void blk_trace_shutdown(struct request_queue *q)
 {
 	mutex_lock(&q->blk_trace_mutex);
-
-	if (q->blk_trace) {
+	if (rcu_dereference_protected(q->blk_trace,
+				      lockdep_is_held(&q->blk_trace_mutex))) {
 		__blk_trace_startstop(q, 0);
 		__blk_trace_remove(q);
 	}
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
 #ifdef CONFIG_BLK_CGROUP
 static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	/* We don't use the 'bt' value here except as an optimization... */
+	bt = rcu_dereference_protected(q->blk_trace, 1);
 	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 		return 0;
 
@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
 static void blk_add_trace_rq(struct request *rq, int error,
 			     unsigned int nr_bytes, u32 what, u64 cgid)
 {
-	struct blk_trace *bt = rq->q->blk_trace;
+	struct blk_trace *bt;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(rq->q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	if (blk_rq_is_passthrough(rq))
 		what |= BLK_TC_ACT(BLK_TC_PC);
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
 
 	__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
 			rq->cmd_flags, what, error, 0, NULL, cgid);
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_rq_insert(void *ignore,
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
 			      u32 what, int error)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
 			bio_op(bio), bio->bi_opf, what, error, 0, NULL,
 			blk_trace_bio_get_cgid(q, bio));
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_bio_bounce(void *ignore,
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
 	if (bio)
 		blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
 	else {
-		struct blk_trace *bt = q->blk_trace;
+		struct blk_trace *bt;
 
+		rcu_read_lock();
+		bt = rcu_dereference(q->blk_trace);
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
 					NULL, 0);
+		rcu_read_unlock();
 	}
 }
 
@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
 	if (bio)
 		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
 	else {
-		struct blk_trace *bt = q->blk_trace;
+		struct blk_trace *bt;
 
+		rcu_read_lock();
+		bt = rcu_dereference(q->blk_trace);
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
 					0, 0, NULL, 0);
+		rcu_read_unlock();
 	}
 }
 
 static void blk_add_trace_plug(void *ignore, struct request_queue *q)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
 	if (bt)
 		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
 				    unsigned int depth, bool explicit)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
 	if (bt) {
 		__be64 rpdu = cpu_to_be64(depth);
 		u32 what;
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
 
 		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
 	}
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_split(void *ignore,
 				struct request_queue *q, struct bio *bio,
 				unsigned int pdu)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
 	if (bt) {
 		__be64 rpdu = cpu_to_be64(pdu);
 
@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
 				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
 				&rpdu, blk_trace_bio_get_cgid(q, bio));
 	}
+	rcu_read_unlock();
 }
 
 /**
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
 				    struct request_queue *q, struct bio *bio,
 				    dev_t dev, sector_t from)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 	struct blk_io_trace_remap r;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	r.device_from = cpu_to_be32(dev);
 	r.device_to   = cpu_to_be32(bio_dev(bio));
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
 			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
 			sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
+	rcu_read_unlock();
 }
 
 /**
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
 				   struct request *rq, dev_t dev,
 				   sector_t from)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 	struct blk_io_trace_remap r;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	r.device_from = cpu_to_be32(dev);
 	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
 	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
 			rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
 			sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
+	rcu_read_unlock();
 }
 
 /**
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
 			 struct request *rq,
 			 void *data, size_t len)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
 				BLK_TA_DRV_DATA, 0, len, data,
 				blk_trace_request_get_cgid(q, rq));
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
 
@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
 		return -EINVAL;
 
 	put_probe_ref();
+	synchronize_rcu();
 	blk_trace_free(bt);
 	return 0;
 }
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 	struct hd_struct *p = dev_to_part(dev);
 	struct request_queue *q;
 	struct block_device *bdev;
+	struct blk_trace *bt;
 	ssize_t ret = -ENXIO;
 
 	bdev = bdget(part_devt(p));
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 
 	mutex_lock(&q->blk_trace_mutex);
 
+	bt = rcu_dereference_protected(q->blk_trace,
+				       lockdep_is_held(&q->blk_trace_mutex));
 	if (attr == &dev_attr_enable) {
-		ret = sprintf(buf, "%u\n", !!q->blk_trace);
+		ret = sprintf(buf, "%u\n", !!bt);
 		goto out_unlock_bdev;
 	}
 
-	if (q->blk_trace == NULL)
+	if (bt == NULL)
 		ret = sprintf(buf, "disabled\n");
 	else if (attr == &dev_attr_act_mask)
-		ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
+		ret = blk_trace_mask2str(buf, bt->act_mask);
 	else if (attr == &dev_attr_pid)
-		ret = sprintf(buf, "%u\n", q->blk_trace->pid);
+		ret = sprintf(buf, "%u\n", bt->pid);
 	else if (attr == &dev_attr_start_lba)
-		ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
+		ret = sprintf(buf, "%llu\n", bt->start_lba);
 	else if (attr == &dev_attr_end_lba)
-		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+		ret = sprintf(buf, "%llu\n", bt->end_lba);
 
 out_unlock_bdev:
 	mutex_unlock(&q->blk_trace_mutex);
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	struct block_device *bdev;
 	struct request_queue *q;
 	struct hd_struct *p;
+	struct blk_trace *bt;
 	u64 value;
 	ssize_t ret = -EINVAL;
 
@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 
 	mutex_lock(&q->blk_trace_mutex);
 
+	bt = rcu_dereference_protected(q->blk_trace,
+				       lockdep_is_held(&q->blk_trace_mutex));
 	if (attr == &dev_attr_enable) {
-		if (!!value == !!q->blk_trace) {
+		if (!!value == !!bt) {
 			ret = 0;
 			goto out_unlock_bdev;
 		}
@@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	}
 
 	ret = 0;
-	if (q->blk_trace == NULL)
+	if (bt == NULL)
 		ret = blk_trace_setup_queue(q, bdev);
 
 	if (ret == 0) {
 		if (attr == &dev_attr_act_mask)
-			q->blk_trace->act_mask = value;
+			bt->act_mask = value;
 		else if (attr == &dev_attr_pid)
-			q->blk_trace->pid = value;
+			bt->pid = value;
 		else if (attr == &dev_attr_start_lba)
-			q->blk_trace->start_lba = value;
+			bt->start_lba = value;
 		else if (attr == &dev_attr_end_lba)
-			q->blk_trace->end_lba = value;
+			bt->end_lba = value;
 	}
 
 out_unlock_bdev:
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
@ 2020-02-06 18:46 ` Chaitanya Kulkarni
  2020-02-06 18:49   ` Jens Axboe
  2020-02-10  0:38 ` Chaitanya Kulkarni
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 18+ messages in thread
From: Chaitanya Kulkarni @ 2020-02-06 18:46 UTC (permalink / raw)
  To: Jan Kara; +Cc: Jens Axboe, linux-block, tristmd, stable, Damien Le Moal

Hi Jan,

What do you think about following patch on the top of yours ?

The new helper that I've added on top of your patch will also help
future uses of rcu_dereference_protected(), e.g. the blktrace
extension [1] support that I'm working on.

P.S. it is compile-tested only; if you're okay with it I'll send a separate patch.

+
+/* Dereference q->blk_trace with q->blk_trace_mutex check only. */
+static inline struct blk_trace *blk_trace_rcu_deref(struct 
request_queue *q)
+{
+       return rcu_dereference_protected(q->blk_trace,
+ 
lockdep_is_held(&q->blk_trace_mutex));
+}
  /*
   * Send out a notify message.
   */
@@ -632,8 +639,7 @@ static int __blk_trace_startstop(struct 
request_queue *q, int start)
         int ret;
         struct blk_trace *bt;

-       bt = rcu_dereference_protected(q->blk_trace,
- 
lockdep_is_held(&q->blk_trace_mutex));
+       bt = blk_trace_rcu_deref(q);
         if (bt == NULL)
                 return -EINVAL;

@@ -743,8 +749,7 @@ int blk_trace_ioctl(struct block_device *bdev, 
unsigned cmd, char __user *arg)
  void blk_trace_shutdown(struct request_queue *q)
  {
         mutex_lock(&q->blk_trace_mutex);
-       if (rcu_dereference_protected(q->blk_trace,
- 
lockdep_is_held(&q->blk_trace_mutex))) {
+       if (blk_trace_rcu_deref(q)) {
                 __blk_trace_startstop(q, 0);
                 __blk_trace_remove(q);
         }
@@ -1817,8 +1822,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct 
device *dev,

         mutex_lock(&q->blk_trace_mutex);

-       bt = rcu_dereference_protected(q->blk_trace,
- 
lockdep_is_held(&q->blk_trace_mutex));
+       bt = blk_trace_rcu_deref(q);
         if (attr == &dev_attr_enable) {
                 ret = sprintf(buf, "%u\n", !!bt);
                 goto out_unlock_bdev;
@@ -1881,8 +1885,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct 
device *dev,

         mutex_lock(&q->blk_trace_mutex);

-       bt = rcu_dereference_protected(q->blk_trace,
- 
lockdep_is_held(&q->blk_trace_mutex));
+       bt = blk_trace_rcu_deref(q);
         if (attr == &dev_attr_enable) {
                 if (!!value == !!bt) {
                         ret = 0;

On 02/06/2020 06:28 AM, Jan Kara wrote:
> KASAN is reporting that __blk_add_trace() has a use-after-free issue
> when accessing q->blk_trace. Indeed the switching of block tracing (and
> thus eventual freeing of q->blk_trace) is completely unsynchronized with
> the currently running tracing and thus it can happen that the blk_trace
> structure is being freed just while __blk_add_trace() works on it.
> Protect accesses to q->blk_trace by RCU during tracing and make sure we
> wait for the end of RCU grace period when shutting down tracing. Luckily
> that is rare enough event that we can afford that. Note that postponing
> the freeing of blk_trace to an RCU callback should better be avoided as
> it could have unexpected user visible side-effects as debugfs files
> would be still existing for a short while block tracing has been shut
> down.
>
> Link:https://bugzilla.kernel.org/show_bug.cgi?id=205711
> CC:stable@vger.kernel.org
> Reported-by: Tristan<tristmd@gmail.com>
> Signed-off-by: Jan Kara<jack@suse.cz>
> ---

[1] https://marc.info/?l=linux-btrace&m=157422391521376&w=2



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 18:46 ` Chaitanya Kulkarni
@ 2020-02-06 18:49   ` Jens Axboe
  2020-02-06 19:37     ` Chaitanya Kulkarni
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-02-06 18:49 UTC (permalink / raw)
  To: Chaitanya Kulkarni, Jan Kara; +Cc: linux-block, tristmd, stable, Damien Le Moal

On 2/6/20 11:46 AM, Chaitanya Kulkarni wrote:
> Hi Jan,
> 
> What do you think about following patch on the top of yours ?
> 
> The new helper that I've added on the top of your patch will also
> future uses of the rcu_dereference_protected(). e.g. blktrace
> extension [1] support that I'm working on.
> 
> P.S. it is compile only if your okay I'll send a separate patch.
> 
> +
> +/* Dereference q->blk_trace with q->blk_trace_mutex check only. */
> +static inline struct blk_trace *blk_trace_rcu_deref(struct 
> request_queue *q)
> +{
> +       return rcu_dereference_protected(q->blk_trace,
> + 
> lockdep_is_held(&q->blk_trace_mutex));
> +}

Let's please not do that, it serves no real purpose and it just
obfuscates what's really going on.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 18:49   ` Jens Axboe
@ 2020-02-06 19:37     ` Chaitanya Kulkarni
  0 siblings, 0 replies; 18+ messages in thread
From: Chaitanya Kulkarni @ 2020-02-06 19:37 UTC (permalink / raw)
  To: Jens Axboe, Jan Kara; +Cc: linux-block, tristmd, stable, Damien Le Moal

On 02/06/2020 10:49 AM, Jens Axboe wrote:
> Let's please not do that, it serves no real purpose and it just
> obfuscates what's really going on.
>
> -- Jens Axboe

Okay, will avoid such changes and comments in future.

Thanks Jens for the clarification.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
  2020-02-06 18:46 ` Chaitanya Kulkarni
@ 2020-02-10  0:38 ` Chaitanya Kulkarni
  2020-02-10  2:19 ` Ming Lei
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 18+ messages in thread
From: Chaitanya Kulkarni @ 2020-02-10  0:38 UTC (permalink / raw)
  To: Jan Kara, Jens Axboe; +Cc: linux-block, tristmd, stable

Looks good.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>

On 02/06/2020 06:28 AM, Jan Kara wrote:
> KASAN is reporting that __blk_add_trace() has a use-after-free issue
> when accessing q->blk_trace. Indeed the switching of block tracing (and
> thus eventual freeing of q->blk_trace) is completely unsynchronized with
> the currently running tracing and thus it can happen that the blk_trace
> structure is being freed just while __blk_add_trace() works on it.
> Protect accesses to q->blk_trace by RCU during tracing and make sure we
> wait for the end of RCU grace period when shutting down tracing. Luckily
> that is rare enough event that we can afford that. Note that postponing
> the freeing of blk_trace to an RCU callback should better be avoided as
> it could have unexpected user visible side-effects as debugfs files
> would be still existing for a short while block tracing has been shut
> down.
>
> Link:https://bugzilla.kernel.org/show_bug.cgi?id=205711
> CC:stable@vger.kernel.org
> Reported-by: Tristan<tristmd@gmail.com>
> Signed-off-by: Jan Kara<jack@suse.cz>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
  2020-02-06 18:46 ` Chaitanya Kulkarni
  2020-02-10  0:38 ` Chaitanya Kulkarni
@ 2020-02-10  2:19 ` Ming Lei
  2020-02-10  3:54 ` Bart Van Assche
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 18+ messages in thread
From: Ming Lei @ 2020-02-10  2:19 UTC (permalink / raw)
  To: Jan Kara; +Cc: Jens Axboe, linux-block, tristmd, stable

On Thu, Feb 6, 2020 at 10:29 PM Jan Kara <jack@suse.cz> wrote:
>
> KASAN is reporting that __blk_add_trace() has a use-after-free issue
> when accessing q->blk_trace. Indeed the switching of block tracing (and
> thus eventual freeing of q->blk_trace) is completely unsynchronized with
> the currently running tracing and thus it can happen that the blk_trace
> structure is being freed just while __blk_add_trace() works on it.
> Protect accesses to q->blk_trace by RCU during tracing and make sure we
> wait for the end of RCU grace period when shutting down tracing. Luckily
> that is rare enough event that we can afford that. Note that postponing
> the freeing of blk_trace to an RCU callback should better be avoided as
> it could have unexpected user visible side-effects as debugfs files
> would be still existing for a short while block tracing has been shut
> down.
>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
> CC: stable@vger.kernel.org
> Reported-by: Tristan <tristmd@gmail.com>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  include/linux/blkdev.h       |   2 +-
>  include/linux/blktrace_api.h |  18 +++++--
>  kernel/trace/blktrace.c      | 114 +++++++++++++++++++++++++++++++------------
>  3 files changed, 97 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 4c636c42ad68..1cb5afed5515 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -524,7 +524,7 @@ struct request_queue {
>         unsigned int            sg_reserved_size;
>         int                     node;
>  #ifdef CONFIG_BLK_DEV_IO_TRACE
> -       struct blk_trace        *blk_trace;
> +       struct blk_trace __rcu  *blk_trace;
>         struct mutex            blk_trace_mutex;
>  #endif
>         /*
> diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
> index 7bb2d8de9f30..3b6ff5902edc 100644
> --- a/include/linux/blktrace_api.h
> +++ b/include/linux/blktrace_api.h
> @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
>   **/
>  #define blk_add_cgroup_trace_msg(q, cg, fmt, ...)                      \
>         do {                                                            \
> -               struct blk_trace *bt = (q)->blk_trace;                  \
> +               struct blk_trace *bt;                                   \
> +                                                                       \
> +               rcu_read_lock();                                        \
> +               bt = rcu_dereference((q)->blk_trace);                   \
>                 if (unlikely(bt))                                       \
>                         __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
> +               rcu_read_unlock();                                      \
>         } while (0)
>  #define blk_add_trace_msg(q, fmt, ...)                                 \
>         blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
> @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
>
>  static inline bool blk_trace_note_message_enabled(struct request_queue *q)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> -       if (likely(!bt))
> -               return false;
> -       return bt->act_mask & BLK_TC_NOTIFY;
> +       struct blk_trace *bt;
> +       bool ret;
> +
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
> +       ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
> +       rcu_read_unlock();
> +       return ret;
>  }
>
>  extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index 475e29498bca..a6d3016410eb 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -335,6 +335,7 @@ static void put_probe_ref(void)
>
>  static void blk_trace_cleanup(struct blk_trace *bt)
>  {
> +       synchronize_rcu();
>         blk_trace_free(bt);
>         put_probe_ref();
>  }
> @@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
>  static int __blk_trace_startstop(struct request_queue *q, int start)
>  {
>         int ret;
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> +       bt = rcu_dereference_protected(q->blk_trace,
> +                                      lockdep_is_held(&q->blk_trace_mutex));
>         if (bt == NULL)
>                 return -EINVAL;
>
> @@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
>  void blk_trace_shutdown(struct request_queue *q)
>  {
>         mutex_lock(&q->blk_trace_mutex);
> -
> -       if (q->blk_trace) {
> +       if (rcu_dereference_protected(q->blk_trace,
> +                                     lockdep_is_held(&q->blk_trace_mutex))) {
>                 __blk_trace_startstop(q, 0);
>                 __blk_trace_remove(q);
>         }
> @@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
>  #ifdef CONFIG_BLK_CGROUP
>  static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> +       /* We don't use the 'bt' value here except as an optimization... */
> +       bt = rcu_dereference_protected(q->blk_trace, 1);
>         if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
>                 return 0;
>
> @@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
>  static void blk_add_trace_rq(struct request *rq, int error,
>                              unsigned int nr_bytes, u32 what, u64 cgid)
>  {
> -       struct blk_trace *bt = rq->q->blk_trace;
> +       struct blk_trace *bt;
>
> -       if (likely(!bt))
> +       rcu_read_lock();
> +       bt = rcu_dereference(rq->q->blk_trace);
> +       if (likely(!bt)) {
> +               rcu_read_unlock();
>                 return;
> +       }
>
>         if (blk_rq_is_passthrough(rq))
>                 what |= BLK_TC_ACT(BLK_TC_PC);
> @@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
>
>         __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
>                         rq->cmd_flags, what, error, 0, NULL, cgid);
> +       rcu_read_unlock();
>  }
>
>  static void blk_add_trace_rq_insert(void *ignore,
> @@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
>  static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
>                               u32 what, int error)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> -       if (likely(!bt))
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
> +       if (likely(!bt)) {
> +               rcu_read_unlock();
>                 return;
> +       }
>
>         __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
>                         bio_op(bio), bio->bi_opf, what, error, 0, NULL,
>                         blk_trace_bio_get_cgid(q, bio));
> +       rcu_read_unlock();
>  }
>
>  static void blk_add_trace_bio_bounce(void *ignore,
> @@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
>         if (bio)
>                 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
>         else {
> -               struct blk_trace *bt = q->blk_trace;
> +               struct blk_trace *bt;
>
> +               rcu_read_lock();
> +               bt = rcu_dereference(q->blk_trace);
>                 if (bt)
>                         __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
>                                         NULL, 0);
> +               rcu_read_unlock();
>         }
>  }
>
> @@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
>         if (bio)
>                 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
>         else {
> -               struct blk_trace *bt = q->blk_trace;
> +               struct blk_trace *bt;
>
> +               rcu_read_lock();
> +               bt = rcu_dereference(q->blk_trace);
>                 if (bt)
>                         __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
>                                         0, 0, NULL, 0);
> +               rcu_read_unlock();
>         }
>  }
>
>  static void blk_add_trace_plug(void *ignore, struct request_queue *q)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
>         if (bt)
>                 __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
> +       rcu_read_unlock();
>  }
>
>  static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
>                                     unsigned int depth, bool explicit)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
>         if (bt) {
>                 __be64 rpdu = cpu_to_be64(depth);
>                 u32 what;
> @@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
>
>                 __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
>         }
> +       rcu_read_unlock();
>  }
>
>  static void blk_add_trace_split(void *ignore,
>                                 struct request_queue *q, struct bio *bio,
>                                 unsigned int pdu)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
>         if (bt) {
>                 __be64 rpdu = cpu_to_be64(pdu);
>
> @@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
>                                 BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
>                                 &rpdu, blk_trace_bio_get_cgid(q, bio));
>         }
> +       rcu_read_unlock();
>  }
>
>  /**
> @@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
>                                     struct request_queue *q, struct bio *bio,
>                                     dev_t dev, sector_t from)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>         struct blk_io_trace_remap r;
>
> -       if (likely(!bt))
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
> +       if (likely(!bt)) {
> +               rcu_read_unlock();
>                 return;
> +       }
>
>         r.device_from = cpu_to_be32(dev);
>         r.device_to   = cpu_to_be32(bio_dev(bio));
> @@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
>         __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
>                         bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
>                         sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
> +       rcu_read_unlock();
>  }
>
>  /**
> @@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
>                                    struct request *rq, dev_t dev,
>                                    sector_t from)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>         struct blk_io_trace_remap r;
>
> -       if (likely(!bt))
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
> +       if (likely(!bt)) {
> +               rcu_read_unlock();
>                 return;
> +       }
>
>         r.device_from = cpu_to_be32(dev);
>         r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
> @@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
>         __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
>                         rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
>                         sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
> +       rcu_read_unlock();
>  }
>
>  /**
> @@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
>                          struct request *rq,
>                          void *data, size_t len)
>  {
> -       struct blk_trace *bt = q->blk_trace;
> +       struct blk_trace *bt;
>
> -       if (likely(!bt))
> +       rcu_read_lock();
> +       bt = rcu_dereference(q->blk_trace);
> +       if (likely(!bt)) {
> +               rcu_read_unlock();
>                 return;
> +       }
>
>         __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
>                                 BLK_TA_DRV_DATA, 0, len, data,
>                                 blk_trace_request_get_cgid(q, rq));
> +       rcu_read_unlock();
>  }
>  EXPORT_SYMBOL_GPL(blk_add_driver_data);
>
> @@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
>                 return -EINVAL;
>
>         put_probe_ref();
> +       synchronize_rcu();
>         blk_trace_free(bt);
>         return 0;
>  }
> @@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
>         struct hd_struct *p = dev_to_part(dev);
>         struct request_queue *q;
>         struct block_device *bdev;
> +       struct blk_trace *bt;
>         ssize_t ret = -ENXIO;
>
>         bdev = bdget(part_devt(p));
> @@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
>
>         mutex_lock(&q->blk_trace_mutex);
>
> +       bt = rcu_dereference_protected(q->blk_trace,
> +                                      lockdep_is_held(&q->blk_trace_mutex));
>         if (attr == &dev_attr_enable) {
> -               ret = sprintf(buf, "%u\n", !!q->blk_trace);
> +               ret = sprintf(buf, "%u\n", !!bt);
>                 goto out_unlock_bdev;
>         }
>
> -       if (q->blk_trace == NULL)
> +       if (bt == NULL)
>                 ret = sprintf(buf, "disabled\n");
>         else if (attr == &dev_attr_act_mask)
> -               ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
> +               ret = blk_trace_mask2str(buf, bt->act_mask);
>         else if (attr == &dev_attr_pid)
> -               ret = sprintf(buf, "%u\n", q->blk_trace->pid);
> +               ret = sprintf(buf, "%u\n", bt->pid);
>         else if (attr == &dev_attr_start_lba)
> -               ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
> +               ret = sprintf(buf, "%llu\n", bt->start_lba);
>         else if (attr == &dev_attr_end_lba)
> -               ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
> +               ret = sprintf(buf, "%llu\n", bt->end_lba);
>
>  out_unlock_bdev:
>         mutex_unlock(&q->blk_trace_mutex);
> @@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>         struct block_device *bdev;
>         struct request_queue *q;
>         struct hd_struct *p;
> +       struct blk_trace *bt;
>         u64 value;
>         ssize_t ret = -EINVAL;
>
> @@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>
>         mutex_lock(&q->blk_trace_mutex);
>
> +       bt = rcu_dereference_protected(q->blk_trace,
> +                                      lockdep_is_held(&q->blk_trace_mutex));
>         if (attr == &dev_attr_enable) {
> -               if (!!value == !!q->blk_trace) {
> +               if (!!value == !!bt) {
>                         ret = 0;
>                         goto out_unlock_bdev;
>                 }
> @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>         }
>
>         ret = 0;
> -       if (q->blk_trace == NULL)
> +       if (bt == NULL)
>                 ret = blk_trace_setup_queue(q, bdev);
>
>         if (ret == 0) {
>                 if (attr == &dev_attr_act_mask)
> -                       q->blk_trace->act_mask = value;
> +                       bt->act_mask = value;
>                 else if (attr == &dev_attr_pid)
> -                       q->blk_trace->pid = value;
> +                       bt->pid = value;
>                 else if (attr == &dev_attr_start_lba)
> -                       q->blk_trace->start_lba = value;
> +                       bt->start_lba = value;
>                 else if (attr == &dev_attr_end_lba)
> -                       q->blk_trace->end_lba = value;
> +                       bt->end_lba = value;
>         }
>
>  out_unlock_bdev:
> --
> 2.16.4
>

Looks fine:

Reviewed-by: Ming Lei <ming.lei@redhat.com>

-- 
Ming Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
                   ` (2 preceding siblings ...)
  2020-02-10  2:19 ` Ming Lei
@ 2020-02-10  3:54 ` Bart Van Assche
  2020-02-19 12:59 ` Jan Kara
  2020-03-02 20:40 ` Bjorn Helgaas
  5 siblings, 0 replies; 18+ messages in thread
From: Bart Van Assche @ 2020-02-10  3:54 UTC (permalink / raw)
  To: Jan Kara, Jens Axboe; +Cc: linux-block, tristmd, stable

On 2020-02-06 06:28, Jan Kara wrote:
> KASAN is reporting that __blk_add_trace() has a use-after-free issue
> when accessing q->blk_trace. Indeed the switching of block tracing (and
> thus eventual freeing of q->blk_trace) is completely unsynchronized with
> the currently running tracing and thus it can happen that the blk_trace
> structure is being freed just while __blk_add_trace() works on it.
> Protect accesses to q->blk_trace by RCU during tracing and make sure we
> wait for the end of RCU grace period when shutting down tracing. Luckily
> that is a rare enough event that we can afford that. Note that postponing
> the freeing of blk_trace to an RCU callback should better be avoided as
> it could have unexpected user visible side-effects as debugfs files
> would still be existing for a short while after block tracing has been shut
> down.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
                   ` (3 preceding siblings ...)
  2020-02-10  3:54 ` Bart Van Assche
@ 2020-02-19 12:59 ` Jan Kara
  2020-02-25 10:20   ` Ming Lei
  2020-03-02 20:40 ` Bjorn Helgaas
  5 siblings, 1 reply; 18+ messages in thread
From: Jan Kara @ 2020-02-19 12:59 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, tristmd, Jan Kara, stable

On Thu 06-02-20 15:28:12, Jan Kara wrote:
> KASAN is reporting that __blk_add_trace() has a use-after-free issue
> when accessing q->blk_trace. Indeed the switching of block tracing (and
> thus eventual freeing of q->blk_trace) is completely unsynchronized with
> the currently running tracing and thus it can happen that the blk_trace
> structure is being freed just while __blk_add_trace() works on it.
> Protect accesses to q->blk_trace by RCU during tracing and make sure we
> wait for the end of RCU grace period when shutting down tracing. Luckily
> that is a rare enough event that we can afford that. Note that postponing
> the freeing of blk_trace to an RCU callback should better be avoided as
> it could have unexpected user visible side-effects as debugfs files
> would still be existing for a short while after block tracing has been shut
> down.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
> CC: stable@vger.kernel.org
> Reported-by: Tristan <tristmd@gmail.com>
> Signed-off-by: Jan Kara <jack@suse.cz>

Jens, do you plan to pick up the patch? Also the reporter asked me to
update the reference as:

Reported-by: Tristan Madani <tristmd@gmail.com>

Should I resend the patch with this update & reviewed-by's or will you fix
it up on commit? Thanks.

								Honza

> ---
>  include/linux/blkdev.h       |   2 +-
>  include/linux/blktrace_api.h |  18 +++++--
>  kernel/trace/blktrace.c      | 114 +++++++++++++++++++++++++++++++------------
>  3 files changed, 97 insertions(+), 37 deletions(-)
> 
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 4c636c42ad68..1cb5afed5515 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -524,7 +524,7 @@ struct request_queue {
>  	unsigned int		sg_reserved_size;
>  	int			node;
>  #ifdef CONFIG_BLK_DEV_IO_TRACE
> -	struct blk_trace	*blk_trace;
> +	struct blk_trace __rcu	*blk_trace;
>  	struct mutex		blk_trace_mutex;
>  #endif
>  	/*
> diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
> index 7bb2d8de9f30..3b6ff5902edc 100644
> --- a/include/linux/blktrace_api.h
> +++ b/include/linux/blktrace_api.h
> @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
>   **/
>  #define blk_add_cgroup_trace_msg(q, cg, fmt, ...)			\
>  	do {								\
> -		struct blk_trace *bt = (q)->blk_trace;			\
> +		struct blk_trace *bt;					\
> +									\
> +		rcu_read_lock();					\
> +		bt = rcu_dereference((q)->blk_trace);			\
>  		if (unlikely(bt))					\
>  			__trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
> +		rcu_read_unlock();					\
>  	} while (0)
>  #define blk_add_trace_msg(q, fmt, ...)					\
>  	blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
> @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
>  
>  static inline bool blk_trace_note_message_enabled(struct request_queue *q)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> -	if (likely(!bt))
> -		return false;
> -	return bt->act_mask & BLK_TC_NOTIFY;
> +	struct blk_trace *bt;
> +	bool ret;
> +
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
> +	ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
> +	rcu_read_unlock();
> +	return ret;
>  }
>  
>  extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index 475e29498bca..a6d3016410eb 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -335,6 +335,7 @@ static void put_probe_ref(void)
>  
>  static void blk_trace_cleanup(struct blk_trace *bt)
>  {
> +	synchronize_rcu();
>  	blk_trace_free(bt);
>  	put_probe_ref();
>  }
> @@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
>  static int __blk_trace_startstop(struct request_queue *q, int start)
>  {
>  	int ret;
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> +	bt = rcu_dereference_protected(q->blk_trace,
> +				       lockdep_is_held(&q->blk_trace_mutex));
>  	if (bt == NULL)
>  		return -EINVAL;
>  
> @@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
>  void blk_trace_shutdown(struct request_queue *q)
>  {
>  	mutex_lock(&q->blk_trace_mutex);
> -
> -	if (q->blk_trace) {
> +	if (rcu_dereference_protected(q->blk_trace,
> +				      lockdep_is_held(&q->blk_trace_mutex))) {
>  		__blk_trace_startstop(q, 0);
>  		__blk_trace_remove(q);
>  	}
> @@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
>  #ifdef CONFIG_BLK_CGROUP
>  static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> +	/* We don't use the 'bt' value here except as an optimization... */
> +	bt = rcu_dereference_protected(q->blk_trace, 1);
>  	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
>  		return 0;
>  
> @@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
>  static void blk_add_trace_rq(struct request *rq, int error,
>  			     unsigned int nr_bytes, u32 what, u64 cgid)
>  {
> -	struct blk_trace *bt = rq->q->blk_trace;
> +	struct blk_trace *bt;
>  
> -	if (likely(!bt))
> +	rcu_read_lock();
> +	bt = rcu_dereference(rq->q->blk_trace);
> +	if (likely(!bt)) {
> +		rcu_read_unlock();
>  		return;
> +	}
>  
>  	if (blk_rq_is_passthrough(rq))
>  		what |= BLK_TC_ACT(BLK_TC_PC);
> @@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
>  
>  	__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
>  			rq->cmd_flags, what, error, 0, NULL, cgid);
> +	rcu_read_unlock();
>  }
>  
>  static void blk_add_trace_rq_insert(void *ignore,
> @@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
>  static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
>  			      u32 what, int error)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> -	if (likely(!bt))
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
> +	if (likely(!bt)) {
> +		rcu_read_unlock();
>  		return;
> +	}
>  
>  	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
>  			bio_op(bio), bio->bi_opf, what, error, 0, NULL,
>  			blk_trace_bio_get_cgid(q, bio));
> +	rcu_read_unlock();
>  }
>  
>  static void blk_add_trace_bio_bounce(void *ignore,
> @@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
>  	if (bio)
>  		blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
>  	else {
> -		struct blk_trace *bt = q->blk_trace;
> +		struct blk_trace *bt;
>  
> +		rcu_read_lock();
> +		bt = rcu_dereference(q->blk_trace);
>  		if (bt)
>  			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
>  					NULL, 0);
> +		rcu_read_unlock();
>  	}
>  }
>  
> @@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
>  	if (bio)
>  		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
>  	else {
> -		struct blk_trace *bt = q->blk_trace;
> +		struct blk_trace *bt;
>  
> +		rcu_read_lock();
> +		bt = rcu_dereference(q->blk_trace);
>  		if (bt)
>  			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
>  					0, 0, NULL, 0);
> +		rcu_read_unlock();
>  	}
>  }
>  
>  static void blk_add_trace_plug(void *ignore, struct request_queue *q)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
>  	if (bt)
>  		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
> +	rcu_read_unlock();
>  }
>  
>  static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
>  				    unsigned int depth, bool explicit)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
>  	if (bt) {
>  		__be64 rpdu = cpu_to_be64(depth);
>  		u32 what;
> @@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
>  
>  		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
>  	}
> +	rcu_read_unlock();
>  }
>  
>  static void blk_add_trace_split(void *ignore,
>  				struct request_queue *q, struct bio *bio,
>  				unsigned int pdu)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
>  	if (bt) {
>  		__be64 rpdu = cpu_to_be64(pdu);
>  
> @@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
>  				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
>  				&rpdu, blk_trace_bio_get_cgid(q, bio));
>  	}
> +	rcu_read_unlock();
>  }
>  
>  /**
> @@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
>  				    struct request_queue *q, struct bio *bio,
>  				    dev_t dev, sector_t from)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  	struct blk_io_trace_remap r;
>  
> -	if (likely(!bt))
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
> +	if (likely(!bt)) {
> +		rcu_read_unlock();
>  		return;
> +	}
>  
>  	r.device_from = cpu_to_be32(dev);
>  	r.device_to   = cpu_to_be32(bio_dev(bio));
> @@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
>  	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
>  			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
>  			sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
> +	rcu_read_unlock();
>  }
>  
>  /**
> @@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
>  				   struct request *rq, dev_t dev,
>  				   sector_t from)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  	struct blk_io_trace_remap r;
>  
> -	if (likely(!bt))
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
> +	if (likely(!bt)) {
> +		rcu_read_unlock();
>  		return;
> +	}
>  
>  	r.device_from = cpu_to_be32(dev);
>  	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
> @@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
>  	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
>  			rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
>  			sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
> +	rcu_read_unlock();
>  }
>  
>  /**
> @@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
>  			 struct request *rq,
>  			 void *data, size_t len)
>  {
> -	struct blk_trace *bt = q->blk_trace;
> +	struct blk_trace *bt;
>  
> -	if (likely(!bt))
> +	rcu_read_lock();
> +	bt = rcu_dereference(q->blk_trace);
> +	if (likely(!bt)) {
> +		rcu_read_unlock();
>  		return;
> +	}
>  
>  	__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
>  				BLK_TA_DRV_DATA, 0, len, data,
>  				blk_trace_request_get_cgid(q, rq));
> +	rcu_read_unlock();
>  }
>  EXPORT_SYMBOL_GPL(blk_add_driver_data);
>  
> @@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
>  		return -EINVAL;
>  
>  	put_probe_ref();
> +	synchronize_rcu();
>  	blk_trace_free(bt);
>  	return 0;
>  }
> @@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
>  	struct hd_struct *p = dev_to_part(dev);
>  	struct request_queue *q;
>  	struct block_device *bdev;
> +	struct blk_trace *bt;
>  	ssize_t ret = -ENXIO;
>  
>  	bdev = bdget(part_devt(p));
> @@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
>  
>  	mutex_lock(&q->blk_trace_mutex);
>  
> +	bt = rcu_dereference_protected(q->blk_trace,
> +				       lockdep_is_held(&q->blk_trace_mutex));
>  	if (attr == &dev_attr_enable) {
> -		ret = sprintf(buf, "%u\n", !!q->blk_trace);
> +		ret = sprintf(buf, "%u\n", !!bt);
>  		goto out_unlock_bdev;
>  	}
>  
> -	if (q->blk_trace == NULL)
> +	if (bt == NULL)
>  		ret = sprintf(buf, "disabled\n");
>  	else if (attr == &dev_attr_act_mask)
> -		ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
> +		ret = blk_trace_mask2str(buf, bt->act_mask);
>  	else if (attr == &dev_attr_pid)
> -		ret = sprintf(buf, "%u\n", q->blk_trace->pid);
> +		ret = sprintf(buf, "%u\n", bt->pid);
>  	else if (attr == &dev_attr_start_lba)
> -		ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
> +		ret = sprintf(buf, "%llu\n", bt->start_lba);
>  	else if (attr == &dev_attr_end_lba)
> -		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
> +		ret = sprintf(buf, "%llu\n", bt->end_lba);
>  
>  out_unlock_bdev:
>  	mutex_unlock(&q->blk_trace_mutex);
> @@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>  	struct block_device *bdev;
>  	struct request_queue *q;
>  	struct hd_struct *p;
> +	struct blk_trace *bt;
>  	u64 value;
>  	ssize_t ret = -EINVAL;
>  
> @@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>  
>  	mutex_lock(&q->blk_trace_mutex);
>  
> +	bt = rcu_dereference_protected(q->blk_trace,
> +				       lockdep_is_held(&q->blk_trace_mutex));
>  	if (attr == &dev_attr_enable) {
> -		if (!!value == !!q->blk_trace) {
> +		if (!!value == !!bt) {
>  			ret = 0;
>  			goto out_unlock_bdev;
>  		}
> @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>  	}
>  
>  	ret = 0;
> -	if (q->blk_trace == NULL)
> +	if (bt == NULL)
>  		ret = blk_trace_setup_queue(q, bdev);
>  
>  	if (ret == 0) {
>  		if (attr == &dev_attr_act_mask)
> -			q->blk_trace->act_mask = value;
> +			bt->act_mask = value;
>  		else if (attr == &dev_attr_pid)
> -			q->blk_trace->pid = value;
> +			bt->pid = value;
>  		else if (attr == &dev_attr_start_lba)
> -			q->blk_trace->start_lba = value;
> +			bt->start_lba = value;
>  		else if (attr == &dev_attr_end_lba)
> -			q->blk_trace->end_lba = value;
> +			bt->end_lba = value;
>  	}
>  
>  out_unlock_bdev:
> -- 
> 2.16.4
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-19 12:59 ` Jan Kara
@ 2020-02-25 10:20   ` Ming Lei
  2020-02-25 15:40     ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Ming Lei @ 2020-02-25 10:20 UTC (permalink / raw)
  To: Jan Kara; +Cc: Jens Axboe, linux-block, tristmd, stable

On Wed, Feb 19, 2020 at 01:59:47PM +0100, Jan Kara wrote:
> On Thu 06-02-20 15:28:12, Jan Kara wrote:
> > KASAN is reporting that __blk_add_trace() has a use-after-free issue
> > when accessing q->blk_trace. Indeed the switching of block tracing (and
> > thus eventual freeing of q->blk_trace) is completely unsynchronized with
> > the currently running tracing and thus it can happen that the blk_trace
> > structure is being freed just while __blk_add_trace() works on it.
> > Protect accesses to q->blk_trace by RCU during tracing and make sure we
> > wait for the end of RCU grace period when shutting down tracing. Luckily
> > that is a rare enough event that we can afford that. Note that postponing
> > the freeing of blk_trace to an RCU callback should better be avoided as
> > it could have unexpected user visible side-effects as debugfs files
> > would still be existing for a short while after block tracing has been shut
> > down.
> > 
> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
> > CC: stable@vger.kernel.org
> > Reported-by: Tristan <tristmd@gmail.com>
> > Signed-off-by: Jan Kara <jack@suse.cz>
> 
> Jens, do you plan to pick up the patch? Also the reporter asked me to
> update the reference as:
> 
> Reported-by: Tristan Madani <tristmd@gmail.com>
> 
> Should I resend the patch with this update & reviewed-by's or will you fix
> it up on commit? Thanks.
> 

I have run concurrent quick/repeated blktrace & long-time heavy IO, and it
looks like this patch just works fine, so:

Tested-by: Ming Lei <ming.lei@redhat.com>

Jens, any chance to take a look at this CVE issue?

Thanks,
Ming


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-25 10:20   ` Ming Lei
@ 2020-02-25 15:40     ` Jens Axboe
  0 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2020-02-25 15:40 UTC (permalink / raw)
  To: Ming Lei, Jan Kara; +Cc: linux-block, tristmd, stable

On 2/25/20 3:20 AM, Ming Lei wrote:
> On Wed, Feb 19, 2020 at 01:59:47PM +0100, Jan Kara wrote:
>> On Thu 06-02-20 15:28:12, Jan Kara wrote:
>>> KASAN is reporting that __blk_add_trace() has a use-after-free issue
>>> when accessing q->blk_trace. Indeed the switching of block tracing (and
>>> thus eventual freeing of q->blk_trace) is completely unsynchronized with
>>> the currently running tracing and thus it can happen that the blk_trace
>>> structure is being freed just while __blk_add_trace() works on it.
>>> Protect accesses to q->blk_trace by RCU during tracing and make sure we
>>> wait for the end of RCU grace period when shutting down tracing. Luckily
>>> that is a rare enough event that we can afford that. Note that postponing
>>> the freeing of blk_trace to an RCU callback should better be avoided as
>>> it could have unexpected user visible side-effects as debugfs files
>>> would still be existing for a short while after block tracing has been shut
>>> down.
>>>
>>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
>>> CC: stable@vger.kernel.org
>>> Reported-by: Tristan <tristmd@gmail.com>
>>> Signed-off-by: Jan Kara <jack@suse.cz>
>>
>> Jens, do you plan to pick up the patch? Also the reporter asked me to
>> update the reference as:
>>
>> Reported-by: Tristan Madani <tristmd@gmail.com>
>>
>> Should I resend the patch with this update & reviewed-by's or will you fix
>> it up on commit? Thanks.
>>
> 
> I have run concurrent quick/repeated blktrace & long-time heavy IO, looks
> this patch just works fine, so:
> 
> Tested-by: Ming Lei <ming.lei@redhat.com>
> 
> Jens, any chance to take a look at this CVE issue?

Queued up for 5.6, thanks everyone.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
                   ` (4 preceding siblings ...)
  2020-02-19 12:59 ` Jan Kara
@ 2020-03-02 20:40 ` Bjorn Helgaas
  2020-03-02 21:19   ` Chaitanya Kulkarni
  5 siblings, 1 reply; 18+ messages in thread
From: Bjorn Helgaas @ 2020-03-02 20:40 UTC (permalink / raw)
  To: Jan Kara; +Cc: Jens Axboe, linux-block, tristmd, stable

On Thu, Feb 06, 2020 at 03:28:12PM +0100, Jan Kara wrote:

> @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>  	}
>  
>  	ret = 0;
> -	if (q->blk_trace == NULL)
> +	if (bt == NULL)
>  		ret = blk_trace_setup_queue(q, bdev);
>  
>  	if (ret == 0) {
>  		if (attr == &dev_attr_act_mask)
> -			q->blk_trace->act_mask = value;
> +			bt->act_mask = value;

You've likely seen this already, but Coverity complains that "bt" may
be a NULL pointer here, since we checked it for NULL above.

  CID 1460458:  Null pointer dereferences  (FORWARD_NULL)

>  		else if (attr == &dev_attr_pid)
> -			q->blk_trace->pid = value;
> +			bt->pid = value;
>  		else if (attr == &dev_attr_start_lba)
> -			q->blk_trace->start_lba = value;
> +			bt->start_lba = value;
>  		else if (attr == &dev_attr_end_lba)
> -			q->blk_trace->end_lba = value;
> +			bt->end_lba = value;
>  	}
>  
>  out_unlock_bdev:
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-02 20:40 ` Bjorn Helgaas
@ 2020-03-02 21:19   ` Chaitanya Kulkarni
  2020-03-02 21:59     ` Bjorn Helgaas
  2020-03-02 22:06     ` Keith Busch
  0 siblings, 2 replies; 18+ messages in thread
From: Chaitanya Kulkarni @ 2020-03-02 21:19 UTC (permalink / raw)
  To: Bjorn Helgaas, Jan Kara; +Cc: Jens Axboe, linux-block, tristmd, stable

By any chance will the following patch be able to get rid of
the warning ?

index 4560878f0bac..889555910555 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1895,9 +1895,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct 
device *dev,
                 goto out_unlock_bdev;
         }

-       ret = 0;
-       if (bt == NULL)
-               ret = blk_trace_setup_queue(q, bdev);
+       ret = bt == NULL ? blk_trace_setup_queue(q, bdev) : 0;

         if (ret == 0) {
                 if (attr == &dev_attr_act_mask)

On 03/02/2020 12:41 PM, Bjorn Helgaas wrote:
> On Thu, Feb 06, 2020 at 03:28:12PM +0100, Jan Kara wrote:
>
>> @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>>   	}
>>
>>   	ret = 0;
>> -	if (q->blk_trace == NULL)
>> +	if (bt == NULL)
>>   		ret = blk_trace_setup_queue(q, bdev);
>>
>>   	if (ret == 0) {
>>   		if (attr == &dev_attr_act_mask)
>> -			q->blk_trace->act_mask = value;
>> +			bt->act_mask = value;
>
> You've likely seen this already, but Coverity complains that "bt" may
> be a NULL pointer here, since we checked it for NULL above.
>
>    CID 1460458:  Null pointer dereferences  (FORWARD_NULL)
>
>>   		else if (attr == &dev_attr_pid)
>> -			q->blk_trace->pid = value;
>> +			bt->pid = value;
>>   		else if (attr == &dev_attr_start_lba)
>> -			q->blk_trace->start_lba = value;
>> +			bt->start_lba = value;
>>   		else if (attr == &dev_attr_end_lba)
>> -			q->blk_trace->end_lba = value;
>> +			bt->end_lba = value;
>>   	}
>>
>>   out_unlock_bdev:
>>
>


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-02 21:19   ` Chaitanya Kulkarni
@ 2020-03-02 21:59     ` Bjorn Helgaas
  2020-03-02 22:06     ` Keith Busch
  1 sibling, 0 replies; 18+ messages in thread
From: Bjorn Helgaas @ 2020-03-02 21:59 UTC (permalink / raw)
  To: Chaitanya Kulkarni; +Cc: Jan Kara, Jens Axboe, linux-block, tristmd, stable

On Mon, Mar 02, 2020 at 09:19:33PM +0000, Chaitanya Kulkarni wrote:
> By any chance will the following patch be able to get rid of
> the warning ?
> 
> index 4560878f0bac..889555910555 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -1895,9 +1895,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct 
> device *dev,
>                  goto out_unlock_bdev;
>          }
> 
> -       ret = 0;
> -       if (bt == NULL)
> -               ret = blk_trace_setup_queue(q, bdev);
> +       ret = bt == NULL ? blk_trace_setup_queue(q, bdev) : 0;
> 
>          if (ret == 0) {
>                  if (attr == &dev_attr_act_mask)

I don't have a way to run Coverity (I just get periodic reports), but
I don't see the point of this patch.  I assume "bt" can still be NULL
(otherwise there'd be no point in testing it) and we still reference
"bt->pid" below.

> On 03/02/2020 12:41 PM, Bjorn Helgaas wrote:
> > On Thu, Feb 06, 2020 at 03:28:12PM +0100, Jan Kara wrote:
> >
> >> @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
> >>   	}
> >>
> >>   	ret = 0;
> >> -	if (q->blk_trace == NULL)
> >> +	if (bt == NULL)
> >>   		ret = blk_trace_setup_queue(q, bdev);
> >>
> >>   	if (ret == 0) {
> >>   		if (attr == &dev_attr_act_mask)
> >> -			q->blk_trace->act_mask = value;
> >> +			bt->act_mask = value;
> >
> > You've likely seen this already, but Coverity complains that "bt" may
> > be a NULL pointer here, since we checked it for NULL above.
> >
> >    CID 1460458:  Null pointer dereferences  (FORWARD_NULL)
> >
> >>   		else if (attr == &dev_attr_pid)
> >> -			q->blk_trace->pid = value;
> >> +			bt->pid = value;
> >>   		else if (attr == &dev_attr_start_lba)
> >> -			q->blk_trace->start_lba = value;
> >> +			bt->start_lba = value;
> >>   		else if (attr == &dev_attr_end_lba)
> >> -			q->blk_trace->end_lba = value;
> >> +			bt->end_lba = value;
> >>   	}
> >>
> >>   out_unlock_bdev:
> >>
> >
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-02 21:19   ` Chaitanya Kulkarni
  2020-03-02 21:59     ` Bjorn Helgaas
@ 2020-03-02 22:06     ` Keith Busch
  2020-03-03 11:07       ` Cengiz Can
  1 sibling, 1 reply; 18+ messages in thread
From: Keith Busch @ 2020-03-02 22:06 UTC (permalink / raw)
  To: Chaitanya Kulkarni
  Cc: Bjorn Helgaas, Jan Kara, Jens Axboe, linux-block, tristmd, stable

On Mon, Mar 02, 2020 at 09:19:33PM +0000, Chaitanya Kulkarni wrote:
> By any chance will the following patch be able to get rid of
> the warning ?
> 
> index 4560878f0bac..889555910555 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -1895,9 +1895,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct 
> device *dev,
>                  goto out_unlock_bdev;
>          }
> 
> -       ret = 0;
> -       if (bt == NULL)
> -               ret = blk_trace_setup_queue(q, bdev);
> +       ret = bt == NULL ? blk_trace_setup_queue(q, bdev) : 0;
> 
>          if (ret == 0) {
>                  if (attr == &dev_attr_act_mask)

That looks like the same thing as what created the warning, just more
compact.

I think the right thing to do is update bt to the one allocated from a
successful blk_trace_setup_queue().

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-02 22:06     ` Keith Busch
@ 2020-03-03 11:07       ` Cengiz Can
  2020-03-03 12:17         ` Greg KH
  2020-03-05  1:51         ` Ming Lei
  0 siblings, 2 replies; 18+ messages in thread
From: Cengiz Can @ 2020-03-03 11:07 UTC (permalink / raw)
  To: kbusch
  Cc: Chaitanya.Kulkarni, axboe, helgaas, jack, linux-block, stable,
	tristmd, Cengiz Can

Added a reassignment into the NULL check block to fix the issue.

Fixes: c780e86dd48 ("blktrace: Protect q->blk_trace with RCU")

Signed-off-by: Cengiz Can <cengiz@kernel.wtf>
---
 kernel/trace/blktrace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 4560878f0bac..29ea88f10b87 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1896,8 +1896,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	}

 	ret = 0;
-	if (bt == NULL)
+	if (bt == NULL) {
 		ret = blk_trace_setup_queue(q, bdev);
+		bt = q->blk_trace;
+	}

 	if (ret == 0) {
 		if (attr == &dev_attr_act_mask)
--
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-03 11:07       ` Cengiz Can
@ 2020-03-03 12:17         ` Greg KH
  2020-03-05  1:51         ` Ming Lei
  1 sibling, 0 replies; 18+ messages in thread
From: Greg KH @ 2020-03-03 12:17 UTC (permalink / raw)
  To: Cengiz Can
  Cc: kbusch, Chaitanya.Kulkarni, axboe, helgaas, jack, linux-block,
	stable, tristmd

On Tue, Mar 03, 2020 at 02:07:32PM +0300, Cengiz Can wrote:
> Added a reassignment into the NULL check block to fix the issue.
> 
> Fixes: c780e86dd48 ("blktrace: Protect q->blk_trace with RCU")
> 
> Signed-off-by: Cengiz Can <cengiz@kernel.wtf>
> ---
>  kernel/trace/blktrace.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index 4560878f0bac..29ea88f10b87 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -1896,8 +1896,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>  	}
> 
>  	ret = 0;
> -	if (bt == NULL)
> +	if (bt == NULL) {
>  		ret = blk_trace_setup_queue(q, bdev);
> +		bt = q->blk_trace;
> +	}
> 
>  	if (ret == 0) {
>  		if (attr == &dev_attr_act_mask)
> --
> 2.25.1
>

<formletter>

This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read:
    https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.

</formletter>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-03 11:07       ` Cengiz Can
  2020-03-03 12:17         ` Greg KH
@ 2020-03-05  1:51         ` Ming Lei
  2020-03-05  4:27           ` Cengiz Can
  1 sibling, 1 reply; 18+ messages in thread
From: Ming Lei @ 2020-03-05  1:51 UTC (permalink / raw)
  To: Cengiz Can
  Cc: Keith Busch, Chaitanya Kulkarni, Jens Axboe, Bjorn Helgaas,
	Jan Kara, linux-block, stable, tristmd

On Tue, Mar 3, 2020 at 8:37 PM Cengiz Can <cengiz@kernel.wtf> wrote:
>
> Added a reassignment into the NULL check block to fix the issue.
>
> Fixes: c780e86dd48 ("blktrace: Protect q->blk_trace with RCU")
>
> Signed-off-by: Cengiz Can <cengiz@kernel.wtf>
> ---
>  kernel/trace/blktrace.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index 4560878f0bac..29ea88f10b87 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -1896,8 +1896,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
>         }
>
>         ret = 0;
> -       if (bt == NULL)
> +       if (bt == NULL) {
>                 ret = blk_trace_setup_queue(q, bdev);
> +               bt = q->blk_trace;

I'd suggest to use the following line for avoiding RCU warning:

   bt = rcu_dereference_protected(q->blk_trace,
                                       lockdep_is_held(&q->blk_trace_mutex));

Otherwise, the patch looks fine for me:

Reviewed-by: Ming Lei <ming.lei@redhat.com>

Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] blktrace: Protect q->blk_trace with RCU
  2020-03-05  1:51         ` Ming Lei
@ 2020-03-05  4:27           ` Cengiz Can
  0 siblings, 0 replies; 18+ messages in thread
From: Cengiz Can @ 2020-03-05  4:27 UTC (permalink / raw)
  To: Ming Lei
  Cc: Keith Busch, Chaitanya Kulkarni, Jens Axboe, Bjorn Helgaas,
	Jan Kara, linux-block, stable, tristmd



On March 5, 2020 04:51:51 Ming Lei <tom.leiming@gmail.com> wrote:

> On Tue, Mar 3, 2020 at 8:37 PM Cengiz Can <cengiz@kernel.wtf> wrote:
>>
>> Added a reassignment into the NULL check block to fix the issue.
>>
>> Fixes: c780e86dd48 ("blktrace: Protect q->blk_trace with RCU")
>>
>> Signed-off-by: Cengiz Can <cengiz@kernel.wtf>
>> ---
>> kernel/trace/blktrace.c | 4 +++-
>> 1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
>> index 4560878f0bac..29ea88f10b87 100644
>> --- a/kernel/trace/blktrace.c
>> +++ b/kernel/trace/blktrace.c
>> @@ -1896,8 +1896,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct 
>> device *dev,
>> }
>>
>> ret = 0;
>> -       if (bt == NULL)
>> +       if (bt == NULL) {
>>        ret = blk_trace_setup_queue(q, bdev);
>> +               bt = q->blk_trace;
>
> I'd suggest to use the following line for avoiding RCU warning:
>
>   bt = rcu_dereference_protected(q->blk_trace,
>                                       lockdep_is_held(&q->blk_trace_mutex));
>
> Otherwise, the patch looks fine for me:

Thank you.

Please kindly see v2.

>
> Reviewed-by: Ming Lei <ming.lei@redhat.com>
>
> Thanks,
> Ming Lei




^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-03-05  4:27 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-06 14:28 [PATCH] blktrace: Protect q->blk_trace with RCU Jan Kara
2020-02-06 18:46 ` Chaitanya Kulkarni
2020-02-06 18:49   ` Jens Axboe
2020-02-06 19:37     ` Chaitanya Kulkarni
2020-02-10  0:38 ` Chaitanya Kulkarni
2020-02-10  2:19 ` Ming Lei
2020-02-10  3:54 ` Bart Van Assche
2020-02-19 12:59 ` Jan Kara
2020-02-25 10:20   ` Ming Lei
2020-02-25 15:40     ` Jens Axboe
2020-03-02 20:40 ` Bjorn Helgaas
2020-03-02 21:19   ` Chaitanya Kulkarni
2020-03-02 21:59     ` Bjorn Helgaas
2020-03-02 22:06     ` Keith Busch
2020-03-03 11:07       ` Cengiz Can
2020-03-03 12:17         ` Greg KH
2020-03-05  1:51         ` Ming Lei
2020-03-05  4:27           ` Cengiz Can

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).