linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>,
	"jianchao.wang" <jianchao.w.wang@oracle.com>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>
Subject: Re: v4.20-rc6: Sporadic use-after-free in bt_iter()
Date: Thu, 20 Dec 2018 12:56:02 -0800	[thread overview]
Message-ID: <1545339362.185366.511.camel@acm.org> (raw)
In-Reply-To: <b9c3bd7c-9e82-7300-d525-0bd72ed66f66@kernel.dk>

On Thu, 2018-12-20 at 11:33 -0700, Jens Axboe wrote:
> [ ... ]
> Something like this, totally untested. If the queue matches, we know it's
> safe to dereference it.
> 
> A safer API to export as well...
> 
> 
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 2089c6c62f44..78192b544fa2 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
>  
>  	if (!reserved)
>  		bitnr += tags->nr_reserved_tags;
> -	rq = tags->rqs[bitnr];
> +	if (tags->rqs[bitnr].queue != hctx->queue)
> +		return true;
>  
>  	/*
>  	 * We can hit rq == NULL here, because the tagging functions
>  	 * test and set the bit before assigning ->rqs[].
>  	 */
> -	if (rq && rq->q == hctx->queue)
> +	rq = tags->rqs[bitnr].rq;
> +	if (rq)
>  		return iter_data->fn(hctx, rq, iter_data->data, reserved);
>  	return true;
>  }
> @@ -268,6 +270,7 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
>  
>  struct bt_tags_iter_data {
>  	struct blk_mq_tags *tags;
> +	struct blk_mq_hw_ctx *hctx;
>  	busy_tag_iter_fn *fn;
>  	void *data;
>  	bool reserved;
> @@ -287,7 +290,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
>  	 * We can hit rq == NULL here, because the tagging functions
>  	 * test and set the bit before assining ->rqs[].
>  	 */
> -	rq = tags->rqs[bitnr];
> +	rq = tags->rqs[bitnr].rq;
>  	if (rq && blk_mq_request_started(rq))
>  		return iter_data->fn(rq, iter_data->data, reserved);
>  
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index 61deab0b5a5a..bb84d1f099c7 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -4,6 +4,11 @@
>  
>  #include "blk-mq.h"
>  
> +struct rq_tag_entry {
> +	struct request_queue *queue;
> +	struct request *rq;
> +};
> +
>  /*
>   * Tag address space map.
>   */
> @@ -16,7 +21,7 @@ struct blk_mq_tags {
>  	struct sbitmap_queue bitmap_tags;
>  	struct sbitmap_queue breserved_tags;
>  
> -	struct request **rqs;
> +	struct rq_tag_entry *rqs;
>  	struct request **static_rqs;
>  	struct list_head page_list;
>  };
> @@ -78,7 +83,8 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>  static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
>  		unsigned int tag, struct request *rq)
>  {
> -	hctx->tags->rqs[tag] = rq;
> +	hctx->tags->rqs[tag].queue = hctx->queue;
> +	hctx->tags->rqs[tag].rq = rq;
>  }
>  
>  static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3ba37b9e15e9..4f194946dbd9 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -298,13 +298,16 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>  		rq->tag = -1;
>  		rq->internal_tag = tag;
>  	} else {
> -		if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
> +		struct blk_mq_hw_ctx *hctx = data->hctx;
> +
> +		if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
>  			rq_flags = RQF_MQ_INFLIGHT;
> -			atomic_inc(&data->hctx->nr_active);
> +			atomic_inc(&hctx->nr_active);
>  		}
>  		rq->tag = tag;
>  		rq->internal_tag = -1;
> -		data->hctx->tags->rqs[rq->tag] = rq;
> +		hctx->tags->rqs[rq->tag].queue = hctx->queue;
> +		hctx->tags->rqs[rq->tag].rq = rq;
>  	}
>  
>  	/* csd/requeue_work/fifo_time is initialized before use */
> @@ -797,8 +800,8 @@ EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
>  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>  {
>  	if (tag < tags->nr_tags) {
> -		prefetch(tags->rqs[tag]);
> -		return tags->rqs[tag];
> +		prefetch(tags->rqs[tag].rq);
> +		return tags->rqs[tag].rq;
>  	}
>  
>  	return NULL;
> @@ -809,10 +812,11 @@ static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  			       void *priv, bool reserved)
>  {
>  	/*
> -	 * If we find a request that is inflight and the queue matches,
> -	 * we know the queue is busy. Return false to stop the iteration.
> +	 * We're only called here if the queue matches. If the rq state is
> +	 * inflight, we know the queue is busy. Return false to stop the
> +	 * iteration.
>  	 */
> -	if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
> +	if (rq->state == MQ_RQ_IN_FLIGHT) {
>  		bool *busy = priv;
>  
>  		*busy = true;
> @@ -1049,11 +1053,14 @@ bool blk_mq_get_driver_tag(struct request *rq)
>  	shared = blk_mq_tag_busy(data.hctx);
>  	rq->tag = blk_mq_get_tag(&data);
>  	if (rq->tag >= 0) {
> +		struct blk_mq_hw_ctx *hctx = data.hctx;
> +
>  		if (shared) {
>  			rq->rq_flags |= RQF_MQ_INFLIGHT;
>  			atomic_inc(&data.hctx->nr_active);
>  		}
> -		data.hctx->tags->rqs[rq->tag] = rq;
> +		hctx->tags->rqs[rq->tag].queue = hctx->queue;
> +		hctx->tags->rqs[rq->tag].rq = rq;
>  	}
>  
>  done:
> @@ -2069,7 +2076,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  	if (!tags)
>  		return NULL;
>  
> -	tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
> +	tags->rqs = kcalloc_node(nr_tags, sizeof(struct rq_tag_entry),
>  				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
>  				 node);
>  	if (!tags->rqs) {

Hi Jens,

How about the patch below? Its behavior should be similar to your patch but I think
this patch is simpler.

Thanks,

Bart.


---
 block/blk-mq.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6a7566244de3..c57611b1f2a8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -86,6 +86,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 }
 
 struct mq_inflight {
+	struct request_queue *q;
 	struct hd_struct *part;
 	unsigned int *inflight;
 };
@@ -96,6 +97,9 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
 {
 	struct mq_inflight *mi = priv;
 
+	if (rq->q != mi->q)
+		return;
+
 	/*
 	 * index[0] counts the specific partition that was asked for. index[1]
 	 * counts the ones that are active on the whole device, so increment
@@ -110,7 +114,11 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
 void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
 		      unsigned int inflight[2])
 {
-	struct mq_inflight mi = { .part = part, .inflight = inflight, };
+	struct mq_inflight mi = {
+		.q = q,
+		.part = part,
+		.inflight = inflight,
+	};
 
 	inflight[0] = inflight[1] = 0;
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
@@ -129,7 +137,11 @@ static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
 void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
 			 unsigned int inflight[2])
 {
-	struct mq_inflight mi = { .part = part, .inflight = inflight, };
+	struct mq_inflight mi = {
+		.q = q,
+		.part = part,
+		.inflight = inflight,
+	};
 
 	inflight[0] = inflight[1] = 0;
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
@@ -477,6 +489,8 @@ static void __blk_mq_free_request(struct request *rq)
 	const int sched_tag = rq->internal_tag;
 
 	blk_pm_mark_last_busy(rq);
+	/* Avoid that blk_mq_queue_tag_busy_iter() picks up this request. */
+	rq->q = NULL;
 	if (rq->tag != -1)
 		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
 	if (sched_tag != -1)



  reply	other threads:[~2018-12-20 20:56 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-19 23:24 v4.20-rc6: Sporadic use-after-free in bt_iter() Bart Van Assche
2018-12-19 23:27 ` Jens Axboe
2018-12-20  0:16   ` Bart Van Assche
2018-12-20  3:17     ` Jens Axboe
2018-12-20  3:24       ` jianchao.wang
2018-12-20  4:19         ` Jens Axboe
2018-12-20  4:32           ` jianchao.wang
2018-12-20  4:48             ` Jens Axboe
2018-12-20  5:03               ` jianchao.wang
2018-12-20 13:02                 ` Jens Axboe
2018-12-20 13:07                   ` Jens Axboe
2018-12-20 18:01                     ` Bart Van Assche
2018-12-20 18:21                       ` Jens Axboe
2018-12-20 18:33                         ` Jens Axboe
2018-12-20 20:56                           ` Bart Van Assche [this message]
2018-12-20 21:00                             ` Jens Axboe
2018-12-20 21:23                               ` Bart Van Assche
2018-12-20 21:26                                 ` Jens Axboe
2018-12-20 21:31                                   ` Bart Van Assche
2018-12-20 21:34                                     ` Jens Axboe
2018-12-20 21:40                                       ` Bart Van Assche
2018-12-20 21:44                                         ` Jens Axboe
2018-12-20 21:48                                           ` Jens Axboe
2018-12-20 22:19                                             ` Bart Van Assche
2018-12-20 22:23                                               ` Jens Axboe
2018-12-20 22:33                                                 ` Jens Axboe
2018-12-20 22:47                                                   ` Jens Axboe
2018-12-20 22:50                                                     ` Jens Axboe
2019-02-14 23:36                                                       ` Bart Van Assche
2019-02-15 18:29                                                         ` Evan Green
2019-02-19 16:48                                                           ` Bart Van Assche
2019-02-21 20:54                                                             ` Evan Green
2019-02-15  2:57                                                       ` jianchao.wang
2018-12-20  4:06 ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1545339362.185366.511.camel@acm.org \
    --to=bvanassche@acm.org \
    --cc=axboe@kernel.dk \
    --cc=jianchao.w.wang@oracle.com \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).