From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>,
"jianchao.wang" <jianchao.w.wang@oracle.com>,
"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>
Subject: Re: v4.20-rc6: Sporadic use-after-free in bt_iter()
Date: Thu, 20 Dec 2018 12:56:02 -0800 [thread overview]
Message-ID: <1545339362.185366.511.camel@acm.org> (raw)
In-Reply-To: <b9c3bd7c-9e82-7300-d525-0bd72ed66f66@kernel.dk>
On Thu, 2018-12-20 at 11:33 -0700, Jens Axboe wrote:
> [ ... ]
> Something like this, totally untested. If the queue matches, we know it's
> safe to dereference it.
>
> A safer API to export as well...
>
>
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 2089c6c62f44..78192b544fa2 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
>
> if (!reserved)
> bitnr += tags->nr_reserved_tags;
> - rq = tags->rqs[bitnr];
> + if (tags->rqs[bitnr].queue != hctx->queue)
> + return true;
>
> /*
> * We can hit rq == NULL here, because the tagging functions
> * test and set the bit before assigning ->rqs[].
> */
> - if (rq && rq->q == hctx->queue)
> + rq = tags->rqs[bitnr].rq;
> + if (rq)
> return iter_data->fn(hctx, rq, iter_data->data, reserved);
> return true;
> }
> @@ -268,6 +270,7 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
>
> struct bt_tags_iter_data {
> struct blk_mq_tags *tags;
> + struct blk_mq_hw_ctx *hctx;
> busy_tag_iter_fn *fn;
> void *data;
> bool reserved;
> @@ -287,7 +290,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
> * We can hit rq == NULL here, because the tagging functions
> * test and set the bit before assining ->rqs[].
> */
> - rq = tags->rqs[bitnr];
> + rq = tags->rqs[bitnr].rq;
> if (rq && blk_mq_request_started(rq))
> return iter_data->fn(rq, iter_data->data, reserved);
>
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index 61deab0b5a5a..bb84d1f099c7 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -4,6 +4,11 @@
>
> #include "blk-mq.h"
>
> +struct rq_tag_entry {
> + struct request_queue *queue;
> + struct request *rq;
> +};
> +
> /*
> * Tag address space map.
> */
> @@ -16,7 +21,7 @@ struct blk_mq_tags {
> struct sbitmap_queue bitmap_tags;
> struct sbitmap_queue breserved_tags;
>
> - struct request **rqs;
> + struct rq_tag_entry *rqs;
> struct request **static_rqs;
> struct list_head page_list;
> };
> @@ -78,7 +83,8 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
> static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
> unsigned int tag, struct request *rq)
> {
> - hctx->tags->rqs[tag] = rq;
> + hctx->tags->rqs[tag].queue = hctx->queue;
> + hctx->tags->rqs[tag].rq = rq;
> }
>
> static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3ba37b9e15e9..4f194946dbd9 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -298,13 +298,16 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> rq->tag = -1;
> rq->internal_tag = tag;
> } else {
> - if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
> + struct blk_mq_hw_ctx *hctx = data->hctx;
> +
> + if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
> rq_flags = RQF_MQ_INFLIGHT;
> - atomic_inc(&data->hctx->nr_active);
> + atomic_inc(&hctx->nr_active);
> }
> rq->tag = tag;
> rq->internal_tag = -1;
> - data->hctx->tags->rqs[rq->tag] = rq;
> + hctx->tags->rqs[rq->tag].queue = hctx->queue;
> + hctx->tags->rqs[rq->tag].rq = rq;
> }
>
> /* csd/requeue_work/fifo_time is initialized before use */
> @@ -797,8 +800,8 @@ EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
> {
> if (tag < tags->nr_tags) {
> - prefetch(tags->rqs[tag]);
> - return tags->rqs[tag];
> + prefetch(tags->rqs[tag].rq);
> + return tags->rqs[tag].rq;
> }
>
> return NULL;
> @@ -809,10 +812,11 @@ static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
> void *priv, bool reserved)
> {
> /*
> - * If we find a request that is inflight and the queue matches,
> - * we know the queue is busy. Return false to stop the iteration.
> + * We're only called here if the queue matches. If the rq state is
> + * inflight, we know the queue is busy. Return false to stop the
> + * iteration.
> */
> - if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
> + if (rq->state == MQ_RQ_IN_FLIGHT) {
> bool *busy = priv;
>
> *busy = true;
> @@ -1049,11 +1053,14 @@ bool blk_mq_get_driver_tag(struct request *rq)
> shared = blk_mq_tag_busy(data.hctx);
> rq->tag = blk_mq_get_tag(&data);
> if (rq->tag >= 0) {
> + struct blk_mq_hw_ctx *hctx = data.hctx;
> +
> if (shared) {
> rq->rq_flags |= RQF_MQ_INFLIGHT;
> atomic_inc(&data.hctx->nr_active);
> }
> - data.hctx->tags->rqs[rq->tag] = rq;
> + hctx->tags->rqs[rq->tag].queue = hctx->queue;
> + hctx->tags->rqs[rq->tag].rq = rq;
> }
>
> done:
> @@ -2069,7 +2076,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
> if (!tags)
> return NULL;
>
> - tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
> + tags->rqs = kcalloc_node(nr_tags, sizeof(struct rq_tag_entry),
> GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
> node);
> if (!tags->rqs) {
Hi Jens,
How about the patch below? Its behavior should be similar to your patch but I think
this patch is simpler.
Thanks,
Bart.
---
block/blk-mq.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6a7566244de3..c57611b1f2a8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -86,6 +86,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
}
struct mq_inflight {
+ struct request_queue *q;
struct hd_struct *part;
unsigned int *inflight;
};
@@ -96,6 +97,9 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
+ if (rq->q != mi->q)
+ return;
+
/*
* index[0] counts the specific partition that was asked for. index[1]
* counts the ones that are active on the whole device, so increment
@@ -110,7 +114,11 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = {
+ .q = q,
+ .part = part,
+ .inflight = inflight,
+ };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
@@ -129,7 +137,11 @@ static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = {
+ .q = q,
+ .part = part,
+ .inflight = inflight,
+ };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
@@ -477,6 +489,8 @@ static void __blk_mq_free_request(struct request *rq)
const int sched_tag = rq->internal_tag;
blk_pm_mark_last_busy(rq);
+ /* Avoid that blk_mq_queue_tag_busy_iter() picks up this request. */
+ rq->q = NULL;
if (rq->tag != -1)
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
if (sched_tag != -1)
next prev parent reply other threads:[~2018-12-20 20:56 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-19 23:24 v4.20-rc6: Sporadic use-after-free in bt_iter() Bart Van Assche
2018-12-19 23:27 ` Jens Axboe
2018-12-20 0:16 ` Bart Van Assche
2018-12-20 3:17 ` Jens Axboe
2018-12-20 3:24 ` jianchao.wang
2018-12-20 4:19 ` Jens Axboe
2018-12-20 4:32 ` jianchao.wang
2018-12-20 4:48 ` Jens Axboe
2018-12-20 5:03 ` jianchao.wang
2018-12-20 13:02 ` Jens Axboe
2018-12-20 13:07 ` Jens Axboe
2018-12-20 18:01 ` Bart Van Assche
2018-12-20 18:21 ` Jens Axboe
2018-12-20 18:33 ` Jens Axboe
2018-12-20 20:56 ` Bart Van Assche [this message]
2018-12-20 21:00 ` Jens Axboe
2018-12-20 21:23 ` Bart Van Assche
2018-12-20 21:26 ` Jens Axboe
2018-12-20 21:31 ` Bart Van Assche
2018-12-20 21:34 ` Jens Axboe
2018-12-20 21:40 ` Bart Van Assche
2018-12-20 21:44 ` Jens Axboe
2018-12-20 21:48 ` Jens Axboe
2018-12-20 22:19 ` Bart Van Assche
2018-12-20 22:23 ` Jens Axboe
2018-12-20 22:33 ` Jens Axboe
2018-12-20 22:47 ` Jens Axboe
2018-12-20 22:50 ` Jens Axboe
2019-02-14 23:36 ` Bart Van Assche
2019-02-15 18:29 ` Evan Green
2019-02-19 16:48 ` Bart Van Assche
2019-02-21 20:54 ` Evan Green
2019-02-15 2:57 ` jianchao.wang
2018-12-20 4:06 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1545339362.185366.511.camel@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=jianchao.w.wang@oracle.com \
--cc=linux-block@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).