* [PATCH v3 1/5] blk-mq: add a new interface to get request by tag
2021-08-24 14:12 [PATCH v3 0/5] fix request uaf in nbd_read_stat() Yu Kuai
@ 2021-08-24 14:12 ` Yu Kuai
2021-08-24 14:12 ` [PATCH v3 2/5] nbd: convert to use blk_mq_get_rq_by_tag() Yu Kuai
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Yu Kuai @ 2021-08-24 14:12 UTC (permalink / raw)
To: axboe, josef, ming.lei, bvanassche
Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
Ming Lei had fixed the request uaf while iterating tags in
commit bd63141d585b ("blk-mq: clear stale request in tags->rq[] before
freeing one request pool").
However, hctx->tags->rq[] will point to hctx->sched_tags->static_rq[]
in blk_mq_get_driver_tag(), and blk_mq_tag_to_rq() can access such
request in some drivers. Generally it won't be a problem if the
driver can make sure to get a driver tag before calling
blk_mq_tag_to_rq(). However, nbd will do such a thing once it receives
a reply message from the server, and there isn't any mechanism to
guarantee that it won't handle a reply message without a corresponding
request message.
Thus add a new interface to make sure it won't return a freed request,
so that nbd can check whether it has sent the corresponding request message.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-mq-tag.c | 37 +++++++++++++++++++++++++++++++++++++
block/blk-mq.c | 1 +
block/blk-mq.h | 1 -
include/linux/blk-mq.h | 4 ++++
4 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 86f87346232a..ddb159414661 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -652,3 +652,40 @@ u32 blk_mq_unique_tag(struct request *rq)
(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
+
+
+/**
+ * blk_mq_get_rq_by_tag - if the request that is represented by the tag is
+ * not idle, increment its reference and then return it. Otherwise return
+ * NULL.
+ *
+ * @tags: the tags we are looking from
+ * @tag: the tag that represents the request
+ */
+struct request *blk_mq_get_rq_by_tag(struct blk_mq_tags *tags,
+ unsigned int tag)
+{
+ unsigned long flags;
+ struct request *rq;
+
+ /* hold lock to prevent accessing freed request by tag */
+ spin_lock_irqsave(&tags->lock, flags);
+ rq = blk_mq_tag_to_rq(tags, tag);
+ if (!rq)
+ goto out_unlock;
+
+ if (!refcount_inc_not_zero(&rq->ref)) {
+ rq = NULL;
+ goto out_unlock;
+ }
+
+ if (!blk_mq_request_started(rq)) {
+ blk_mq_put_rq_ref(rq);
+ rq = NULL;
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&tags->lock, flags);
+ return rq;
+}
+EXPORT_SYMBOL(blk_mq_get_rq_by_tag);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0b3d3e2acb6a..c756a26ed92d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -916,6 +916,7 @@ void blk_mq_put_rq_ref(struct request *rq)
else if (refcount_dec_and_test(&rq->ref))
__blk_mq_free_request(rq);
}
+EXPORT_SYMBOL_GPL(blk_mq_put_rq_ref);
static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d08779f77a26..20ef743a3ff6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -47,7 +47,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
-void blk_mq_put_rq_ref(struct request *rq);
/*
* Internal helpers for allocating/freeing the request map
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 22215db36122..ccd8fc4a0bdb 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -641,4 +641,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio);
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
struct lock_class_key *key);
+struct request *blk_mq_get_rq_by_tag(struct blk_mq_tags *tags,
+ unsigned int tag);
+void blk_mq_put_rq_ref(struct request *rq);
+
#endif
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 2/5] nbd: convert to use blk_mq_get_rq_by_tag()
2021-08-24 14:12 [PATCH v3 0/5] fix request uaf in nbd_read_stat() Yu Kuai
2021-08-24 14:12 ` [PATCH v3 1/5] blk-mq: add a new interface to get request by tag Yu Kuai
@ 2021-08-24 14:12 ` Yu Kuai
2021-08-24 14:12 ` [PATCH v3 3/5] nbd: don't handle response without a corresponding request message Yu Kuai
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Yu Kuai @ 2021-08-24 14:12 UTC (permalink / raw)
To: axboe, josef, ming.lei, bvanassche
Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
blk_mq_tag_to_rq() can only be guaranteed to return a valid request in
the following situation:
1) client send request message to server first
submit_bio
...
blk_mq_get_tag
...
blk_mq_get_driver_tag
...
nbd_queue_rq
nbd_handle_cmd
nbd_send_cmd
2) client receive respond message from server
recv_work
nbd_read_stat
blk_mq_tag_to_rq
If step 1) is missing, blk_mq_tag_to_rq() will return a stale
request, which might be freed. Thus convert to use
blk_mq_get_rq_by_tag() to make sure the returned request is not
freed. However, there are still some problems if the request is
started, and this will be fixed in later patches.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 76983185a9a5..ca54a0736090 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -733,11 +733,10 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
- req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
- blk_mq_unique_tag_to_tag(tag));
- if (!req || !blk_mq_request_started(req)) {
- dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
- tag, req);
+ req = blk_mq_get_rq_by_tag(nbd->tag_set.tags[hwq],
+ blk_mq_unique_tag_to_tag(tag));
+ if (!req) {
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d\n", tag);
return ERR_PTR(-ENOENT);
}
trace_nbd_header_received(req, handle);
@@ -799,6 +798,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
}
out:
trace_nbd_payload_received(req, handle);
+ if (req)
+ blk_mq_put_rq_ref(req);
mutex_unlock(&cmd->lock);
return ret ? ERR_PTR(ret) : cmd;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 3/5] nbd: don't handle response without a corresponding request message
2021-08-24 14:12 [PATCH v3 0/5] fix request uaf in nbd_read_stat() Yu Kuai
2021-08-24 14:12 ` [PATCH v3 1/5] blk-mq: add a new interface to get request by tag Yu Kuai
2021-08-24 14:12 ` [PATCH v3 2/5] nbd: convert to use blk_mq_get_rq_by_tag() Yu Kuai
@ 2021-08-24 14:12 ` Yu Kuai
2021-08-24 14:12 ` [PATCH v3 4/5] nbd: make sure request completion won't concurrent Yu Kuai
2021-08-24 14:12 ` [PATCH v3 5/5] nbd: don't start request if nbd_queue_rq() failed Yu Kuai
4 siblings, 0 replies; 6+ messages in thread
From: Yu Kuai @ 2021-08-24 14:12 UTC (permalink / raw)
To: axboe, josef, ming.lei, bvanassche
Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
While handling a response message from the server, nbd_read_stat() will
try to get the request by tag, and then complete the request. However,
this is problematic if nbd hasn't sent a corresponding request
message:
t1 t2
submit_bio
nbd_queue_rq
blk_mq_start_request
recv_work
nbd_read_stat
blk_mq_get_rq_by_tag
blk_mq_complete_request
nbd_send_cmd
Thus add a new cmd flag 'NBD_CMD_INFLIGHT'; it will be set in
nbd_send_cmd() and checked in nbd_read_stat().
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ca54a0736090..7b9e19675224 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -127,6 +127,11 @@ struct nbd_device {
};
#define NBD_CMD_REQUEUED 1
+/*
+ * This flag will be set if nbd_send_cmd() succeeds, and will be checked in
+ * normal completion.
+ */
+#define NBD_CMD_INFLIGHT 2
struct nbd_cmd {
struct nbd_device *nbd;
@@ -743,6 +748,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "NBD_CMD_INFLIGHT is not set %d\n",
+ tag);
+ ret = -ENOENT;
+ goto out;
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
@@ -980,6 +991,8 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
* returns EAGAIN can be retried on a different socket.
*/
ret = nbd_send_cmd(nbd, cmd, index);
+ if (!ret)
+ set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
if (ret == -EAGAIN) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 4/5] nbd: make sure request completion won't concurrent
2021-08-24 14:12 [PATCH v3 0/5] fix request uaf in nbd_read_stat() Yu Kuai
` (2 preceding siblings ...)
2021-08-24 14:12 ` [PATCH v3 3/5] nbd: don't handle response without a corresponding request message Yu Kuai
@ 2021-08-24 14:12 ` Yu Kuai
2021-08-24 14:12 ` [PATCH v3 5/5] nbd: don't start request if nbd_queue_rq() failed Yu Kuai
4 siblings, 0 replies; 6+ messages in thread
From: Yu Kuai @ 2021-08-24 14:12 UTC (permalink / raw)
To: axboe, josef, ming.lei, bvanassche
Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
commit cddce0116058 ("nbd: Aovid double completion of a request")
tried to fix the issue that nbd_clear_que() and recv_work() can
complete a request concurrently. However, the problem still exists:
t1 t2 t3
nbd_disconnect_and_put
flush_workqueue
recv_work
blk_mq_complete_request
blk_mq_complete_request_remote -> this is true
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE)
blk_mq_raise_softirq
blk_done_softirq
blk_complete_reqs
nbd_complete_rq
blk_mq_end_request
blk_mq_free_request
WRITE_ONCE(rq->state, MQ_RQ_IDLE)
nbd_clear_que
blk_mq_tagset_busy_iter
nbd_clear_req
__blk_mq_free_request
blk_mq_put_tag
blk_mq_complete_request
There are three places where request can be completed in nbd:
recv_work(), nbd_clear_que() and nbd_xmit_timeout(). Since they
all hold cmd->lock before completing the request, it's easy to
avoid the problem by setting and checking a cmd flag.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7b9e19675224..4d5098d01758 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -416,12 +416,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
+ bool need_complete;
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
+ need_complete =
+ test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
goto done;
}
@@ -490,11 +493,13 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
+ need_complete = test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
nbd_config_put(nbd);
done:
- blk_mq_complete_request(req);
+ if (need_complete)
+ blk_mq_complete_request(req);
return BLK_EH_DONE;
}
@@ -849,6 +854,7 @@ static void recv_work(struct work_struct *work)
static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ bool need_complete;
/* don't abort one completed request */
if (blk_mq_request_completed(req))
@@ -856,9 +862,11 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
+ need_complete = test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
- blk_mq_complete_request(req);
+ if (need_complete)
+ blk_mq_complete_request(req);
return true;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 5/5] nbd: don't start request if nbd_queue_rq() failed
2021-08-24 14:12 [PATCH v3 0/5] fix request uaf in nbd_read_stat() Yu Kuai
` (3 preceding siblings ...)
2021-08-24 14:12 ` [PATCH v3 4/5] nbd: make sure request completion won't concurrent Yu Kuai
@ 2021-08-24 14:12 ` Yu Kuai
4 siblings, 0 replies; 6+ messages in thread
From: Yu Kuai @ 2021-08-24 14:12 UTC (permalink / raw)
To: axboe, josef, ming.lei, bvanassche
Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
Currently, blk_mq_end_request() will be called if nbd_queue_rq()
fails, thus starting the request in such a situation is useless.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4d5098d01758..c22dbb9b5065 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -944,7 +944,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
if (!refcount_inc_not_zero(&nbd->config_refs)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Socks array is empty\n");
- blk_mq_start_request(req);
return -EINVAL;
}
config = nbd->config;
@@ -953,7 +952,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n");
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EINVAL;
}
cmd->status = BLK_STS_OK;
@@ -977,7 +975,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
*/
sock_shutdown(nbd);
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EIO;
}
goto again;
--
2.31.1
^ permalink raw reply related [flat|nested] 6+ messages in thread