* [PATCH 4.21 V3] blk-mq: not embed .mq_kobj and ctx->kobj into queue instance
@ 2018-11-20 1:44 Ming Lei
2018-11-20 7:51 ` Greg Kroah-Hartman
0 siblings, 1 reply; 4+ messages in thread
From: Ming Lei @ 2018-11-20 1:44 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Ming Lei, jianchao.wang, Guenter Roeck,
Greg Kroah-Hartman, stable
Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime
from block layer's view, actually they don't because userspace may
grab one kobject anytime via sysfs.
This patch fixes the issue by the following approach:
1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing
all ctxs
2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release
handler of .mq_kobj
3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that
.mq_kobj is always released after all ctxs are freed.
This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE
is enabled.
Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: "jianchao.wang" <jianchao.w.wang@oracle.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable@vger.kernel.org
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V3:
- keep to allocate q->queue_ctx via percpu allocator, so one extra
pointer reference can be saved for getting ctx
V2:
- allocate 'blk_mq_ctx' inside blk_mq_init_allocated_queue()
- allocate q->mq_kobj directly
block/blk-mq-sysfs.c | 34 ++++++++++++++++++++++++----------
block/blk-mq.c | 39 ++++++++++++++++++++++++++++++++-------
block/blk-mq.h | 6 ++++++
include/linux/blkdev.h | 2 +-
4 files changed, 63 insertions(+), 18 deletions(-)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 3d25b9c419e9..6efef1f679f0 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -15,6 +15,18 @@
static void blk_mq_sysfs_release(struct kobject *kobj)
{
+ struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);
+
+ free_percpu(ctxs->queue_ctx);
+ kfree(ctxs);
+}
+
+static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
+{
+ struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+
+ /* ctx->ctxs won't be released until all ctx are freed */
+ kobject_put(&ctx->ctxs->kobj);
}
static void blk_mq_hw_sysfs_release(struct kobject *kobj)
@@ -213,7 +225,7 @@ static struct kobj_type blk_mq_ktype = {
static struct kobj_type blk_mq_ctx_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
.default_attrs = default_ctx_attrs,
- .release = blk_mq_sysfs_release,
+ .release = blk_mq_ctx_sysfs_release,
};
static struct kobj_type blk_mq_hw_ktype = {
@@ -245,7 +257,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
if (!hctx->nr_ctx)
return 0;
- ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
+ ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num);
if (ret)
return ret;
@@ -268,8 +280,8 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx);
- kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
- kobject_del(&q->mq_kobj);
+ kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+ kobject_del(q->mq_kobj);
kobject_put(&dev->kobj);
q->mq_sysfs_init_done = false;
@@ -289,7 +301,7 @@ void blk_mq_sysfs_deinit(struct request_queue *q)
ctx = per_cpu_ptr(q->queue_ctx, cpu);
kobject_put(&ctx->kobj);
}
- kobject_put(&q->mq_kobj);
+ kobject_put(q->mq_kobj);
}
void blk_mq_sysfs_init(struct request_queue *q)
@@ -297,10 +309,12 @@ void blk_mq_sysfs_init(struct request_queue *q)
struct blk_mq_ctx *ctx;
int cpu;
- kobject_init(&q->mq_kobj, &blk_mq_ktype);
+ kobject_init(q->mq_kobj, &blk_mq_ktype);
for_each_possible_cpu(cpu) {
ctx = per_cpu_ptr(q->queue_ctx, cpu);
+
+ kobject_get(q->mq_kobj);
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}
}
@@ -313,11 +327,11 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
WARN_ON_ONCE(!q->kobj.parent);
lockdep_assert_held(&q->sysfs_lock);
- ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
+ ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
if (ret < 0)
goto out;
- kobject_uevent(&q->mq_kobj, KOBJ_ADD);
+ kobject_uevent(q->mq_kobj, KOBJ_ADD);
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
@@ -334,8 +348,8 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
while (--i >= 0)
blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
- kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
- kobject_del(&q->mq_kobj);
+ kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+ kobject_del(q->mq_kobj);
kobject_put(&dev->kobj);
return ret;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32b246ed44c0..9228f2e9bd40 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2515,6 +2515,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
mutex_unlock(&set->tag_list_lock);
}
+/* All allocations will be freed in release handler of q->mq_kobj */
+static int blk_mq_alloc_ctxs(struct request_queue *q)
+{
+ struct blk_mq_ctxs *ctxs;
+ int cpu;
+
+ ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
+ if (!ctxs)
+ return -ENOMEM;
+
+ ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+ if (!ctxs->queue_ctx)
+ goto fail;
+
+ for_each_possible_cpu(cpu) {
+ struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
+ ctx->ctxs = ctxs;
+ }
+
+ q->mq_kobj = &ctxs->kobj;
+ q->queue_ctx = ctxs->queue_ctx;
+
+ return 0;
+ fail:
+ kfree(ctxs);
+ return -ENOMEM;
+}
+
/*
* It is the actual release handler for mq, but we do it from
* request queue's release handler for avoiding use-after-free
@@ -2540,8 +2568,6 @@ void blk_mq_release(struct request_queue *q)
* both share lifetime with request queue.
*/
blk_mq_sysfs_deinit(q);
-
- free_percpu(q->queue_ctx);
}
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
@@ -2731,8 +2757,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
if (!q->poll_cb)
goto err_exit;
- q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
- if (!q->queue_ctx)
+ if (blk_mq_alloc_ctxs(q))
goto err_exit;
/* init q->mq_kobj and sw queues' kobjects */
@@ -2742,7 +2767,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
GFP_KERNEL, set->numa_node);
if (!q->queue_hw_ctx)
- goto err_percpu;
+ goto err_sys_init;
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
@@ -2794,8 +2819,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
kfree(q->queue_hw_ctx);
-err_percpu:
- free_percpu(q->queue_ctx);
+err_sys_init:
+ blk_mq_sysfs_deinit(q);
err_exit:
q->mq_ops = NULL;
return ERR_PTR(-ENOMEM);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index facb6e9ddce4..9ae8e9f8f8b1 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -7,6 +7,11 @@
struct blk_mq_tag_set;
+struct blk_mq_ctxs {
+ struct kobject kobj;
+ struct blk_mq_ctx __percpu *queue_ctx;
+};
+
/**
* struct blk_mq_ctx - State for a software queue facing the submitting CPUs
*/
@@ -27,6 +32,7 @@ struct blk_mq_ctx {
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
struct request_queue *queue;
+ struct blk_mq_ctxs *ctxs;
struct kobject kobj;
} ____cacheline_aligned_in_smp;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ad6eafc43f2..efd9a31bd226 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -456,7 +456,7 @@ struct request_queue {
/*
* mq queue kobject
*/
- struct kobject mq_kobj;
+ struct kobject *mq_kobj;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity integrity;
--
2.9.5
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 4.21 V3] blk-mq: not embed .mq_kobj and ctx->kobj into queue instance
2018-11-20 1:44 [PATCH 4.21 V3] blk-mq: not embed .mq_kobj and ctx->kobj into queue instance Ming Lei
@ 2018-11-20 7:51 ` Greg Kroah-Hartman
2018-11-20 16:45 ` Guenter Roeck
0 siblings, 1 reply; 4+ messages in thread
From: Greg Kroah-Hartman @ 2018-11-20 7:51 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block, jianchao.wang, Guenter Roeck, stable
On Tue, Nov 20, 2018 at 09:44:35AM +0800, Ming Lei wrote:
> Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime
> from block layer's view, actually they don't because userspace may
> grab one kobject anytime via sysfs.
>
> This patch fixes the issue by the following approach:
>
> 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing
> all ctxs
>
> 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release
> handler of .mq_kobj
>
> 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that
> .mq_kobj is always released after all ctxs are freed.
>
> This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE
> is enabled.
>
> Reported-by: Guenter Roeck <linux@roeck-us.net>
> Cc: "jianchao.wang" <jianchao.w.wang@oracle.com>
> Cc: Guenter Roeck <linux@roeck-us.net>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: stable@vger.kernel.org
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> V3:
> - keep to allocate q->queue_ctx via percpu allocator, so one extra
> pointer reference can be saved for getting ctx
> V2:
> - allocate 'blk_mq_ctx' inside blk_mq_init_allocated_queue()
> - allocate q->mq_kobj directly
Not tested, but seems sane from a kobject point-of-view:
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 4.21 V3] blk-mq: not embed .mq_kobj and ctx->kobj into queue instance
2018-11-20 7:51 ` Greg Kroah-Hartman
@ 2018-11-20 16:45 ` Guenter Roeck
2018-11-20 17:35 ` Jens Axboe
0 siblings, 1 reply; 4+ messages in thread
From: Guenter Roeck @ 2018-11-20 16:45 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: Ming Lei, Jens Axboe, linux-block, jianchao.wang, stable
On Tue, Nov 20, 2018 at 08:51:50AM +0100, Greg Kroah-Hartman wrote:
> On Tue, Nov 20, 2018 at 09:44:35AM +0800, Ming Lei wrote:
> > Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime
> > from block layer's view, actually they don't because userspace may
> > grab one kobject anytime via sysfs.
> >
> > This patch fixes the issue by the following approach:
> >
> > 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing
> > all ctxs
> >
> > 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release
> > handler of .mq_kobj
> >
> > 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that
> > .mq_kobj is always released after all ctxs are freed.
> >
> > This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE
> > is enabled.
> >
> > Reported-by: Guenter Roeck <linux@roeck-us.net>
> > Cc: "jianchao.wang" <jianchao.w.wang@oracle.com>
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> > V3:
> > - keep to allocate q->queue_ctx via percpu allocator, so one extra
> > pointer reference can be saved for getting ctx
> > V2:
> > - allocate 'blk_mq_ctx' inside blk_mq_init_allocated_queue()
> > - allocate q->mq_kobj directly
>
> Not tested, but seems sane from a kobject point-of-view:
>
> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Guenter Roeck <linux@roeck-us.net>
with v4.14.y and v4.19.y.
The patch is marked for v4.21. I would kindly suggest to not wait for v4.21
but apply it to v4.20. This would let us enable DEBUG_KOBJECT_RELEASE
with syzbot on upstream and stable kernels.
Greg, applying the patch to v4.14.y will require a backport due to a minor
context conflict. I'll send that to you after the patch is available in
mainline.
Thanks,
Guenter
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 4.21 V3] blk-mq: not embed .mq_kobj and ctx->kobj into queue instance
2018-11-20 16:45 ` Guenter Roeck
@ 2018-11-20 17:35 ` Jens Axboe
0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2018-11-20 17:35 UTC (permalink / raw)
To: Guenter Roeck, Greg Kroah-Hartman
Cc: Ming Lei, linux-block, jianchao.wang, stable
On 11/20/18 9:45 AM, Guenter Roeck wrote:
> On Tue, Nov 20, 2018 at 08:51:50AM +0100, Greg Kroah-Hartman wrote:
>> On Tue, Nov 20, 2018 at 09:44:35AM +0800, Ming Lei wrote:
>>> Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime
>>> from block layer's view, actually they don't because userspace may
>>> grab one kobject anytime via sysfs.
>>>
>>> This patch fixes the issue by the following approach:
>>>
>>> 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing
>>> all ctxs
>>>
>>> 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release
>>> handler of .mq_kobj
>>>
>>> 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that
>>> .mq_kobj is always released after all ctxs are freed.
>>>
>>> This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE
>>> is enabled.
>>>
>>> Reported-by: Guenter Roeck <linux@roeck-us.net>
>>> Cc: "jianchao.wang" <jianchao.w.wang@oracle.com>
>>> Cc: Guenter Roeck <linux@roeck-us.net>
>>> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
>>> ---
>>> V3:
>>> - keep to allocate q->queue_ctx via percpu allocator, so one extra
>>> pointer reference can be saved for getting ctx
>>> V2:
>>> - allocate 'blk_mq_ctx' inside blk_mq_init_allocated_queue()
>>> - allocate q->mq_kobj directly
>>
>> Not tested, but seems sane from a kobject point-of-view:
>>
>> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> Tested-by: Guenter Roeck <linux@roeck-us.net>
>
> with v4.14.y and v4.19.y.
>
> The patch is marked for v4.21. I would kindly suggest to not wait for v4.21
> but apply it to v4.20. This would let us enable DEBUG_KOBJECT_RELEASE
> with syzbot on upstream and stable kernels.
I'd very much like to put this into 4.21, and not 4.20, as that's much
less risky. This isn't a new regression anyway, so there's no rush to
put it into 4.20 as far as I'm concerned.
--
Jens Axboe
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2018-11-21 4:05 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-20 1:44 [PATCH 4.21 V3] blk-mq: not embed .mq_kobj and ctx->kobj into queue instance Ming Lei
2018-11-20 7:51 ` Greg Kroah-Hartman
2018-11-20 16:45 ` Guenter Roeck
2018-11-20 17:35 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).