From: Yufen Yu <yuyufen@huawei.com>
To: <axboe@kernel.dk>, <linux-block@vger.kernel.org>,
<linux-fsdevel@vger.kernel.org>
Cc: <tj@kernel.org>, <jack@suse.cz>, <bvanassche@acm.org>, <tytso@mit.edu>
Subject: [PATCH v2 3/7] bdi: protect device lifetime with RCU
Date: Wed, 26 Feb 2020 19:18:47 +0800 [thread overview]
Message-ID: <20200226111851.55348-4-yuyufen@huawei.com> (raw)
In-Reply-To: <20200226111851.55348-1-yuyufen@huawei.com>
We reported a kernel crash:
[201962.639350] Call trace:
[201962.644403] string+0x28/0xa0
[201962.650501] vsnprintf+0x5f0/0x748
[201962.657472] seq_vprintf+0x70/0x98
[201962.664442] seq_printf+0x7c/0xa0
[201962.671238] __blkg_prfill_rwstat+0x84/0x128
[201962.679949] blkg_prfill_rwstat_field+0x94/0xc0
[201962.689182] blkcg_print_blkgs+0xcc/0x140
[201962.697370] blkg_print_stat_bytes+0x4c/0x60
[201962.706083] cgroup_seqfile_show+0x58/0xc0
[201962.714446] kernfs_seq_show+0x44/0x50
[201962.722112] seq_read+0xd4/0x4a8
[201962.728732] kernfs_fop_read+0x16c/0x218
[201962.736748] __vfs_read+0x60/0x188
[201962.743717] vfs_read+0x94/0x150
[201962.750338] ksys_read+0x6c/0xd8
[201962.756958] __arm64_sys_read+0x24/0x30
[201962.764800] el0_svc_common+0x78/0x130
[201962.772466] el0_svc_handler+0x38/0x78
[201962.780131] el0_svc+0x8/0xc
__blkg_prfill_rwstat() tries to get the device name by 'bdi->dev',
while the device and kobj->name has been freed by bdi_unregister().
Then, blkg_dev_name() will return an invalid name pointer, resulting
in crash on string(). The race as following:
CPU1 CPU2
blkg_print_stat_bytes
__scsi_remove_device
del_gendisk
bdi_unregister
put_device(bdi->dev)
kfree(bdi->dev)
__blkg_prfill_rwstat
blkg_dev_name
//use the freed bdi->dev
dev_name(blkg->q->backing_dev_info->dev)
bdi->dev = NULL
We fix the bug by protecting the device lifetime with RCU.
call_rcu() will free the device until all of the readers complete.
Since both of blkg_dev_name() and device name have been protected
by rcu_read_lock/unlock(). Thus, we don't need to do it again.
Just use rcu_dereference() to fetch rcu_dev here.
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
---
block/bfq-iosched.c | 2 +-
block/blk-cgroup.c | 8 ++++--
block/genhd.c | 4 +--
fs/ext4/super.c | 2 +-
include/linux/backing-dev-defs.h | 8 +++++-
include/linux/backing-dev.h | 4 +--
mm/backing-dev.c | 59 +++++++++++++++++++++++++++++++++-------
7 files changed, 68 insertions(+), 19 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 8c436abfaf14..00904611b8e4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4978,7 +4978,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
switch (ioprio_class) {
default:
- dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
+ dev_err(&bfqq->bfqd->queue->backing_dev_info->rcu_dev->dev,
"bfq: bad prio class %d\n", ioprio_class);
/* fall through */
case IOPRIO_CLASS_NONE:
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index a229b94d5390..7ab1af3925c5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -494,9 +494,13 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
const char *blkg_dev_name(struct blkcg_gq *blkg)
{
+ struct bdi_rcu_device *rcu_dev;
+
+ rcu_dev = rcu_dereference(blkg->q->backing_dev_info->rcu_dev);
+
/* some drivers (floppy) instantiate a queue w/o disk registered */
- if (blkg->q->backing_dev_info->dev)
- return dev_name(blkg->q->backing_dev_info->dev);
+ if (rcu_dev)
+ return dev_name(&rcu_dev->dev);
return NULL;
}
diff --git a/block/genhd.c b/block/genhd.c
index ff6268970ddc..bd406b0fb471 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -668,9 +668,9 @@ static void register_disk(struct device *parent, struct gendisk *disk,
kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
disk_part_iter_exit(&piter);
- if (disk->queue->backing_dev_info->dev) {
+ if (disk->queue->backing_dev_info->rcu_dev) {
err = sysfs_create_link(&ddev->kobj,
- &disk->queue->backing_dev_info->dev->kobj,
+ &disk->queue->backing_dev_info->rcu_dev->dev.kobj,
"bdi");
WARN_ON(err);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ff1b764b0c0e..7781773f9f77 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -388,7 +388,7 @@ static int block_device_ejected(struct super_block *sb)
struct inode *bd_inode = sb->s_bdev->bd_inode;
struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
- return bdi->dev == NULL;
+ return bdi->rcu_dev == NULL;
}
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 4fc87dee005a..3d2294c428c1 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -13,6 +13,7 @@
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/refcount.h>
+#include <linux/device.h>
struct page;
struct device;
@@ -185,6 +186,11 @@ struct bdi_writeback {
#endif
};
+struct bdi_rcu_device {
+ struct device dev;
+ struct rcu_head rcu_head;
+};
+
struct backing_dev_info {
u64 id;
struct rb_node rb_node; /* keyed by ->id */
@@ -219,7 +225,7 @@ struct backing_dev_info {
#endif
wait_queue_head_t wb_waitq;
- struct device *dev;
+ struct bdi_rcu_device *rcu_dev;
struct device *owner;
struct timer_list laptop_mode_wb_timer;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f88197c1ffc2..67e429b203a1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -509,9 +509,9 @@ extern const char *bdi_unknown_name;
static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
{
- if (!bdi || !bdi->dev)
+ if (!bdi || !bdi->rcu_dev)
return bdi_unknown_name;
- return dev_name(bdi->dev);
+ return dev_name(&bdi->rcu_dev->dev);
}
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 62f05f605fb5..b29c0ad43477 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -850,7 +850,7 @@ static int bdi_init(struct backing_dev_info *bdi)
{
int ret;
- bdi->dev = NULL;
+ bdi->rcu_dev = NULL;
kref_init(&bdi->refcnt);
bdi->min_ratio = 0;
@@ -930,20 +930,44 @@ struct backing_dev_info *bdi_get_by_id(u64 id)
return bdi;
}
+static void bdi_device_release(struct device *dev)
+{
+ struct bdi_rcu_device *rcu_dev = container_of(dev,
+ struct bdi_rcu_device, dev);
+ kfree(rcu_dev);
+}
+
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
struct device *dev;
struct rb_node *parent, **p;
+ struct bdi_rcu_device *rcu_dev;
+ int retval = -ENODEV;
- if (bdi->dev) /* The driver needs to use separate queues per device */
+ /* The driver needs to use separate queues per device */
+ if (bdi->rcu_dev)
return 0;
- dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
- if (IS_ERR(dev))
- return PTR_ERR(dev);
+ rcu_dev = kzalloc(sizeof(struct bdi_rcu_device), GFP_KERNEL);
+ if (!rcu_dev)
+ return -ENOMEM;
+
+ /* initialize device */
+ dev = &rcu_dev->dev;
+ device_initialize(dev);
+ dev->class = bdi_class;
+ dev->release = bdi_device_release;
+ dev_set_drvdata(dev, (void *)bdi);
+ retval = kobject_set_name_vargs(&dev->kobj, fmt, args);
+ if (retval)
+ goto error;
+
+ retval = device_add(dev);
+ if (retval)
+ goto error;
cgwb_bdi_register(bdi);
- bdi->dev = dev;
+ bdi->rcu_dev = rcu_dev;
bdi_debug_register(bdi, dev_name(dev));
set_bit(WB_registered, &bdi->wb.state);
@@ -962,6 +986,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
trace_writeback_bdi_register(bdi);
return 0;
+
+error:
+ put_device(&rcu_dev->dev);
+ return retval;
}
EXPORT_SYMBOL(bdi_register_va);
@@ -1005,17 +1033,28 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
synchronize_rcu_expedited();
}
+static void bdi_put_device_rcu(struct rcu_head *rcu)
+{
+ struct bdi_rcu_device *rcu_dev = container_of(rcu,
+ struct bdi_rcu_device, rcu_head);
+ put_device(&rcu_dev->dev);
+}
+
void bdi_unregister(struct backing_dev_info *bdi)
{
+ struct bdi_rcu_device *rcu_dev = bdi->rcu_dev;
+
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
wb_shutdown(&bdi->wb);
cgwb_bdi_unregister(bdi);
- if (bdi->dev) {
+ if (rcu_dev) {
bdi_debug_unregister(bdi);
- device_unregister(bdi->dev);
- bdi->dev = NULL;
+ get_device(&rcu_dev->dev);
+ device_unregister(&rcu_dev->dev);
+ rcu_assign_pointer(bdi->rcu_dev, NULL);
+ call_rcu(&rcu_dev->rcu_head, bdi_put_device_rcu);
}
if (bdi->owner) {
@@ -1031,7 +1070,7 @@ static void release_bdi(struct kref *ref)
if (test_bit(WB_registered, &bdi->wb.state))
bdi_unregister(bdi);
- WARN_ON_ONCE(bdi->dev);
+ WARN_ON_ONCE(bdi->rcu_dev);
wb_exit(&bdi->wb);
cgwb_bdi_exit(bdi);
kfree(bdi);
--
2.16.2.dirty
next prev parent reply other threads:[~2020-02-26 11:11 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-26 11:18 [PATCH v2 0/7] bdi: fix use-after-free for bdi device Yufen Yu
2020-02-26 11:18 ` [PATCH v2 1/7] blk-wbt: use bdi_dev_name() to get device name Yufen Yu
2020-02-26 11:18 ` [PATCH v2 2/7] fs/ceph: " Yufen Yu
2020-02-26 11:18 ` Yufen Yu [this message]
2020-03-04 17:05 ` [PATCH v2 3/7] bdi: protect device lifetime with RCU Tejun Heo
2020-03-04 17:22 ` Greg Kroah-Hartman
2020-03-04 17:23 ` Greg Kroah-Hartman
2020-03-04 18:50 ` Tejun Heo
2020-03-04 19:10 ` Theodore Y. Ts'o
2020-03-04 19:15 ` Tejun Heo
2020-03-04 20:05 ` Greg Kroah-Hartman
2020-03-05 1:22 ` Tejun Heo
2020-03-06 16:25 ` Greg Kroah-Hartman
2020-03-07 9:13 ` Yufen Yu
2020-02-26 11:18 ` [PATCH v2 4/7] bdi: create a new function bdi_get_dev_name() Yufen Yu
2020-02-26 11:18 ` [PATCH v2 5/7] bfq: fix potential kernel crash when print dev err info Yufen Yu
2020-02-26 11:18 ` [PATCH v2 6/7] memcg: fix crash in wb_workfn when bdi unregister Yufen Yu
2020-02-26 11:18 ` [PATCH v2 7/7] blk-wbt: replace bdi_dev_name() with bdi_get_dev_name() Yufen Yu
2020-03-04 17:29 ` [PATCH v2 0/7] bdi: fix use-after-free for bdi device Greg KH
2020-03-04 18:57 ` Tejun Heo
2020-03-04 20:07 ` Greg KH
2020-03-04 19:02 ` Theodore Y. Ts'o
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200226111851.55348-4-yuyufen@huawei.com \
--to=yuyufen@huawei.com \
--cc=axboe@kernel.dk \
--cc=bvanassche@acm.org \
--cc=jack@suse.cz \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=tj@kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).