* [PATCH v3 1/3] bcache: add dirty_data in struct bcache_device
@ 2022-12-12 12:32 mingzhe.zou
2022-12-12 12:32 ` [PATCH v3 2/3] bcache: allocate stripe memory when partial_stripes_expensive is true mingzhe.zou
2022-12-12 12:32 ` [PATCH v3 3/3] bcache: support online resizing of cached_dev mingzhe.zou
0 siblings, 2 replies; 3+ messages in thread
From: mingzhe.zou @ 2022-12-12 12:32 UTC (permalink / raw)
To: colyli, linux-bcache; +Cc: zoumingzhe, andrea.tomassetti-opensource, bcache
From: mingzhe <mingzhe.zou@easystack.cn>
Currently, the dirty data count of a cached_dev or flash_dev is computed by
summing the per-stripe counters. Since the flash device supports resizing,
this may cause a bug (e.g. resizing the flash device from 1T to 2T changes
nr_stripes from 1 to 2).
This patch adds a dirty_sectors counter to struct bcache_device, so that the
amount of dirty data can be read quickly, and fixes the flash device resize
bug.
Signed-off-by: mingzhe <mingzhe.zou@easystack.cn>
---
drivers/md/bcache/bcache.h | 1 +
drivers/md/bcache/writeback.c | 2 ++
drivers/md/bcache/writeback.h | 7 +------
3 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 621a2ae1767b..5da991505b45 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -268,6 +268,7 @@ struct bcache_device {
unsigned int stripe_size;
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
+ atomic_long_t dirty_sectors;
struct bio_set bio_split;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f21295dea71b..7b5009e8b4ff 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -769,6 +769,8 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
if (stripe < 0)
return;
+ atomic_long_add(nr_sectors, &d->dirty_sectors);
+
if (UUID_FLASH_ONLY(&c->uuids[inode]))
atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 7e5a2fe03429..12765c0dfd5c 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -56,12 +56,7 @@ struct bch_dirty_init_state {
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
{
- uint64_t i, ret = 0;
-
- for (i = 0; i < d->nr_stripes; i++)
- ret += atomic_read(d->stripe_sectors_dirty + i);
-
- return ret;
+ return atomic_long_read(&d->dirty_sectors);
}
static inline int offset_to_stripe(struct bcache_device *d,
--
2.17.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v3 2/3] bcache: allocate stripe memory when partial_stripes_expensive is true
2022-12-12 12:32 [PATCH v3 1/3] bcache: add dirty_data in struct bcache_device mingzhe.zou
@ 2022-12-12 12:32 ` mingzhe.zou
2022-12-12 12:32 ` [PATCH v3 3/3] bcache: support online resizing of cached_dev mingzhe.zou
1 sibling, 0 replies; 3+ messages in thread
From: mingzhe.zou @ 2022-12-12 12:32 UTC (permalink / raw)
To: colyli, linux-bcache; +Cc: zoumingzhe, andrea.tomassetti-opensource, bcache
From: mingzhe <mingzhe.zou@easystack.cn>
Currently, a bcache_device (cached_dev or flash_dev) always allocates
memory for stripe_sectors_dirty and full_dirty_stripes, regardless of
whether partial_stripes_expensive is true or not. When the device's
partial_stripes_expensive is false, stripe_sectors_dirty is used only by
bcache_dev_sectors_dirty_add().
When stripe_size is 0, it is forced to 2^31 sectors, which is about 1T
(2^31*512 bytes). However, some non-raid devices (such as rbd) provide a
non-zero io_opt. As reported in
https://bugzilla.redhat.com/show_bug.cgi?id=1783075, for some block devices
with a large capacity (e.g. 8TB) but a small io_opt size (e.g. 8 sectors),
nr_stripes will be very large. Even though the overflow bug was fixed in
commits 65f0f017e7be and 7a1481267999, registering such a device still
returns an error.
I don't think it is necessary to allocate stripe memory for devices where
partial_stripes_expensive is false. This patch allocates stripe memory only
when partial_stripes_expensive is true.
Signed-off-by: mingzhe <mingzhe.zou@easystack.cn>
---
Changelog:
v2: Fix up errors.
v1: Original version.
---
drivers/md/bcache/super.c | 32 ++++++++++++++++++++++----------
drivers/md/bcache/writeback.c | 14 ++++++++++----
2 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index a91a1c3f4055..125f607d58f0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -887,15 +887,20 @@ static void bcache_device_free(struct bcache_device *d)
}
bioset_exit(&d->bio_split);
- kvfree(d->full_dirty_stripes);
- kvfree(d->stripe_sectors_dirty);
+
+ if (d->full_dirty_stripes)
+ kvfree(d->full_dirty_stripes);
+
+ if (d->stripe_sectors_dirty)
+ kvfree(d->stripe_sectors_dirty);
closure_debug_destroy(&d->cl);
}
static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
- sector_t sectors, struct block_device *cached_bdev,
- const struct block_device_operations *ops)
+ sector_t sectors, bool enable_stripe,
+ struct block_device *cached_bdev,
+ const struct block_device_operations *ops)
{
struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
@@ -903,6 +908,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
uint64_t n;
int idx;
+ if (!enable_stripe)
+ goto skip_stripe;
+
if (!d->stripe_size)
d->stripe_size = 1 << 31;
@@ -924,6 +932,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
if (!d->full_dirty_stripes)
goto out_free_stripe_sectors_dirty;
+skip_stripe:
idx = ida_simple_get(&bcache_device_idx, 0,
BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
if (idx < 0)
@@ -982,9 +991,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
out_ida_remove:
ida_simple_remove(&bcache_device_idx, idx);
out_free_full_dirty_stripes:
- kvfree(d->full_dirty_stripes);
+ if (d->full_dirty_stripes)
+ kvfree(d->full_dirty_stripes);
out_free_stripe_sectors_dirty:
- kvfree(d->stripe_sectors_dirty);
+ if (d->stripe_sectors_dirty)
+ kvfree(d->stripe_sectors_dirty);
return -ENOMEM;
}
@@ -1397,6 +1408,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
int ret;
struct io *io;
struct request_queue *q = bdev_get_queue(dc->bdev);
+ sector_t sectors = bdev_nr_sectors(dc->bdev) - dc->sb.data_offset;
__module_get(THIS_MODULE);
INIT_LIST_HEAD(&dc->list);
@@ -1422,9 +1434,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
dc->partial_stripes_expensive =
q->limits.raid_partial_stripes_expensive;
- ret = bcache_device_init(&dc->disk, block_size,
- bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
- dc->bdev, &bcache_cached_ops);
+ ret = bcache_device_init(&dc->disk, block_size, sectors,
+ dc->partial_stripes_expensive,
+ dc->bdev, &bcache_cached_ops);
if (ret)
return ret;
@@ -1535,7 +1547,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- if (bcache_device_init(d, block_bytes(c->cache), u->sectors,
+ if (bcache_device_init(d, block_bytes(c->cache), u->sectors, false,
NULL, &bcache_flash_ops))
goto err;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 7b5009e8b4ff..3f4af7ce6936 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -758,6 +758,7 @@ static void read_dirty(struct cached_dev *dc)
void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
uint64_t offset, int nr_sectors)
{
+ struct cached_dev *dc = NULL;
struct bcache_device *d = c->devices[inode];
unsigned int stripe_offset, sectors_dirty;
int stripe;
@@ -765,14 +766,19 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
if (!d)
return;
- stripe = offset_to_stripe(d, offset);
- if (stripe < 0)
- return;
-
atomic_long_add(nr_sectors, &d->dirty_sectors);
if (UUID_FLASH_ONLY(&c->uuids[inode]))
atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);
+ else
+ dc = container_of(d, struct cached_dev, disk);
+
+ if (!dc || !dc->partial_stripes_expensive)
+ return;
+
+ stripe = offset_to_stripe(d, offset);
+ if (stripe < 0)
+ return;
stripe_offset = offset & (d->stripe_size - 1);
--
2.17.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v3 3/3] bcache: support online resizing of cached_dev
2022-12-12 12:32 [PATCH v3 1/3] bcache: add dirty_data in struct bcache_device mingzhe.zou
2022-12-12 12:32 ` [PATCH v3 2/3] bcache: allocate stripe memory when partial_stripes_expensive is true mingzhe.zou
@ 2022-12-12 12:32 ` mingzhe.zou
1 sibling, 0 replies; 3+ messages in thread
From: mingzhe.zou @ 2022-12-12 12:32 UTC (permalink / raw)
To: colyli, linux-bcache; +Cc: zoumingzhe, andrea.tomassetti-opensource, bcache
From: mingzhe <mingzhe.zou@easystack.cn>
When partial_stripes_expensive is true, resizing causes nr_stripes to change,
so the stripe_sectors_dirty and full_dirty_stripes memory must be reallocated.
If the device becomes smaller, only nr_stripes needs to be modified.
Signed-off-by: mingzhe <mingzhe.zou@easystack.cn>
---
Changelog:
v3: Fix up errors.
v2: Fix up errors.
v1: Original version.
---
drivers/md/bcache/bcache.h | 1 +
drivers/md/bcache/btree.c | 31 ++++++++++++++
drivers/md/bcache/btree.h | 2 +
drivers/md/bcache/super.c | 85 ++++++++++++++++++++++++++++++++++++++
drivers/md/bcache/sysfs.c | 14 +++++++
5 files changed, 133 insertions(+)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5da991505b45..70e1f6ec12d5 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -1040,6 +1040,7 @@ void bcache_write_super(struct cache_set *c);
int bch_flash_dev_create(struct cache_set *c, uint64_t size);
+int bch_cached_dev_resize(struct cached_dev *dc, sector_t sectors);
int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
uint8_t *set_uuid);
void bch_cached_dev_detach(struct cached_dev *dc);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 147c493a989a..07388e51ff9c 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2467,6 +2467,37 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
return ret;
}
+int bch_btree_insert_invalidate(struct cache_set *c, unsigned int inode,
+ sector_t offset, sector_t length)
+{
+ int ret = 0;
+ sector_t num;
+ struct keylist insert_keys;
+
+ bch_keylist_init(&insert_keys);
+ while (!ret && length) {
+ num = min_t(sector_t, length, 1U << (KEY_SIZE_BITS - 1));
+
+ if ((ret = __bch_keylist_realloc(&insert_keys, 2))) {
+ pr_err("cannot allocate memory");
+ break;
+ }
+
+ offset += num;
+ length -= num;
+
+ bch_keylist_add(&insert_keys, &KEY(inode, offset, num));
+ if ((ret = bch_btree_insert(c, &insert_keys, NULL, NULL))) {
+ pr_err("invalidating %llu sectors from %llu error %d",
+ num, offset - num, ret);
+ break;
+ }
+ }
+ bch_keylist_free(&insert_keys);
+
+ return ret;
+}
+
void bch_btree_set_root(struct btree *b)
{
unsigned int i;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 1b5fdbc0d83e..28c8885ecea1 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -276,6 +276,8 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
struct bkey *check_key);
int bch_btree_insert(struct cache_set *c, struct keylist *keys,
atomic_t *journal_ref, struct bkey *replace_key);
+int bch_btree_insert_invalidate(struct cache_set *c, unsigned int inode,
+ sector_t offset, sector_t length);
int bch_gc_thread_start(struct cache_set *c);
void bch_initial_gc_finish(struct cache_set *c);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 125f607d58f0..8c67ef43a875 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1345,6 +1345,91 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return 0;
}
+int bch_cached_dev_resize(struct cached_dev *dc, sector_t sectors)
+{
+ struct cache_set *c = dc->disk.c;
+ uint64_t nr_stripes, n, i;
+ sector_t length, sectors_dirty;
+ atomic_t *stripe_sectors_dirty;
+ unsigned long *full_dirty_stripes;
+ const size_t max_stripes = min_t(size_t, INT_MAX,
+ SIZE_MAX / sizeof(atomic_t));
+
+ /* Block writeback thread and all requests */
+ down_write(&dc->writeback_lock);
+
+ if (!dc->partial_stripes_expensive)
+ goto set_capacity;
+
+ nr_stripes = DIV_ROUND_UP_ULL(sectors, dc->disk.stripe_size);
+ if (!nr_stripes || nr_stripes > max_stripes) {
+ pr_err("nr_stripes too large or invalid: %llu", nr_stripes);
+ up_write(&dc->writeback_lock);
+ return -ENOMEM;
+ }
+
+ if (nr_stripes > dc->disk.nr_stripes)
+ goto realloc;
+
+ for (i = nr_stripes; i < dc->disk.nr_stripes; i++) {
+ sectors_dirty = atomic_read(dc->disk.stripe_sectors_dirty + i);
+ atomic_long_sub(sectors_dirty, &dc->disk.dirty_sectors);
+ }
+ goto nr_stripes;
+
+realloc:
+ n = nr_stripes * sizeof(atomic_t);
+ stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL);
+ if (!stripe_sectors_dirty) {
+ up_write(&dc->writeback_lock);
+ return -ENOMEM;
+ }
+
+ n = BITS_TO_LONGS(nr_stripes) * sizeof(unsigned long);
+ full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
+ if (!full_dirty_stripes) {
+ kvfree(stripe_sectors_dirty);
+ up_write(&dc->writeback_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < dc->disk.nr_stripes; i++) {
+ sectors_dirty = atomic_read(dc->disk.stripe_sectors_dirty + i);
+ atomic_set(stripe_sectors_dirty + i, sectors_dirty);
+ if (sectors_dirty == dc->disk.stripe_size)
+ set_bit(i, full_dirty_stripes);
+ }
+
+ kvfree(dc->disk.full_dirty_stripes);
+ kvfree(dc->disk.stripe_sectors_dirty);
+ dc->disk.stripe_sectors_dirty = stripe_sectors_dirty;
+ dc->disk.full_dirty_stripes = full_dirty_stripes;
+
+nr_stripes:
+ dc->disk.nr_stripes = nr_stripes;
+
+set_capacity:
+ length = get_capacity(dc->disk.disk) - sectors;
+ set_capacity(dc->disk.disk, sectors);
+
+ if (length <= 0)
+ goto skip_invalidate;
+
+ /* invalidate dirty data not used */
+ pr_info("invalidating %llu sectors from %llu", length, sectors);
+ bch_btree_insert_invalidate(c, dc->disk.id, sectors, length);
+
+ /* recount dirty sectors */
+ if (!dc->partial_stripes_expensive) {
+ atomic_long_set(&dc->disk.dirty_sectors, 0);
+ bch_sectors_dirty_init(&dc->disk);
+ }
+
+skip_invalidate:
+ up_write(&dc->writeback_lock);
+ return 0;
+}
+
/* when dc->disk.kobj released */
void bch_cached_dev_release(struct kobject *kobj)
{
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 8d1a86249f99..4f480c579b2a 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -199,6 +199,7 @@ SHOW(__bch_cached_dev)
sysfs_printf(data_csum, "%i", dc->disk.data_csum);
+ sysfs_hprint(size, get_capacity(dc->disk.disk) << 9);
var_printf(verify, "%i");
var_printf(bypass_torture_test, "%i");
var_printf(writeback_metadata, "%i");
@@ -312,6 +313,18 @@ STORE(__cached_dev)
#define d_strtoi_h(var) sysfs_hatoi(var, dc->var)
sysfs_strtoul(data_csum, dc->disk.data_csum);
+
+ if (attr == &sysfs_size) {
+ ssize_t ret;
+ sector_t v, sectors;
+
+ strtoi_h_or_return(buf, v);
+ sectors = clamp_t(sector_t, v >> 9, 0,
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
+ ret = bch_cached_dev_resize(dc, sectors);
+ return ret ? ret : size;
+ }
+
d_strtoul(verify);
sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
@@ -558,6 +571,7 @@ static struct attribute *bch_cached_dev_attrs[] = {
&sysfs_running,
&sysfs_state,
&sysfs_label,
+ &sysfs_size,
#ifdef CONFIG_BCACHE_DEBUG
&sysfs_verify,
&sysfs_bypass_torture_test,
--
2.17.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-12-12 12:33 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-12 12:32 [PATCH v3 1/3] bcache: add dirty_data in struct bcache_device mingzhe.zou
2022-12-12 12:32 ` [PATCH v3 2/3] bcache: allocate stripe memory when partial_stripes_expensive is true mingzhe.zou
2022-12-12 12:32 ` [PATCH v3 3/3] bcache: support online resizing of cached_dev mingzhe.zou
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).