* [PATCH v5 2/3] btrfs: rename delete_unused_bgs_mutex
2021-04-19 7:40 [PATCH v5 0/3] btrfs: zoned: automatic BG reclaim Johannes Thumshirn
2021-04-19 7:41 ` [PATCH v5 1/3] btrfs: zoned: reset zones of relocated block groups Johannes Thumshirn
@ 2021-04-19 7:41 ` Johannes Thumshirn
2021-04-19 7:41 ` [PATCH v5 3/3] btrfs: zoned: automatically reclaim zones Johannes Thumshirn
2 siblings, 0 replies; 11+ messages in thread
From: Johannes Thumshirn @ 2021-04-19 7:41 UTC (permalink / raw)
To: David Sterba
Cc: Johannes Thumshirn, linux-btrfs, Josef Bacik, Naohiro Aota,
Filipe Manana, Anand Jain
As a preparation for another user, rename the unused_bgs_mutex into
reclaim_bgs_lock.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
fs/btrfs/block-group.c | 6 +++---
fs/btrfs/ctree.h | 3 ++-
fs/btrfs/disk-io.c | 6 +++---
fs/btrfs/volumes.c | 46 +++++++++++++++++++++---------------------
4 files changed, 31 insertions(+), 30 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 293f3169be80..bbb5a6e170c7 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1289,7 +1289,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Long running balances can keep us blocked here for eternity, so
* simply skip deletion if we're unable to get the mutex.
*/
- if (!mutex_trylock(&fs_info->delete_unused_bgs_mutex))
+ if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
return;
spin_lock(&fs_info->unused_bgs_lock);
@@ -1462,12 +1462,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
return;
flip_async:
btrfs_end_transaction(trans);
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_put_block_group(block_group);
btrfs_discard_punt_unused_bgs_list(fs_info);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c858d5349c8..68ee130b5a2a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -957,7 +957,8 @@ struct btrfs_fs_info {
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
- struct mutex delete_unused_bgs_mutex;
+ /* protects about to be deleted block groups */
+ struct mutex reclaim_bgs_lock;
/* Cached block sizes */
u32 nodesize;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0a1182694f48..e52b89ad0a61 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1890,10 +1890,10 @@ static int cleaner_kthread(void *arg)
btrfs_run_defrag_inodes(fs_info);
/*
- * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+ * Acquires fs_info->reclaim_bgs_lock to avoid racing
* with relocation (btrfs_relocate_chunk) and relocation
* acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
- * after acquiring fs_info->delete_unused_bgs_mutex. So we
+ * after acquiring fs_info->reclaim_bgs_lock. So we
* can't hold, nor need to, fs_info->cleaner_mutex when deleting
* unused block groups.
*/
@@ -2876,7 +2876,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->treelog_bg_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
- mutex_init(&fs_info->delete_unused_bgs_mutex);
+ mutex_init(&fs_info->reclaim_bgs_lock);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
mutex_init(&fs_info->zoned_meta_io_lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 124c17ec53e9..c6568f61b2b0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3118,7 +3118,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
* we release the path used to search the chunk/dev tree and before
* the current task acquires this mutex and calls us.
*/
- lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
+ lockdep_assert_held(&fs_info->reclaim_bgs_lock);
/* step one, relocate all the extents inside this chunk */
btrfs_scrub_pause(fs_info);
@@ -3187,10 +3187,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
key.type = BTRFS_CHUNK_ITEM_KEY;
while (1) {
- mutex_lock(&fs_info->delete_unused_bgs_mutex);
+ mutex_lock(&fs_info->reclaim_bgs_lock);
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
if (ret < 0) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
BUG_ON(ret == 0); /* Corruption */
@@ -3198,7 +3198,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
ret = btrfs_previous_item(chunk_root, path, key.objectid,
key.type);
if (ret)
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret < 0)
goto error;
if (ret > 0)
@@ -3219,7 +3219,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
else
BUG_ON(ret);
}
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
if (found_key.offset == 0)
break;
@@ -3759,10 +3759,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
goto error;
}
- mutex_lock(&fs_info->delete_unused_bgs_mutex);
+ mutex_lock(&fs_info->reclaim_bgs_lock);
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
if (ret < 0) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
@@ -3776,7 +3776,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
ret = btrfs_previous_item(chunk_root, path, 0,
BTRFS_CHUNK_ITEM_KEY);
if (ret) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
ret = 0;
break;
}
@@ -3786,7 +3786,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != key.objectid) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
break;
}
@@ -3803,12 +3803,12 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
btrfs_release_path(path);
if (!ret) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto loop;
}
if (counting) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
spin_lock(&fs_info->balance_lock);
bctl->stat.expected++;
spin_unlock(&fs_info->balance_lock);
@@ -3833,7 +3833,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
count_meta < bctl->meta.limit_min)
|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
count_sys < bctl->sys.limit_min)) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto loop;
}
@@ -3847,7 +3847,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
ret = btrfs_may_alloc_data_chunk(fs_info,
found_key.offset);
if (ret < 0) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
} else if (ret == 1) {
chunk_reserved = 1;
@@ -3855,7 +3855,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
}
ret = btrfs_relocate_chunk(fs_info, found_key.offset);
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret == -ENOSPC) {
enospc_errors++;
} else if (ret == -ETXTBSY) {
@@ -4740,16 +4740,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
key.type = BTRFS_DEV_EXTENT_KEY;
do {
- mutex_lock(&fs_info->delete_unused_bgs_mutex);
+ mutex_lock(&fs_info->reclaim_bgs_lock);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto done;
}
ret = btrfs_previous_item(root, path, 0, key.type);
if (ret) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret < 0)
goto done;
ret = 0;
@@ -4762,7 +4762,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
if (key.objectid != device->devid) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_release_path(path);
break;
}
@@ -4771,7 +4771,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
length = btrfs_dev_extent_length(l, dev_extent);
if (key.offset + length <= new_size) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_release_path(path);
break;
}
@@ -4787,12 +4787,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
*/
ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
if (ret < 0) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto done;
}
ret = btrfs_relocate_chunk(fs_info, chunk_offset);
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret == -ENOSPC) {
failed++;
} else if (ret) {
@@ -8018,7 +8018,7 @@ static int relocating_repair_kthread(void *data)
return -EBUSY;
}
- mutex_lock(&fs_info->delete_unused_bgs_mutex);
+ mutex_lock(&fs_info->reclaim_bgs_lock);
/* Ensure block group still exists */
cache = btrfs_lookup_block_group(fs_info, target);
@@ -8040,7 +8040,7 @@ static int relocating_repair_kthread(void *data)
out:
if (cache)
btrfs_put_block_group(cache);
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
return ret;
--
2.30.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v5 3/3] btrfs: zoned: automatically reclaim zones
2021-04-19 7:40 [PATCH v5 0/3] btrfs: zoned: automatic BG reclaim Johannes Thumshirn
2021-04-19 7:41 ` [PATCH v5 1/3] btrfs: zoned: reset zones of relocated block groups Johannes Thumshirn
2021-04-19 7:41 ` [PATCH v5 2/3] btrfs: rename delete_unused_bgs_mutex Johannes Thumshirn
@ 2021-04-19 7:41 ` Johannes Thumshirn
2021-04-19 17:18 ` David Sterba
2021-04-19 17:26 ` David Sterba
2 siblings, 2 replies; 11+ messages in thread
From: Johannes Thumshirn @ 2021-04-19 7:41 UTC (permalink / raw)
To: David Sterba
Cc: Johannes Thumshirn, linux-btrfs, Josef Bacik, Naohiro Aota,
Filipe Manana, Anand Jain
When a file gets deleted on a zoned file system, the space freed is not
returned back into the block group's free space, but is migrated to
zone_unusable.
As this zone_unusable space is behind the current write pointer it is not
possible to use it for new allocations. In the current implementation a
zone is reset once all of the block group's space is accounted as zone
unusable.
This behaviour can lead to premature ENOSPC errors on a busy file system.
Instead of only reclaiming the zone once it is completely unusable,
kick off a reclaim job once the amount of unusable bytes exceeds a user
configurable threshold between 51% and 100%. It can be set per mounted
filesystem via the sysfs tunable bg_reclaim_threshold which is set to 75%
per default.
Similar to reclaiming unused block groups, these dirty block groups are
added to a to_reclaim list and then on a transaction commit, the reclaim
process is triggered but after we deleted unused block groups, which will
free space for the relocation process.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
---
fs/btrfs/block-group.c | 100 +++++++++++++++++++++++++++++++++++
fs/btrfs/block-group.h | 3 ++
fs/btrfs/ctree.h | 5 ++
fs/btrfs/disk-io.c | 13 +++++
fs/btrfs/free-space-cache.c | 9 +++-
fs/btrfs/sysfs.c | 35 ++++++++++++
fs/btrfs/volumes.c | 2 +-
fs/btrfs/volumes.h | 1 +
fs/btrfs/zoned.h | 2 +
include/trace/events/btrfs.h | 12 +++++
10 files changed, 180 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index bbb5a6e170c7..327f29739aeb 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1485,6 +1485,96 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
spin_unlock(&fs_info->unused_bgs_lock);
}
+void btrfs_reclaim_bgs_work(struct work_struct *work)
+{
+ struct btrfs_fs_info *fs_info =
+ container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
+ struct btrfs_block_group *bg;
+ struct btrfs_space_info *space_info;
+ int ret;
+
+ if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
+ return;
+
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
+ return;
+
+ mutex_lock(&fs_info->reclaim_bgs_lock);
+ spin_lock(&fs_info->unused_bgs_lock);
+ while (!list_empty(&fs_info->reclaim_bgs)) {
+ bg = list_first_entry(&fs_info->reclaim_bgs,
+ struct btrfs_block_group,
+ bg_list);
+ list_del_init(&bg->bg_list);
+
+ space_info = bg->space_info;
+ spin_unlock(&fs_info->unused_bgs_lock);
+
+ /* Don't want to race with allocators so take the groups_sem */
+ down_write(&space_info->groups_sem);
+
+ spin_lock(&bg->lock);
+ if (bg->reserved || bg->pinned || bg->ro) {
+ /*
+ * We want to bail if we made new allocations or have
+ * outstanding allocations in this block group. We do
+ * the ro check in case balance is currently acting on
+ * this block group.
+ */
+ spin_unlock(&bg->lock);
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+ spin_unlock(&bg->lock);
+
+ /* Get out fast, in case we're unmounting the FS. */
+ if (btrfs_fs_closing(fs_info)) {
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+
+ ret = inc_block_group_ro(bg, 0);
+ up_write(&space_info->groups_sem);
+ if (ret < 0)
+ goto next;
+
+ btrfs_info(fs_info, "reclaiming chunk %llu", bg->start);
+ trace_btrfs_reclaim_block_group(bg);
+ ret = btrfs_relocate_chunk(fs_info, bg->start);
+ if (ret)
+ btrfs_err(fs_info, "error relocating chunk %llu",
+ bg->start);
+
+next:
+ btrfs_put_block_group(bg);
+ spin_lock(&fs_info->unused_bgs_lock);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
+ btrfs_exclop_finish(fs_info);
+}
+
+void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
+{
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (!list_empty(&fs_info->reclaim_bgs))
+ queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
+ spin_unlock(&fs_info->unused_bgs_lock);
+}
+
+void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
+{
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+ trace_btrfs_add_reclaim_block_group(bg);
+ list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+}
+
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
struct btrfs_path *path)
{
@@ -3446,6 +3536,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
}
spin_unlock(&info->unused_bgs_lock);
+ spin_lock(&info->unused_bgs_lock);
+ while (!list_empty(&info->reclaim_bgs)) {
+ block_group = list_first_entry(&info->reclaim_bgs,
+ struct btrfs_block_group,
+ bg_list);
+ list_del_init(&block_group->bg_list);
+ btrfs_put_block_group(block_group);
+ }
+ spin_unlock(&info->unused_bgs_lock);
+
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group,
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 3ecc3372a5ce..7b927425dc71 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -264,6 +264,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
+void btrfs_reclaim_bgs_work(struct work_struct *work);
+void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info);
+void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68ee130b5a2a..7c72dcd71547 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -954,6 +954,11 @@ struct btrfs_fs_info {
struct work_struct async_data_reclaim_work;
struct work_struct preempt_reclaim_work;
+ /* Used to reclaim partially filled block groups in the background */
+ struct work_struct reclaim_bgs_work;
+ struct list_head reclaim_bgs;
+ int bg_reclaim_threshold;
+
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e52b89ad0a61..b7e81c17d3e2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1898,6 +1898,13 @@ static int cleaner_kthread(void *arg)
* unused block groups.
*/
btrfs_delete_unused_bgs(fs_info);
+
+ /*
+ * Reclaim block groups in the reclaim_bgs list after we deleted
+ * all unused block_groups. This possibly gives us some more free
+ * space.
+ */
+ btrfs_reclaim_bgs(fs_info);
sleep:
clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
if (kthread_should_park())
@@ -2886,6 +2893,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_LIST_HEAD(&fs_info->unused_bgs);
+ INIT_LIST_HEAD(&fs_info->reclaim_bgs);
#ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&fs_info->allocated_roots);
INIT_LIST_HEAD(&fs_info->allocated_ebs);
@@ -2974,6 +2982,9 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
fs_info->swapfile_pins = RB_ROOT;
fs_info->send_in_progress = 0;
+
+ fs_info->bg_reclaim_threshold = DEFAULT_RECLAIM_THRESH;
+ INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
}
static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
@@ -4332,6 +4343,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
+ cancel_work_sync(&fs_info->reclaim_bgs_work);
+
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 9988decd5717..e54466fc101f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -11,6 +11,7 @@
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
+#include "misc.h"
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
@@ -2539,6 +2540,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 bytenr, u64 size, bool used)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
@@ -2569,8 +2571,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
}
/* All the region is now unusable. Mark it as unused and reclaim */
- if (block_group->zone_unusable == block_group->length)
+ if (block_group->zone_unusable == block_group->length) {
btrfs_mark_bg_unused(block_group);
+ } else if (block_group->zone_unusable >=
+ div_factor_fine(block_group->length,
+ fs_info->bg_reclaim_threshold)) {
+ btrfs_mark_bg_to_reclaim(block_group);
+ }
return 0;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index a99d1f415a7f..436ac7b4b334 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -980,6 +980,40 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
}
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
+static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
+ struct kobj_attribute *a,
+ char *buf)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ ssize_t ret;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", fs_info->bg_reclaim_threshold);
+
+ return ret;
+}
+
+static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
+ struct kobj_attribute *a,
+ const char *buf, size_t len)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ int thresh;
+ int ret;
+
+ ret = kstrtoint(buf, 10, &thresh);
+ if (ret)
+ return ret;
+
+ if (thresh <= 50 || thresh > 100)
+ return -EINVAL;
+
+ fs_info->bg_reclaim_threshold = thresh;
+
+ return len;
+}
+BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
+ btrfs_bg_reclaim_threshold_store);
+
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(, nodesize),
@@ -991,6 +1025,7 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, exclusive_operation),
BTRFS_ATTR_PTR(, generation),
BTRFS_ATTR_PTR(, read_policy),
+ BTRFS_ATTR_PTR(, bg_reclaim_threshold),
NULL,
};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c6568f61b2b0..08541ef5fc39 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3098,7 +3098,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
return ret;
}
-static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
+int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
struct btrfs_root *root = fs_info->chunk_root;
struct btrfs_trans_handle *trans;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d4c3e0dd32b8..9c0d84e5ec06 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -484,6 +484,7 @@ void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
+int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_uuid_scan_kthread(void *data);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 61e969652fe1..5d74b453b9b9 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -9,6 +9,8 @@
#include "disk-io.h"
#include "block-group.h"
+#define DEFAULT_RECLAIM_THRESH 75
+
struct btrfs_zoned_device_info {
/*
* Number of zones, zone size and types of zones if bdev is a
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0551ea65374f..a41dd8a0c730 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1903,6 +1903,18 @@ DEFINE_EVENT(btrfs__block_group, btrfs_add_unused_block_group,
TP_ARGS(bg_cache)
);
+DEFINE_EVENT(btrfs__block_group, btrfs_add_reclaim_block_group,
+ TP_PROTO(const struct btrfs_block_group *bg_cache),
+
+ TP_ARGS(bg_cache)
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_reclaim_block_group,
+ TP_PROTO(const struct btrfs_block_group *bg_cache),
+
+ TP_ARGS(bg_cache)
+);
+
DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group,
TP_PROTO(const struct btrfs_block_group *bg_cache),
--
2.30.0
^ permalink raw reply related [flat|nested] 11+ messages in thread