linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size
@ 2022-02-08 19:31 Stefan Roesch
  2022-02-08 19:31 ` [PATCH v1 1/3] btrfs: store chunk size in space-info struct Stefan Roesch
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Stefan Roesch @ 2022-02-08 19:31 UTC (permalink / raw)
  To: linux-btrfs, kernel-team; +Cc: shr

The btrfs allocator is currently not ideal for all workloads. It tends
to suffer from overallocating data block groups and underallocating
metadata block groups. This results in filesystems becoming read-only
even though there is plenty of "free" space.

This patch adds the ability to query and set the metadata allocation
size.

  Patch 1: btrfs: store chunk size in space-info struct.
    Store the stripe and chunk size in the btrfs_space_info structure
    to be able to expose and set the metadata allocation size.
    
  Patch 2: btrfs: expose chunk size in sysfs.
    Add a sysfs entry to read and write the above information.
    
  btrfs: add force_chunk_alloc sysfs entry to force allocation
    For testing purposes and under a debug flag add a sysfs entry to
    force a space allocation.


Stefan Roesch (3):
  btrfs: store chunk size in space-info struct.
  btrfs: expose chunk size in sysfs.
  btrfs: add force_chunk_alloc sysfs entry to force allocation

 fs/btrfs/space-info.c |  41 ++++++++++++
 fs/btrfs/space-info.h |   3 +
 fs/btrfs/sysfs.c      | 152 ++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.c    |  28 +++-----
 4 files changed, 205 insertions(+), 19 deletions(-)


base-commit: dfd42facf1e4ada021b939b4e19c935dcdd55566
-- 
2.30.2


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v1 1/3] btrfs: store chunk size in space-info struct.
  2022-02-08 19:31 [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Stefan Roesch
@ 2022-02-08 19:31 ` Stefan Roesch
  2022-04-04 16:36   ` David Sterba
  2022-02-08 19:31 ` [PATCH v1 2/3] btrfs: expose chunk size in sysfs Stefan Roesch
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Stefan Roesch @ 2022-02-08 19:31 UTC (permalink / raw)
  To: linux-btrfs, kernel-team; +Cc: shr

The chunk size is stored in the btrfs_space_info structure.
It is initialized at the start and is then used.

A new api is added to update the current chunk size.

This api is used to be able to expose the chunk_size
as a sysfs setting.

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/btrfs/space-info.c | 41 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/space-info.h |  3 +++
 fs/btrfs/volumes.c    | 28 +++++++++-------------------
 3 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 294242c194d8..302522a250d8 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -181,6 +181,46 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
 		found->full = 0;
 }
 
+/*
+ * Compute chunk size depending on block type for regular volumes.
+ */
+static u64 compute_chunk_size_regular(struct btrfs_fs_info *info, u64 flags)
+{
+	ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+
+	if (flags & BTRFS_BLOCK_GROUP_DATA)
+		return SZ_1G;
+	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+		return SZ_32M;
+
+	/* Handle BTRFS_BLOCK_GROUP_METADATA */
+	if (info->fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+		return SZ_1G;
+
+	return SZ_256M;
+}
+
+/*
+ * Compute chunk size depending on volume type.
+ */
+static u64 compute_chunk_size(struct btrfs_fs_info *info, u64 flags)
+{
+	if (btrfs_is_zoned(info))
+		return info->zone_size;
+
+	return compute_chunk_size_regular(info, flags);
+}
+
+/*
+ * Update default chunk size.
+ *
+ */
+void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
+					u64 chunk_size)
+{
+	atomic64_set(&space_info->chunk_size, chunk_size);
+}
+
 static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 {
 
@@ -202,6 +242,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 	INIT_LIST_HEAD(&space_info->tickets);
 	INIT_LIST_HEAD(&space_info->priority_tickets);
 	space_info->clamp = 1;
+	btrfs_update_space_info_chunk_size(space_info, compute_chunk_size(info, flags));
 
 	ret = btrfs_sysfs_add_space_info_type(info, space_info);
 	if (ret)
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index d841fed73492..c1a64bbfb6d1 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -23,6 +23,7 @@ struct btrfs_space_info {
 	u64 max_extent_size;	/* This will hold the maximum extent size of
 				   the space info if we had an ENOSPC in the
 				   allocator. */
+	atomic64_t chunk_size;  /* chunk size in bytes */
 
 	int clamp;		/* Used to scale our threshold for preemptive
 				   flushing. The value is >> clamp, so turns
@@ -115,6 +116,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
 			     u64 total_bytes, u64 bytes_used,
 			     u64 bytes_readonly, u64 bytes_zone_unusable,
 			     struct btrfs_space_info **space_info);
+void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
+			     u64 chunk_size);
 struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
 					       u64 flags);
 u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b07d382d53a8..51091130337b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5104,26 +5104,16 @@ static void init_alloc_chunk_ctl_policy_regular(
 				struct btrfs_fs_devices *fs_devices,
 				struct alloc_chunk_ctl *ctl)
 {
-	u64 type = ctl->type;
+	struct btrfs_space_info *space_info;
 
-	if (type & BTRFS_BLOCK_GROUP_DATA) {
-		ctl->max_stripe_size = SZ_1G;
-		ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
-	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
-		/* For larger filesystems, use larger metadata chunks */
-		if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
-			ctl->max_stripe_size = SZ_1G;
-		else
-			ctl->max_stripe_size = SZ_256M;
-		ctl->max_chunk_size = ctl->max_stripe_size;
-	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
-		ctl->max_stripe_size = SZ_32M;
-		ctl->max_chunk_size = 2 * ctl->max_stripe_size;
-		ctl->devs_max = min_t(int, ctl->devs_max,
-				      BTRFS_MAX_DEVS_SYS_CHUNK);
-	} else {
-		BUG();
-	}
+	space_info = btrfs_find_space_info(fs_devices->fs_info, ctl->type);
+	ASSERT(space_info);
+
+	ctl->max_chunk_size = atomic64_read(&space_info->chunk_size);
+	ctl->max_stripe_size = ctl->max_chunk_size;
+
+	if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
+		ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
 
 	/* We don't want a chunk larger than 10% of writable space */
 	ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v1 2/3] btrfs: expose chunk size in sysfs.
  2022-02-08 19:31 [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Stefan Roesch
  2022-02-08 19:31 ` [PATCH v1 1/3] btrfs: store chunk size in space-info struct Stefan Roesch
@ 2022-02-08 19:31 ` Stefan Roesch
  2022-04-04 16:52   ` David Sterba
  2022-02-08 19:31 ` [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation Stefan Roesch
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Stefan Roesch @ 2022-02-08 19:31 UTC (permalink / raw)
  To: linux-btrfs, kernel-team; +Cc: shr

This adds a new sysfs entry in /sys/fs/btrfs/<uuid>/allocation/<block
type>/chunk_size. This allows to query the chunk size and also set the
chunk size.

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/btrfs/sysfs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index beb7f72d50b8..ca337117f15b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -21,6 +21,7 @@
 #include "space-info.h"
 #include "block-group.h"
 #include "qgroup.h"
+#include "misc.h"
 
 /*
  * Structure name                       Path
@@ -92,6 +93,7 @@ static struct btrfs_feature_attr btrfs_attr_features_##_name = {	     \
 
 static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
 static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
+static inline struct kobject *get_btrfs_kobj(struct kobject *kobj);
 
 static struct btrfs_feature_attr *to_btrfs_feature_attr(struct kobj_attribute *a)
 {
@@ -708,6 +710,81 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj,	\
 }									\
 BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
 
+/*
+ * Return space info chunk size.
+ */
+static ssize_t btrfs_chunk_size_show(struct kobject *kobj,
+				     struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_space_info *sinfo = to_space_info(kobj);
+
+	return sysfs_emit(buf, "%llu\n", atomic64_read(&sinfo->chunk_size));
+}
+
+/*
+ * Store new user supplied chunk size in space info.
+ *
+ * Note: If the new chunk size value is larger than 10% of free space it is
+ *       reduced to match that limit.
+ */
+static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
+				      struct kobj_attribute *a,
+				      const char *buf, size_t len)
+{
+	struct btrfs_space_info *space_info = to_space_info(kobj);
+	struct btrfs_fs_info *fs_info = to_fs_info(get_btrfs_kobj(kobj));
+	u64 val;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!fs_info) {
+		pr_err("couldn't get fs_info\n");
+		return -EPERM;
+	}
+
+	if (sb_rdonly(fs_info->sb))
+		return -EROFS;
+
+	if (!fs_info->fs_devices)
+		return -EINVAL;
+
+	if (btrfs_is_zoned(fs_info))
+		return -EINVAL;
+
+	if (!space_info) {
+		btrfs_err(fs_info, "couldn't get space_info\n");
+		return -EPERM;
+	}
+
+	/* System block type must not be changed. */
+	if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+		return -EINVAL;
+
+	ret = kstrtoull(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	val = min(val, BTRFS_MAX_DATA_CHUNK_SIZE);
+
+	/*
+	 * Limit stripe size to 10% of available space.
+	 */
+	val = min(div_factor(fs_info->fs_devices->total_rw_bytes, 1), val);
+
+	/* Must be multiple of 256M. */
+	val &= ~(SZ_256M - 1);
+
+	/* Must be at least 256M. */
+	if (val < SZ_256M)
+		return -EINVAL;
+
+	btrfs_update_space_info_chunk_size(space_info, val);
+
+	return val;
+}
+
 SPACE_INFO_ATTR(flags);
 SPACE_INFO_ATTR(total_bytes);
 SPACE_INFO_ATTR(bytes_used);
@@ -718,6 +795,8 @@ SPACE_INFO_ATTR(bytes_readonly);
 SPACE_INFO_ATTR(bytes_zone_unusable);
 SPACE_INFO_ATTR(disk_used);
 SPACE_INFO_ATTR(disk_total);
+BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show,
+	      btrfs_chunk_size_store);
 
 /*
  * Allocation information about block group types.
@@ -735,6 +814,7 @@ static struct attribute *space_info_attrs[] = {
 	BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
 	BTRFS_ATTR_PTR(space_info, disk_used),
 	BTRFS_ATTR_PTR(space_info, disk_total),
+	BTRFS_ATTR_PTR(space_info, chunk_size),
 	NULL,
 };
 ATTRIBUTE_GROUPS(space_info);
@@ -1099,6 +1179,20 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
 	return to_fs_devs(kobj)->fs_info;
 }
 
+/*
+ * Get btrfs sysfs kobject.
+ */
+static inline struct kobject *get_btrfs_kobj(struct kobject *kobj)
+{
+	while (kobj) {
+		if (kobj->ktype == &btrfs_ktype)
+			return kobj;
+		kobj = kobj->parent;
+	}
+
+	return NULL;
+}
+
 #define NUM_FEATURE_BITS 64
 #define BTRFS_FEATURE_NAME_MAX 13
 static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation
  2022-02-08 19:31 [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Stefan Roesch
  2022-02-08 19:31 ` [PATCH v1 1/3] btrfs: store chunk size in space-info struct Stefan Roesch
  2022-02-08 19:31 ` [PATCH v1 2/3] btrfs: expose chunk size in sysfs Stefan Roesch
@ 2022-02-08 19:31 ` Stefan Roesch
  2022-04-04 17:02   ` David Sterba
  2022-02-28 19:30 ` [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Josef Bacik
  2022-04-04 16:31 ` David Sterba
  4 siblings, 1 reply; 14+ messages in thread
From: Stefan Roesch @ 2022-02-08 19:31 UTC (permalink / raw)
  To: linux-btrfs, kernel-team; +Cc: shr

This adds the force_chunk_alloc sysfs entry to be able to force a
storage allocation. This is a debugging and test feature and is
enabled with the CONFIG_BTRFS_DEBUG configuration option.

It is stored at
/sys/fs/btrfs/<uuid>/allocation/<block_type>/force_chunk_alloc.

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/btrfs/sysfs.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index ca337117f15b..4f4f038bc261 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -62,6 +62,10 @@ struct raid_kobject {
 	.store	= _store,						\
 }
 
+#define BTRFS_ATTR_W(_prefix, _name, _store)			        \
+	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
+			__INIT_KOBJ_ATTR(_name, 0200, NULL, _store)
+
 #define BTRFS_ATTR_RW(_prefix, _name, _show, _store)			\
 	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
 			__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
@@ -785,6 +789,54 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
 	return val;
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Request chunk allocation with current chunk size.
+ */
+static ssize_t btrfs_force_chunk_alloc_store(struct kobject *kobj,
+					     struct kobj_attribute *a,
+					     const char *buf, size_t len)
+{
+	struct btrfs_space_info *space_info = to_space_info(kobj);
+	struct btrfs_fs_info *fs_info = to_fs_info(get_btrfs_kobj(kobj));
+	struct btrfs_trans_handle *trans;
+	unsigned long val;
+	int ret;
+
+	if (!fs_info) {
+		pr_err("couldn't get fs_info\n");
+		return -EPERM;
+	}
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (sb_rdonly(fs_info->sb))
+		return -EROFS;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val == 0)
+		return -EINVAL;
+
+	/*
+	 * Allocate new chunk.
+	 */
+	trans = btrfs_start_transaction(fs_info->tree_root, 0);
+	if (!trans)
+		return PTR_ERR(trans);
+	ret = btrfs_force_chunk_alloc(trans, space_info->flags);
+	btrfs_end_transaction(trans);
+
+	if (ret == 1)
+		return len;
+
+	return -ENOSPC;
+}
+#endif
+
 SPACE_INFO_ATTR(flags);
 SPACE_INFO_ATTR(total_bytes);
 SPACE_INFO_ATTR(bytes_used);
@@ -797,6 +849,9 @@ SPACE_INFO_ATTR(disk_used);
 SPACE_INFO_ATTR(disk_total);
 BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show,
 	      btrfs_chunk_size_store);
+#ifdef CONFIG_BTRFS_DEBUG
+BTRFS_ATTR_W(space_info, force_chunk_alloc, btrfs_force_chunk_alloc_store);
+#endif
 
 /*
  * Allocation information about block group types.
@@ -815,6 +870,9 @@ static struct attribute *space_info_attrs[] = {
 	BTRFS_ATTR_PTR(space_info, disk_used),
 	BTRFS_ATTR_PTR(space_info, disk_total),
 	BTRFS_ATTR_PTR(space_info, chunk_size),
+#ifdef CONFIG_BTRFS_DEBUG
+	BTRFS_ATTR_PTR(space_info, force_chunk_alloc),
+#endif
 	NULL,
 };
 ATTRIBUTE_GROUPS(space_info);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size
  2022-02-08 19:31 [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Stefan Roesch
                   ` (2 preceding siblings ...)
  2022-02-08 19:31 ` [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation Stefan Roesch
@ 2022-02-28 19:30 ` Josef Bacik
  2022-04-04 16:31 ` David Sterba
  4 siblings, 0 replies; 14+ messages in thread
From: Josef Bacik @ 2022-02-28 19:30 UTC (permalink / raw)
  To: Stefan Roesch; +Cc: linux-btrfs, kernel-team

On Tue, Feb 08, 2022 at 11:31:19AM -0800, Stefan Roesch wrote:
> The btrfs allocator is currently not ideal for all workloads. It tends
> to suffer from overallocating data block groups and underallocating
> metadata block groups. This results in filesystems becoming read-only
> even though there is plenty of "free" space.
> 
> This patch adds the ability to query and set the metadata allocation
> size.
> 
>   Patch 1: btrfs: store chunk size in space-info struct.
>     Store the stripe and chunk size in the btrfs_space_info structure
>     to be able to expose and set the metadata allocation size.
>     
>   Patch 2: btrfs: expose chunk size in sysfs.
>     Add a sysfs entry to read and write the above information.
>     
>   btrfs: add force_chunk_alloc sysfs entry to force allocation
>     For testing purposes and under a debug flag add a sysfs entry to
>     force a space allocation.
> 
> 

You can add

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size
  2022-02-08 19:31 [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Stefan Roesch
                   ` (3 preceding siblings ...)
  2022-02-28 19:30 ` [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Josef Bacik
@ 2022-04-04 16:31 ` David Sterba
  2022-05-27 13:34   ` David Sterba
  4 siblings, 1 reply; 14+ messages in thread
From: David Sterba @ 2022-04-04 16:31 UTC (permalink / raw)
  To: Stefan Roesch; +Cc: linux-btrfs, kernel-team

On Tue, Feb 08, 2022 at 11:31:19AM -0800, Stefan Roesch wrote:
> The btrfs allocator is currently not ideal for all workloads. It tends
> to suffer from overallocating data block groups and underallocating
> metadata block groups. This results in filesystems becoming read-only
> even though there is plenty of "free" space.
> 
> This patch adds the ability to query and set the metadata allocation
> size.
> 
>   Patch 1: btrfs: store chunk size in space-info struct.
>     Store the stripe and chunk size in the btrfs_space_info structure
>     to be able to expose and set the metadata allocation size.
>     
>   Patch 2: btrfs: expose chunk size in sysfs.
>     Add a sysfs entry to read and write the above information.
>     
>   btrfs: add force_chunk_alloc sysfs entry to force allocation
>     For testing purposes and under a debug flag add a sysfs entry to
>     force a space allocation.

Sorry for late response, I'm now reviewing the patches, there are still
some things to fix, commends under the patches.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 1/3] btrfs: store chunk size in space-info struct.
  2022-02-08 19:31 ` [PATCH v1 1/3] btrfs: store chunk size in space-info struct Stefan Roesch
@ 2022-04-04 16:36   ` David Sterba
  2022-06-16 20:53     ` Stefan Roesch
  0 siblings, 1 reply; 14+ messages in thread
From: David Sterba @ 2022-04-04 16:36 UTC (permalink / raw)
  To: Stefan Roesch; +Cc: linux-btrfs, kernel-team

On Tue, Feb 08, 2022 at 11:31:20AM -0800, Stefan Roesch wrote:
> The chunk size is stored in the btrfs_space_info structure.
> It is initialized at the start and is then used.
> 
> A new api is added to update the current chunk size.
> 
> This api is used to be able to expose the chunk_size
> as a sysfs setting.
> 
> Signed-off-by: Stefan Roesch <shr@fb.com>
> ---
>  fs/btrfs/space-info.c | 41 +++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/space-info.h |  3 +++
>  fs/btrfs/volumes.c    | 28 +++++++++-------------------
>  3 files changed, 53 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 294242c194d8..302522a250d8 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -181,6 +181,46 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
>  		found->full = 0;
>  }
>  
> +/*
> + * Compute chunk size depending on block type for regular volumes.
> + */
> +static u64 compute_chunk_size_regular(struct btrfs_fs_info *info, u64 flags)

compute_chunk_size_regular

We've been using 'calculate' or 'calc' for such helper elsewhere, please
rename it for consistency.

Also the common name for struct btrfs_fs_info is 'fs_info' and it can be
made const.

> +{
> +	ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
> +
> +	if (flags & BTRFS_BLOCK_GROUP_DATA)
> +		return SZ_1G;
> +	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
> +		return SZ_32M;
> +
> +	/* Handle BTRFS_BLOCK_GROUP_METADATA */
> +	if (info->fs_devices->total_rw_bytes > 50ULL * SZ_1G)
> +		return SZ_1G;
> +
> +	return SZ_256M;
> +}
> +
> +/*
> + * Compute chunk size depending on volume type.
> + */
> +static u64 compute_chunk_size(struct btrfs_fs_info *info, u64 flags)

Same.

> +{
> +	if (btrfs_is_zoned(info))
> +		return info->zone_size;
> +
> +	return compute_chunk_size_regular(info, flags);
> +}
> +
> +/*
> + * Update default chunk size.

That's not very helpful comment but the function is sort and clear
enough so I don't think it needs it anyway.

> + *
> + */
> +void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
> +					u64 chunk_size)
> +{
> +	atomic64_set(&space_info->chunk_size, chunk_size);
> +}
> +
>  static int create_space_info(struct btrfs_fs_info *info, u64 flags)
>  {
>  
> @@ -202,6 +242,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
>  	INIT_LIST_HEAD(&space_info->tickets);
>  	INIT_LIST_HEAD(&space_info->priority_tickets);
>  	space_info->clamp = 1;
> +	btrfs_update_space_info_chunk_size(space_info, compute_chunk_size(info, flags));
>  
>  	ret = btrfs_sysfs_add_space_info_type(info, space_info);
>  	if (ret)
> diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
> index d841fed73492..c1a64bbfb6d1 100644
> --- a/fs/btrfs/space-info.h
> +++ b/fs/btrfs/space-info.h
> @@ -23,6 +23,7 @@ struct btrfs_space_info {
>  	u64 max_extent_size;	/* This will hold the maximum extent size of
>  				   the space info if we had an ENOSPC in the
>  				   allocator. */
> +	atomic64_t chunk_size;  /* chunk size in bytes */

Why is this an atomic? I don't see the atomic semantics used anywhere,
ie. not losing increments/decrements. For plain atomic_set/atomic_read
it's exactly the same as an int or long. The right annotation here is
READ_ONCE and WRITE_ONCE, which is used for other values updated from
sysfs.

>  
>  	int clamp;		/* Used to scale our threshold for preemptive
>  				   flushing. The value is >> clamp, so turns

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 2/3] btrfs: expose chunk size in sysfs.
  2022-02-08 19:31 ` [PATCH v1 2/3] btrfs: expose chunk size in sysfs Stefan Roesch
@ 2022-04-04 16:52   ` David Sterba
  2022-06-16 20:56     ` Stefan Roesch
  0 siblings, 1 reply; 14+ messages in thread
From: David Sterba @ 2022-04-04 16:52 UTC (permalink / raw)
  To: Stefan Roesch; +Cc: linux-btrfs, kernel-team

On Tue, Feb 08, 2022 at 11:31:21AM -0800, Stefan Roesch wrote:
> This adds a new sysfs entry in /sys/fs/btrfs/<uuid>/allocation/<block
> type>/chunk_size. This allows to query the chunk size and also set the
> chunk size.
> 
> Signed-off-by: Stefan Roesch <shr@fb.com>
> ---
>  fs/btrfs/sysfs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 94 insertions(+)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index beb7f72d50b8..ca337117f15b 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -21,6 +21,7 @@
>  #include "space-info.h"
>  #include "block-group.h"
>  #include "qgroup.h"
> +#include "misc.h"
>  
>  /*
>   * Structure name                       Path
> @@ -92,6 +93,7 @@ static struct btrfs_feature_attr btrfs_attr_features_##_name = {	     \
>  
>  static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
>  static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
> +static inline struct kobject *get_btrfs_kobj(struct kobject *kobj);
>  
>  static struct btrfs_feature_attr *to_btrfs_feature_attr(struct kobj_attribute *a)
>  {
> @@ -708,6 +710,81 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj,	\
>  }									\
>  BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
>  
> +/*
> + * Return space info chunk size.
> + */

Comment not necessary.

> +static ssize_t btrfs_chunk_size_show(struct kobject *kobj,
> +				     struct kobj_attribute *a, char *buf)
> +{
> +	struct btrfs_space_info *sinfo = to_space_info(kobj);
> +
> +	return sysfs_emit(buf, "%llu\n", atomic64_read(&sinfo->chunk_size));
> +}
> +
> +/*
> + * Store new user supplied chunk size in space info.
> + *
> + * Note: If the new chunk size value is larger than 10% of free space it is
> + *       reduced to match that limit.
> + */
> +static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
> +				      struct kobj_attribute *a,
> +				      const char *buf, size_t len)
> +{
> +	struct btrfs_space_info *space_info = to_space_info(kobj);
> +	struct btrfs_fs_info *fs_info = to_fs_info(get_btrfs_kobj(kobj));
> +	u64 val;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (!fs_info) {
> +		pr_err("couldn't get fs_info\n");

What's the reason for such error message? I would end up in the system
log without any context so it does not bring any value to user nor does
it tell what wen wrong.

Checking other functions, there's no such check in most of them and I
don't think it's needed, the filesystem can't be unmounted with open
file references to sysfs.

> +		return -EPERM;
> +	}
> +
> +	if (sb_rdonly(fs_info->sb))
> +		return -EROFS;
> +
> +	if (!fs_info->fs_devices)
> +		return -EINVAL;
> +
> +	if (btrfs_is_zoned(fs_info))
> +		return -EINVAL;
> +
> +	if (!space_info) {
> +		btrfs_err(fs_info, "couldn't get space_info\n");

Same as the error message above.

Also I'm not sure there could be a NULL space_info, it's one of the core
data structures used all the time.

> +		return -EPERM;
> +	}
> +
> +	/* System block type must not be changed. */
> +	if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
> +		return -EINVAL;

This would probably be EPERM, EINVAL is for invalid values (like out of
range).

> +
> +	ret = kstrtoull(buf, 10, &val);

As this is supposed to be used by humans, it's better to use memparse
that also translates the K/M/G suffix so it's easy to write '256M' etc.

> +	if (ret)
> +		return ret;
> +
> +	val = min(val, BTRFS_MAX_DATA_CHUNK_SIZE);
> +
> +	/*
> +	 * Limit stripe size to 10% of available space.
> +	 */
> +	val = min(div_factor(fs_info->fs_devices->total_rw_bytes, 1), val);
> +
> +	/* Must be multiple of 256M. */
> +	val &= ~(SZ_256M - 1);

Where does this requirement come from?

> +
> +	/* Must be at least 256M. */
> +	if (val < SZ_256M)
> +		return -EINVAL;
> +
> +	btrfs_update_space_info_chunk_size(space_info, val);
> +
> +	return val;
> +}
> +
>  SPACE_INFO_ATTR(flags);
>  SPACE_INFO_ATTR(total_bytes);
>  SPACE_INFO_ATTR(bytes_used);
> @@ -718,6 +795,8 @@ SPACE_INFO_ATTR(bytes_readonly);
>  SPACE_INFO_ATTR(bytes_zone_unusable);
>  SPACE_INFO_ATTR(disk_used);
>  SPACE_INFO_ATTR(disk_total);
> +BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show,
> +	      btrfs_chunk_size_store);
>  
>  /*
>   * Allocation information about block group types.
> @@ -735,6 +814,7 @@ static struct attribute *space_info_attrs[] = {
>  	BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
>  	BTRFS_ATTR_PTR(space_info, disk_used),
>  	BTRFS_ATTR_PTR(space_info, disk_total),
> +	BTRFS_ATTR_PTR(space_info, chunk_size),
>  	NULL,
>  };
>  ATTRIBUTE_GROUPS(space_info);
> @@ -1099,6 +1179,20 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
>  	return to_fs_devs(kobj)->fs_info;
>  }
>  
> +/*
> + * Get btrfs sysfs kobject.
> + */
> +static inline struct kobject *get_btrfs_kobj(struct kobject *kobj)
> +{
> +	while (kobj) {
> +		if (kobj->ktype == &btrfs_ktype)
> +			return kobj;
> +		kobj = kobj->parent;
> +	}
> +
> +	return NULL;
> +}

There's the to_fs_info helper to get fs_info from any kobj, why is
get_btrfs_kobj needed to traverse back?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation
  2022-02-08 19:31 ` [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation Stefan Roesch
@ 2022-04-04 17:02   ` David Sterba
  2022-06-16 20:59     ` Stefan Roesch
  0 siblings, 1 reply; 14+ messages in thread
From: David Sterba @ 2022-04-04 17:02 UTC (permalink / raw)
  To: Stefan Roesch; +Cc: linux-btrfs, kernel-team

On Tue, Feb 08, 2022 at 11:31:22AM -0800, Stefan Roesch wrote:
> This adds the force_chunk_alloc sysfs entry to be able to force a
> storage allocation. This is a debugging and test feature and is
> enabled with the CONFIG_BTRFS_DEBUG configuration option.
> 
> It is stored at
> /sys/fs/btrfs/<uuid>/allocation/<block_type>/force_chunk_alloc.
> 
> Signed-off-by: Stefan Roesch <shr@fb.com>
> ---
>  fs/btrfs/sysfs.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index ca337117f15b..4f4f038bc261 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -62,6 +62,10 @@ struct raid_kobject {
>  	.store	= _store,						\
>  }
>  
> +#define BTRFS_ATTR_W(_prefix, _name, _store)			        \
> +	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
> +			__INIT_KOBJ_ATTR(_name, 0200, NULL, _store)
> +
>  #define BTRFS_ATTR_RW(_prefix, _name, _show, _store)			\
>  	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
>  			__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
> @@ -785,6 +789,54 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
>  	return val;
>  }
>  
> +#ifdef CONFIG_BTRFS_DEBUG
> +/*
> + * Request chunk allocation with current chunk size.
> + */
> +static ssize_t btrfs_force_chunk_alloc_store(struct kobject *kobj,
> +					     struct kobj_attribute *a,
> +					     const char *buf, size_t len)
> +{
> +	struct btrfs_space_info *space_info = to_space_info(kobj);
> +	struct btrfs_fs_info *fs_info = to_fs_info(get_btrfs_kobj(kobj));
> +	struct btrfs_trans_handle *trans;
> +	unsigned long val;
> +	int ret;
> +
> +	if (!fs_info) {
> +		pr_err("couldn't get fs_info\n");
> +		return -EPERM;
> +	}

Same comments as in patch 1

> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (sb_rdonly(fs_info->sb))
> +		return -EROFS;
> +
> +	ret = kstrtoul(buf, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val == 0)
> +		return -EINVAL;
> +
> +	/*
> +	 * Allocate new chunk.
> +	 */
> +	trans = btrfs_start_transaction(fs_info->tree_root, 0);

Starting transaction from sysfs callbacks is not considered safe due to
potentially heavy operations going on, taking locks etc. and should be
done in the follwing way:

	btrfs_set_pending(fs_info, COMMIT);
	wake_up_process(fs_info->transaction_kthread);

> +	if (!trans)
> +		return PTR_ERR(trans);
> +	ret = btrfs_force_chunk_alloc(trans, space_info->flags);

Similar here, check the function, it does a lot of things, this is not
safe from sysfs context.

This will need to be done somewhere early in the transaction commit
after setting a new pending bit here in sysfs, like the
btrfs_set_pending(..., COMMIT) does.

> +	btrfs_end_transaction(trans);
> +
> +	if (ret == 1)
> +		return len;
> +
> +	return -ENOSPC;
> +}
> +#endif
> +
>  SPACE_INFO_ATTR(flags);
>  SPACE_INFO_ATTR(total_bytes);
>  SPACE_INFO_ATTR(bytes_used);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size
  2022-04-04 16:31 ` David Sterba
@ 2022-05-27 13:34   ` David Sterba
  0 siblings, 0 replies; 14+ messages in thread
From: David Sterba @ 2022-05-27 13:34 UTC (permalink / raw)
  To: dsterba, Stefan Roesch, linux-btrfs, kernel-team

On Mon, Apr 04, 2022 at 06:31:02PM +0200, David Sterba wrote:
> On Tue, Feb 08, 2022 at 11:31:19AM -0800, Stefan Roesch wrote:
> > The btrfs allocator is currently not ideal for all workloads. It tends
> > to suffer from overallocating data block groups and underallocating
> > metadata block groups. This results in filesystems becoming read-only
> > even though there is plenty of "free" space.
> > 
> > This patch adds the ability to query and set the metadata allocation
> > size.
> > 
> >   Patch 1: btrfs: store chunk size in space-info struct.
> >     Store the stripe and chunk size in the btrfs_space_info structure
> >     to be able to expose and set the metadata allocation size.
> >     
> >   Patch 2: btrfs: expose chunk size in sysfs.
> >     Add a sysfs entry to read and write the above information.
> >     
> >   btrfs: add force_chunk_alloc sysfs entry to force allocation
> >     For testing purposes and under a debug flag add a sysfs entry to
> >     force a space allocation.
> 
> Sorry for late response, I'm now reviewing the patches, there are still
> some things to fix, commends under the

There was no update since, I've applied the fixups and merged the
patches. Changelogs have been updated to mention the individual changes
and what are the constraints of the chunk_size values.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 1/3] btrfs: store chunk size in space-info struct.
  2022-04-04 16:36   ` David Sterba
@ 2022-06-16 20:53     ` Stefan Roesch
  0 siblings, 0 replies; 14+ messages in thread
From: Stefan Roesch @ 2022-06-16 20:53 UTC (permalink / raw)
  To: dsterba, linux-btrfs, kernel-team, Josef Bacik, boris



On 4/4/22 9:36 AM, David Sterba wrote:
> On Tue, Feb 08, 2022 at 11:31:20AM -0800, Stefan Roesch wrote:
>> The chunk size is stored in the btrfs_space_info structure.
>> It is initialized at the start and is then used.
>>
>> A new api is added to update the current chunk size.
>>
>> This api is used to be able to expose the chunk_size
>> as a sysfs setting.
>>
>> Signed-off-by: Stefan Roesch <shr@fb.com>
>> ---
>>  fs/btrfs/space-info.c | 41 +++++++++++++++++++++++++++++++++++++++++
>>  fs/btrfs/space-info.h |  3 +++
>>  fs/btrfs/volumes.c    | 28 +++++++++-------------------
>>  3 files changed, 53 insertions(+), 19 deletions(-)
>>
>> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
>> index 294242c194d8..302522a250d8 100644
>> --- a/fs/btrfs/space-info.c
>> +++ b/fs/btrfs/space-info.c
>> @@ -181,6 +181,46 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
>>  		found->full = 0;
>>  }
>>  
>> +/*
>> + * Compute chunk size depending on block type for regular volumes.
>> + */
>> +static u64 compute_chunk_size_regular(struct btrfs_fs_info *info, u64 flags)
> 
> compute_chunk_size_regular
> 
> We've been using 'calculate' or 'calc' for such helper elsewhere, please
> rename it for consistency.
> 

I renamed it.

> Also the common name for struct btrfs_fs_info is 'fs_info' and it can be
> made const.
>

I made it const and called it fs_info.
 
>> +{
>> +	ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
>> +
>> +	if (flags & BTRFS_BLOCK_GROUP_DATA)
>> +		return SZ_1G;
>> +	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
>> +		return SZ_32M;
>> +
>> +	/* Handle BTRFS_BLOCK_GROUP_METADATA */
>> +	if (info->fs_devices->total_rw_bytes > 50ULL * SZ_1G)
>> +		return SZ_1G;
>> +
>> +	return SZ_256M;
>> +}
>> +
>> +/*
>> + * Compute chunk size depending on volume type.
>> + */
>> +static u64 compute_chunk_size(struct btrfs_fs_info *info, u64 flags)
> 
> Same.

Same as above.

> 
>> +{
>> +	if (btrfs_is_zoned(info))
>> +		return info->zone_size;
>> +
>> +	return compute_chunk_size_regular(info, flags);
>> +}
>> +
>> +/*
>> + * Update default chunk size.
> 
> That's not very helpful comment but the function is sort and clear
> enough so I don't think it needs it anyway.
> 

I removed it.

>> + *
>> + */
>> +void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
>> +					u64 chunk_size)
>> +{
>> +	atomic64_set(&space_info->chunk_size, chunk_size);
>> +}
>> +
>>  static int create_space_info(struct btrfs_fs_info *info, u64 flags)
>>  {
>>  
>> @@ -202,6 +242,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
>>  	INIT_LIST_HEAD(&space_info->tickets);
>>  	INIT_LIST_HEAD(&space_info->priority_tickets);
>>  	space_info->clamp = 1;
>> +	btrfs_update_space_info_chunk_size(space_info, compute_chunk_size(info, flags));
>>  
>>  	ret = btrfs_sysfs_add_space_info_type(info, space_info);
>>  	if (ret)
>> diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
>> index d841fed73492..c1a64bbfb6d1 100644
>> --- a/fs/btrfs/space-info.h
>> +++ b/fs/btrfs/space-info.h
>> @@ -23,6 +23,7 @@ struct btrfs_space_info {
>>  	u64 max_extent_size;	/* This will hold the maximum extent size of
>>  				   the space info if we had an ENOSPC in the
>>  				   allocator. */
>> +	atomic64_t chunk_size;  /* chunk size in bytes */
> 
> Why is this an atomic? I don't see the atomic semantics used anywhere,
> ie. not losing increments/decrements. For plain atomic_set/atomic_read
> it's exactly the same as an int or long. The right annotation here is
> READ_ONCE and WRITE_ONCE, which is used for other values updated from
> sysfs.
> 

I removed the atomic and used READ_ONCE and WRITE_ONCE.

>>  
>>  	int clamp;		/* Used to scale our threshold for preemptive
>>  				   flushing. The value is >> clamp, so turns

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 2/3] btrfs: expose chunk size in sysfs.
  2022-04-04 16:52   ` David Sterba
@ 2022-06-16 20:56     ` Stefan Roesch
  0 siblings, 0 replies; 14+ messages in thread
From: Stefan Roesch @ 2022-06-16 20:56 UTC (permalink / raw)
  To: dsterba, linux-btrfs, kernel-team, Josef Bacik, boris



On 4/4/22 9:52 AM, David Sterba wrote:
> On Tue, Feb 08, 2022 at 11:31:21AM -0800, Stefan Roesch wrote:
>> This adds a new sysfs entry in /sys/fs/btrfs/<uuid>/allocation/<block
>> type>/chunk_size. This allows to query the chunk size and also set the
>> chunk size.
>>
>> Signed-off-by: Stefan Roesch <shr@fb.com>
>> ---
>>  fs/btrfs/sysfs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 94 insertions(+)
>>
>> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
>> index beb7f72d50b8..ca337117f15b 100644
>> --- a/fs/btrfs/sysfs.c
>> +++ b/fs/btrfs/sysfs.c
>> @@ -21,6 +21,7 @@
>>  #include "space-info.h"
>>  #include "block-group.h"
>>  #include "qgroup.h"
>> +#include "misc.h"
>>  
>>  /*
>>   * Structure name                       Path
>> @@ -92,6 +93,7 @@ static struct btrfs_feature_attr btrfs_attr_features_##_name = {	     \
>>  
>>  static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
>>  static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
>> +static inline struct kobject *get_btrfs_kobj(struct kobject *kobj);
>>  
>>  static struct btrfs_feature_attr *to_btrfs_feature_attr(struct kobj_attribute *a)
>>  {
>> @@ -708,6 +710,81 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj,	\
>>  }									\
>>  BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
>>  
>> +/*
>> + * Return space info chunk size.
>> + */
> 
> Comment not necessary.
> 

Removed the comment.

>> +static ssize_t btrfs_chunk_size_show(struct kobject *kobj,
>> +				     struct kobj_attribute *a, char *buf)
>> +{
>> +	struct btrfs_space_info *sinfo = to_space_info(kobj);
>> +
>> +	return sysfs_emit(buf, "%llu\n", atomic64_read(&sinfo->chunk_size));
>> +}
>> +
>> +/*
>> + * Store new user supplied chunk size in space info.
>> + *
>> + * Note: If the new chunk size value is larger than 10% of free space it is
>> + *       reduced to match that limit.
>> + */
>> +static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
>> +				      struct kobj_attribute *a,
>> +				      const char *buf, size_t len)
>> +{
>> +	struct btrfs_space_info *space_info = to_space_info(kobj);
>> +	struct btrfs_fs_info *fs_info = to_fs_info(get_btrfs_kobj(kobj));
>> +	u64 val;
>> +	int ret;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +
>> +	if (!fs_info) {
>> +		pr_err("couldn't get fs_info\n");
> 
> What's the reason for such error message? I would end up in the system
> log without any context so it does not bring any value to user nor does
> it tell what wen wrong.
> 
> Checking other functions, there's no such check in most of them and I
> don't think it's needed, the filesystem can't be unmounted with open
> file references to sysfs.
>

I removed the check.
 
>> +		return -EPERM;
>> +	}
>> +
>> +	if (sb_rdonly(fs_info->sb))
>> +		return -EROFS;
>> +
>> +	if (!fs_info->fs_devices)
>> +		return -EINVAL;
>> +
>> +	if (btrfs_is_zoned(fs_info))
>> +		return -EINVAL;
>> +
>> +	if (!space_info) {
>> +		btrfs_err(fs_info, "couldn't get space_info\n");
> 
> Same as the error message above.
> 

Removed the check.

> Also I'm not sure there could be a NULL space_info, it's one of the core
> data structures used all the time.
> 
>> +		return -EPERM;
>> +	}
>> +
>> +	/* System block type must not be changed. */
>> +	if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
>> +		return -EINVAL;
> 
> This would probably be EPERM, EINVAL is for invalid values (like out of
> range).
>

I changed the return value to EPERM.
 
>> +
>> +	ret = kstrtoull(buf, 10, &val);
> 
> As this is supposed to be used by humans, it's better to use memparse
> that also translates the K/M/G suffix so it's easy to write '256M' etc.
> 
>> +	if (ret)
>> +		return ret;
>> +
>> +	val = min(val, BTRFS_MAX_DATA_CHUNK_SIZE);
>> +
>> +	/*
>> +	 * Limit stripe size to 10% of available space.
>> +	 */
>> +	val = min(div_factor(fs_info->fs_devices->total_rw_bytes, 1), val);
>> +
>> +	/* Must be multiple of 256M. */
>> +	val &= ~(SZ_256M - 1);
> 
> Where does this requirement come from?

One of the reviewers wanted it to be multiple of 256M to avoid fragmentation.

> 
>> +
>> +	/* Must be at least 256M. */
>> +	if (val < SZ_256M)
>> +		return -EINVAL;
>> +
>> +	btrfs_update_space_info_chunk_size(space_info, val);
>> +
>> +	return val;
>> +}
>> +
>>  SPACE_INFO_ATTR(flags);
>>  SPACE_INFO_ATTR(total_bytes);
>>  SPACE_INFO_ATTR(bytes_used);
>> @@ -718,6 +795,8 @@ SPACE_INFO_ATTR(bytes_readonly);
>>  SPACE_INFO_ATTR(bytes_zone_unusable);
>>  SPACE_INFO_ATTR(disk_used);
>>  SPACE_INFO_ATTR(disk_total);
>> +BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show,
>> +	      btrfs_chunk_size_store);
>>  
>>  /*
>>   * Allocation information about block group types.
>> @@ -735,6 +814,7 @@ static struct attribute *space_info_attrs[] = {
>>  	BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
>>  	BTRFS_ATTR_PTR(space_info, disk_used),
>>  	BTRFS_ATTR_PTR(space_info, disk_total),
>> +	BTRFS_ATTR_PTR(space_info, chunk_size),
>>  	NULL,
>>  };
>>  ATTRIBUTE_GROUPS(space_info);
>> @@ -1099,6 +1179,20 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
>>  	return to_fs_devs(kobj)->fs_info;
>>  }
>>  
>> +/*
>> + * Get btrfs sysfs kobject.
>> + */
>> +static inline struct kobject *get_btrfs_kobj(struct kobject *kobj)
>> +{
>> +	while (kobj) {
>> +		if (kobj->ktype == &btrfs_ktype)
>> +			return kobj;
>> +		kobj = kobj->parent;
>> +	}
>> +
>> +	return NULL;
>> +}
> 
> There's the to_fs_info helper to get fs_info from any kobj, why is
> get_btrfs_kobj needed to traverse back?

The kobj for the new sysfs entry is not of type btrfs_ktype. Instead the
kobj->parent->parent is of type btrfs_ktype.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation
  2022-04-04 17:02   ` David Sterba
@ 2022-06-16 20:59     ` Stefan Roesch
  2022-06-17 14:14       ` David Sterba
  0 siblings, 1 reply; 14+ messages in thread
From: Stefan Roesch @ 2022-06-16 20:59 UTC (permalink / raw)
  To: dsterba, linux-btrfs, kernel-team, Josef Bacik, boris



On 4/4/22 10:02 AM, David Sterba wrote:
> On Tue, Feb 08, 2022 at 11:31:22AM -0800, Stefan Roesch wrote:
>> This adds the force_chunk_alloc sysfs entry to be able to force a
>> storage allocation. This is a debugging and test feature and is
>> enabled with the CONFIG_BTRFS_DEBUG configuration option.
>>
>> It is stored at
>> /sys/fs/btrfs/<uuid>/allocation/<block_type>/force_chunk_alloc.
>>
>> Signed-off-by: Stefan Roesch <shr@fb.com>
>> ---
>>  fs/btrfs/sysfs.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>>
>> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
>> index ca337117f15b..4f4f038bc261 100644
>> --- a/fs/btrfs/sysfs.c
>> +++ b/fs/btrfs/sysfs.c
>> @@ -62,6 +62,10 @@ struct raid_kobject {
>>  	.store	= _store,						\
>>  }
>>  
>> +#define BTRFS_ATTR_W(_prefix, _name, _store)			        \
>> +	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
>> +			__INIT_KOBJ_ATTR(_name, 0200, NULL, _store)
>> +
>>  #define BTRFS_ATTR_RW(_prefix, _name, _show, _store)			\
>>  	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
>>  			__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
>> @@ -785,6 +789,54 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
>>  	return val;
>>  }
>>  
>> +#ifdef CONFIG_BTRFS_DEBUG
>> +/*
>> + * Request chunk allocation with current chunk size.
>> + */
>> +static ssize_t btrfs_force_chunk_alloc_store(struct kobject *kobj,
>> +					     struct kobj_attribute *a,
>> +					     const char *buf, size_t len)
>> +{
>> +	struct btrfs_space_info *space_info = to_space_info(kobj);
>> +	struct btrfs_fs_info *fs_info = to_fs_info(get_btrfs_kobj(kobj));
>> +	struct btrfs_trans_handle *trans;
>> +	unsigned long val;
>> +	int ret;
>> +
>> +	if (!fs_info) {
>> +		pr_err("couldn't get fs_info\n");
>> +		return -EPERM;
>> +	}
> 
> Same comments as in patch 1
> 

Removed the check.

>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +
>> +	if (sb_rdonly(fs_info->sb))
>> +		return -EROFS;
>> +
>> +	ret = kstrtoul(buf, 10, &val);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (val == 0)
>> +		return -EINVAL;
>> +
>> +	/*
>> +	 * Allocate new chunk.
>> +	 */
>> +	trans = btrfs_start_transaction(fs_info->tree_root, 0);
> 
> Starting transaction from sysfs callbacks is not considered safe due to
> potentially heavy operations going on, taking locks etc. and should be
> done in the follwing way:
> 
> 	btrfs_set_pending(fs_info, COMMIT);
> 	wake_up_process(fs_info->transaction_kthread);
> 
>> +	if (!trans)
>> +		return PTR_ERR(trans);
>> +	ret = btrfs_force_chunk_alloc(trans, space_info->flags);
> 
> Similar here, check the function, it does a lot of things, this is not
> safe from sysfs context.
> 
> This will need to be done somewhere early in the transaction commit
> after setting a new pending bit here in sysfs, like the
> btrfs_set_pending(..., COMMIT) does.
> 

I discussed this with Josef and Boris. Using a work queue to process these
requests seems to be more adequate then using pending transaction flags.
Any thoughts?

>> +	btrfs_end_transaction(trans);
>> +
>> +	if (ret == 1)
>> +		return len;
>> +
>> +	return -ENOSPC;
>> +}
>> +#endif
>> +
>>  SPACE_INFO_ATTR(flags);
>>  SPACE_INFO_ATTR(total_bytes);
>>  SPACE_INFO_ATTR(bytes_used);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation
  2022-06-16 20:59     ` Stefan Roesch
@ 2022-06-17 14:14       ` David Sterba
  0 siblings, 0 replies; 14+ messages in thread
From: David Sterba @ 2022-06-17 14:14 UTC (permalink / raw)
  To: Stefan Roesch; +Cc: dsterba, linux-btrfs, kernel-team, Josef Bacik, boris

On Thu, Jun 16, 2022 at 01:59:47PM -0700, Stefan Roesch wrote:
> On 4/4/22 10:02 AM, David Sterba wrote:
> > On Tue, Feb 08, 2022 at 11:31:22AM -0800, Stefan Roesch wrote:
> > Starting transaction from sysfs callbacks is not considered safe due to
> > potentially heavy operations going on, taking locks etc. and should be
> > done in the follwing way:
> > 
> > 	btrfs_set_pending(fs_info, COMMIT);
> > 	wake_up_process(fs_info->transaction_kthread);
> > 
> >> +	if (!trans)
> >> +		return PTR_ERR(trans);
> >> +	ret = btrfs_force_chunk_alloc(trans, space_info->flags);
> > 
> > Similar here, check the function, it does a lot of things, this is not
> > safe from sysfs context.
> > 
> > This will need to be done somewhere early in the transaction commit
> > after setting a new pending bit here in sysfs, like the
> > btrfs_set_pending(..., COMMIT) does.
> > 
> 
> I discussed this with Josef and Boris. Using a work queue to process these
> requests seems to be more adequate then using pending transaction flags.
> Any thoughts?

I've left is as is, commit from sysfs though it's not safe, the file is
available only under debug.

Otherwise, the patches have been in misc-next with my fixups so you
don't need to resend.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2022-06-17 14:19 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-08 19:31 [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Stefan Roesch
2022-02-08 19:31 ` [PATCH v1 1/3] btrfs: store chunk size in space-info struct Stefan Roesch
2022-04-04 16:36   ` David Sterba
2022-06-16 20:53     ` Stefan Roesch
2022-02-08 19:31 ` [PATCH v1 2/3] btrfs: expose chunk size in sysfs Stefan Roesch
2022-04-04 16:52   ` David Sterba
2022-06-16 20:56     ` Stefan Roesch
2022-02-08 19:31 ` [PATCH v1 3/3] btrfs: add force_chunk_alloc sysfs entry to force allocation Stefan Roesch
2022-04-04 17:02   ` David Sterba
2022-06-16 20:59     ` Stefan Roesch
2022-06-17 14:14       ` David Sterba
2022-02-28 19:30 ` [PATCH v1 0/3] btrfs: add sysfs switch to get/set metadata size Josef Bacik
2022-04-04 16:31 ` David Sterba
2022-05-27 13:34   ` David Sterba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).