All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anand Jain <anand.jain@oracle.com>
To: linux-btrfs@vger.kernel.org
Cc: josef@toxicpanda.com, kreijack@libero.it
Subject: [RFC][PATCH] btrfs: sysfs for chunk layout hint
Date: Thu, 18 Feb 2021 09:20:19 -0800	[thread overview]
Message-ID: <0ed770d6d5e37fc942f3034d917d2b38477d7d20.1613668002.git.anand.jain@oracle.com> (raw)

btrfs_chunk_alloc() uses dev_alloc_list to allocate new chunks. The
call chain leading to btrfs_cmp_device_info() sorts the
dev_alloc_list in descending order of unallocated space. This
sorting helps to maximize the usable filesystem space.

But, there might be other types of preferences when allocating the
chunks. For example, allocation by device latency, with which the
metadata could go to the device with the least latency.

This patch is a preparatory patch and makes the existing allocation
layout a configurable parameter using sysfs, as shown below.

cd /sys/fs/btrfs/863c787e-fdbd-49ca-a0ea-22f36934ff1f
cat chunk_layout_data
[size]
cat chunk_layout_metadata
[size]

We could add more chunk allocation types by adding to the list in
enum btrfs_chunk_layout{ }.

This is only a preparatory patch. The parameter is in-memory only
as of now. A persistent on-disk structure can be added on top of this
once we have a consensus.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
This patch, together with an experimental sequential chunk layout hint
(patch not yet sent), helped me get consistent performance numbers for
read_policy pid. As the chunk layout hint is not set at mkfs time, a
balance is needed after setting the desired chunk layout hint.

 fs/btrfs/ctree.h   |  3 ++
 fs/btrfs/disk-io.c |  3 ++
 fs/btrfs/sysfs.c   | 98 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.c |  4 +-
 fs/btrfs/volumes.h | 10 +++++
 5 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3bc00aed13b2..c37bd2d7f5d4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -993,6 +993,9 @@ struct btrfs_fs_info {
 	spinlock_t eb_leak_lock;
 	struct list_head allocated_ebs;
 #endif
+
+	int chunk_layout_data;
+	int chunk_layout_metadata;
 };
 
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c2576c5fe62e..c81f95339a35 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2890,6 +2890,9 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	fs_info->swapfile_pins = RB_ROOT;
 
 	fs_info->send_in_progress = 0;
+
+	fs_info->chunk_layout_data = BTRFS_CHUNK_LAYOUT_SIZE;
+	fs_info->chunk_layout_metadata = BTRFS_CHUNK_LAYOUT_SIZE;
 }
 
 static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 30e1cfcaa925..788784b1ed44 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -967,6 +967,102 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
 }
 BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
 
+static const char * const btrfs_chunk_layout_name[] = { "size" };
+
+static ssize_t btrfs_chunk_layout_data_show(struct kobject *kobj,
+					    struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+	ssize_t ret = 0;
+	int i;
+
+	for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+		if (fs_info->chunk_layout_data == i)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
+					 (ret == 0 ? "" : " "),
+					 btrfs_chunk_layout_name[i]);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+					 (ret == 0 ? "" : " "),
+					 btrfs_chunk_layout_name[i]);
+	}
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	return ret;
+}
+
+static ssize_t btrfs_chunk_layout_data_store(struct kobject *kobj,
+					     struct kobj_attribute *a,
+					     const char *buf, size_t len)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+	int i;
+
+	for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+		if (strmatch(buf, btrfs_chunk_layout_name[i])) {
+			if (i != fs_info->chunk_layout_data) {
+				fs_info->chunk_layout_data = i;
+				btrfs_info(fs_info, "chunk_layout_data set to '%s'",
+					   btrfs_chunk_layout_name[i]);
+			}
+			return len;
+		}
+	}
+
+	return -EINVAL;
+}
+BTRFS_ATTR_RW(, chunk_layout_data, btrfs_chunk_layout_data_show,
+	      btrfs_chunk_layout_data_store);
+
+static ssize_t btrfs_chunk_layout_metadata_show(struct kobject *kobj,
+						struct kobj_attribute *a,
+						char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+	ssize_t ret = 0;
+	int i;
+
+	for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+		if (fs_info->chunk_layout_metadata == i)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
+					 (ret == 0 ? "" : " "),
+					 btrfs_chunk_layout_name[i]);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+					 (ret == 0 ? "" : " "),
+					 btrfs_chunk_layout_name[i]);
+	}
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	return ret;
+}
+
+static ssize_t btrfs_chunk_layout_metadata_store(struct kobject *kobj,
+						 struct kobj_attribute *a,
+						 const char *buf, size_t len)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+	int i;
+
+	for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+		if (strmatch(buf, btrfs_chunk_layout_name[i])) {
+			if (i != fs_info->chunk_layout_metadata) {
+				fs_info->chunk_layout_metadata = i;
+				btrfs_info(fs_info,
+					   "chunk_layout_metadata set to '%s'",
+					   btrfs_chunk_layout_name[i]);
+			}
+			return len;
+		}
+	}
+
+	return -EINVAL;
+}
+BTRFS_ATTR_RW(, chunk_layout_metadata, btrfs_chunk_layout_metadata_show,
+	      btrfs_chunk_layout_metadata_store);
+
 static const struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(, label),
 	BTRFS_ATTR_PTR(, nodesize),
@@ -978,6 +1074,8 @@ static const struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(, exclusive_operation),
 	BTRFS_ATTR_PTR(, generation),
 	BTRFS_ATTR_PTR(, read_policy),
+	BTRFS_ATTR_PTR(, chunk_layout_data),
+	BTRFS_ATTR_PTR(, chunk_layout_metadata),
 	NULL,
 };
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d1ba160ef73b..2223c4263d4a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5097,7 +5097,9 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
 	ctl->ndevs = ndevs;
 
 	/*
-	 * now sort the devices by hole size / available space
+	 * Now sort the devices by hole size / available space.
+	 * This sort helps to pick device(s) with larger space.
+	 * That is BTRFS_CHUNK_LAYOUT_SIZE.
 	 */
 	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
 	     btrfs_cmp_device_info, NULL);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d0a90dc7fc03..b514d09f4ba8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -218,6 +218,16 @@ enum btrfs_chunk_allocation_policy {
 	BTRFS_CHUNK_ALLOC_ZONED,
 };
 
+/*
+ * If we have more devices than the number required for striping,
+ * chunk_layout tells us which device(s) to use.
+ */
+enum btrfs_chunk_layout {
+	/* Use in the order of the size of the unallocated space on the device */
+	BTRFS_CHUNK_LAYOUT_SIZE,
+	BTRFS_NR_CHUNK_LAYOUT,
+};
+
 /*
  * Read policies for mirrored block group profiles, read picks the stripe based
  * on these policies.
-- 
2.27.0


             reply	other threads:[~2021-02-18 18:18 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-18 17:20 Anand Jain [this message]
2021-02-18 19:11 ` [RFC][PATCH] btrfs: sysfs for chunk layout hint Goffredo Baroncelli
2021-02-19  3:06   ` Anand Jain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0ed770d6d5e37fc942f3034d917d2b38477d7d20.1613668002.git.anand.jain@oracle.com \
    --to=anand.jain@oracle.com \
    --cc=josef@toxicpanda.com \
    --cc=kreijack@libero.it \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.