All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anand Jain <anand.jain@oracle.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 3/3] btrfs: add readmirror devid property
Date: Thu, 25 Apr 2019 19:59:46 +0800	[thread overview]
Message-ID: <20190425115946.2550-4-anand.jain@oracle.com> (raw)
In-Reply-To: <20190425115946.2550-1-anand.jain@oracle.com>

Introduces devid readmirror property, to direct read IO to the specified
device(s).

The readmirror property is stored as an extended attribute on the root
inode. The readmirror input format is devid1,2,3.. etc. For each devid
provided, the new flag BTRFS_DEV_STATE_READ_OPTIMISED is set on the
corresponding device.

As of now, readmirror by devid supports only RAID1. RAID10 support has
to leverage the device grouping feature, which is yet to be implemented.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/props.c   | 69 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/volumes.c | 16 +++++++++++
 fs/btrfs/volumes.h |  2 ++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 1d44fbdd5891..c5d17e29ab4e 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -320,7 +320,10 @@ static const char *prop_compression_extract(struct inode *inode)
 static int prop_readmirror_validate(struct inode *inode, const char *value,
 				    size_t len)
 {
+	char *value_dup;
+	char *devid_str;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 
 	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID)
 		return -EINVAL;
@@ -328,16 +331,80 @@ static int prop_readmirror_validate(struct inode *inode, const char *value,
 	if (!len)
 		return 0;
 
-	return -EINVAL;
+	if (len <= 5 || strncmp("devid", value, 5))
+		return -EINVAL;
+
+	value_dup = kstrndup(value + 5, len - 5, GFP_KERNEL);
+	if (!value_dup)
+		return -ENOMEM;
+
+	while ((devid_str = strsep(&value_dup, ",")) != NULL) {
+		u64 devid;
+		struct btrfs_device *device;
+
+		if (kstrtoull(devid_str, 10, &devid)) {
+			kfree(value_dup);
+			return -EINVAL;
+		}
+
+		device = btrfs_find_device(fs_devices, devid, NULL, NULL, false);
+		if (!device) {
+			kfree(value_dup);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
 }
 
 static int prop_readmirror_apply(struct inode *inode, const char *value,
 				 size_t len)
 {
+	char *value_dup;
+	char *devid_str;
+	struct btrfs_device *device;
 	struct btrfs_fs_devices *fs_devices = btrfs_sb(inode->i_sb)->fs_devices;
 
+	if (value) {
+		value_dup = kstrndup(value + 5, len - 5, GFP_KERNEL);
+		if (!value_dup)
+			return -ENOMEM;
+	}
+
+	/* Both set and reset has to clear the exisiting values */
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (test_bit(BTRFS_DEV_STATE_READ_OPTIMISED,
+			     &device->dev_state)) {
+			clear_bit(BTRFS_DEV_STATE_READ_OPTIMISED,
+				  &device->dev_state);
+		}
+	}
 	fs_devices->readmirror_policy = BTRFS_READMIRROR_DEFAULT;
 
+	/* Its only reset so just return */
+	if (!value)
+		return 0;
+
+	while ((devid_str = strsep(&value_dup, ",")) != NULL) {
+		u64 devid;
+
+		/* Has been verified in validate() this will not fail */
+		if (kstrtoull(devid_str, 10, &devid)) {
+			kfree(value_dup);
+			return -EINVAL;
+		}
+
+		device = btrfs_find_device(fs_devices, devid, NULL, NULL, false);
+		if (!device) {
+			kfree(value_dup);
+			return -EINVAL;
+		}
+
+		set_bit(BTRFS_DEV_STATE_READ_OPTIMISED, &device->dev_state);
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_DEVID;
+	}
+
+	kfree(value_dup);
 	return 0;
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 64c3a2d8b264..90cbc03a20f6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5495,6 +5495,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	int preferred_mirror;
 	int tolerance;
 	struct btrfs_device *srcdev;
+	bool found = false;
 
 	ASSERT((map->type &
 		 (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
@@ -5505,6 +5506,21 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
 		num_stripes = map->num_stripes;
 
 	switch(fs_info->fs_devices->readmirror_policy) {
+	case BTRFS_READMIRROR_DEVID:
+		/* skip raid10 for now */
+		if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+			for (i = first; i < first + num_stripes; i++) {
+				if (test_bit(BTRFS_DEV_STATE_READ_OPTIMISED,
+					     &map->stripes[i].dev->dev_state)) {
+					preferred_mirror = i;
+					found = true;
+					break;
+				}
+			}
+			if (found)
+				break;
+		}
+		/* fall through */
 	case BTRFS_READMIRROR_DEFAULT:
 		/* fall through */
 	default:
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5dffaa6f9d18..6c223790dc4c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -41,6 +41,7 @@ struct btrfs_pending_bios {
 #define BTRFS_DEV_STATE_MISSING		(2)
 #define BTRFS_DEV_STATE_REPLACE_TGT	(3)
 #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
+#define BTRFS_DEV_STATE_READ_OPTIMISED	(5)
 
 struct btrfs_device {
 	struct list_head dev_list;
@@ -206,6 +207,7 @@ BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
 
 enum btrfs_readmirror_policy {
 	BTRFS_READMIRROR_DEFAULT,
+	BTRFS_READMIRROR_DEVID,
 };
 
 struct btrfs_fs_devices {
-- 
2.20.1 (Apple Git-117)


  parent reply	other threads:[~2019-04-25 12:00 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-25 11:59 [PATCH 0/3] readmirror feature Anand Jain
2019-04-25 11:59 ` [PATCH 1/3] btrfs: add inode pointer to prop_handler::validate() Anand Jain
2019-04-25 11:59 ` [PATCH 2/3] btrfs: add readmirror property framework Anand Jain
2019-04-25 11:59 ` Anand Jain [this message]
2019-04-25 12:00 ` [PATCH 1/2] btrfs-progs: add helper to create xattr name Anand Jain
2019-04-25 12:00   ` [PATCH 2/2] btrfs-progs: add readmirror policy Anand Jain
2019-05-11 15:24 ` [PATCH 0/3] readmirror feature Steven Davies
2019-05-28 13:56   ` Anand Jain
  -- strict thread matches above, loose matches on Subject: below --
2019-04-25 11:55 Anand Jain
2019-04-25 11:55 ` [PATCH 3/3] btrfs: add readmirror devid property Anand Jain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190425115946.2550-4-anand.jain@oracle.com \
    --to=anand.jain@oracle.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.