linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@redhat.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH] Btrfs-progs: add support for mixed data+metadata block groups V3
Date: Wed, 27 Oct 2010 14:37:34 -0400	[thread overview]
Message-ID: <1288204654-2127-1-git-send-email-josef@redhat.com> (raw)

So a lot of crazy people (I'm looking at you Meego) want to use btrfs on phones
and such with small devices.  Unfortunately the way we split out metadata/data
chunks makes space usage inefficient for volumes that are smaller than
1 gigabyte.  So add a -M option for mixing metadata+data, and default to this
mixed mode if the filesystem is less than or equal to 1 gigabyte.  I've tested
this with xfstests on a 100 MB filesystem and everything is a-ok.

Signed-off-by: Josef Bacik <josef@redhat.com>
---
V2->V3: Add an incompat flag since the kernel will panic with mixed block groups
unless it has had a few key fixes.

V1->V2: My original patch was screwing up if you specified the data or metadata
profile.  This update fixes that problem.

 btrfs-vol.c  |    4 +-
 btrfs_cmds.c |   13 +++++-
 ctree.h      |    4 +-
 mkfs.c       |  122 +++++++++++++++++++++++++++++++++++++++++-----------------
 utils.c      |   10 ++--
 utils.h      |    2 +-
 6 files changed, 109 insertions(+), 46 deletions(-)

diff --git a/btrfs-vol.c b/btrfs-vol.c
index 8069778..7200bbc 100644
--- a/btrfs-vol.c
+++ b/btrfs-vol.c
@@ -129,7 +129,9 @@ int main(int ac, char **av)
 		exit(1);
 	}
 	if (cmd == BTRFS_IOC_ADD_DEV) {
-		ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count);
+		int mixed = 0;
+
+		ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count, &mixed);
 		if (ret) {
 			fprintf(stderr, "Unable to init %s\n", device);
 			exit(1);
diff --git a/btrfs_cmds.c b/btrfs_cmds.c
index 8031c58..683aec0 100644
--- a/btrfs_cmds.c
+++ b/btrfs_cmds.c
@@ -705,6 +705,7 @@ int do_add_volume(int nargs, char **args)
 		int	devfd, res;
 		u64 dev_block_count = 0;
 		struct stat st;
+		int mixed = 0;
 
 		devfd = open(args[i], O_RDWR);
 		if (!devfd) {
@@ -727,7 +728,7 @@ int do_add_volume(int nargs, char **args)
 			continue;
 		}
 
-		res = btrfs_prepare_device(devfd, args[i], 1, &dev_block_count);
+		res = btrfs_prepare_device(devfd, args[i], 1, &dev_block_count, &mixed);
 		if (res) {
 			fprintf(stderr, "ERROR: Unable to init '%s'\n", args[i]);
 			close(devfd);
@@ -889,8 +890,14 @@ int do_df_filesystem(int nargs, char **argv)
 		memset(description, 0, 80);
 
 		if (flags & BTRFS_BLOCK_GROUP_DATA) {
-			snprintf(description, 5, "%s", "Data");
-			written += 4;
+			if (flags & BTRFS_BLOCK_GROUP_METADATA) {
+				snprintf(description, 15, "%s",
+					 "Data+Metadata");
+				written += 14;
+			} else {
+				snprintf(description, 5, "%s", "Data");
+				written += 4;
+			}
 		} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) {
 			snprintf(description, 7, "%s", "System");
 			written += 6;
diff --git a/ctree.h b/ctree.h
index 4d0e1e2..8276e5e 100644
--- a/ctree.h
+++ b/ctree.h
@@ -352,13 +352,15 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_SPACE_CACHE	(1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 3)
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
 #define BTRFS_FEATURE_INCOMPAT_SUPP			\
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
-	 BTRFS_FEATURE_INCOMPAT_SPACE_CACHE)
+	 BTRFS_FEATURE_INCOMPAT_SPACE_CACHE |		\
+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
diff --git a/mkfs.c b/mkfs.c
index 2e99b95..04de93a 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -69,7 +69,7 @@ static u64 parse_size(char *s)
 	return atol(s) * mult;
 }
 
-static int make_root_dir(struct btrfs_root *root)
+static int make_root_dir(struct btrfs_root *root, int mixed)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_key location;
@@ -88,30 +88,47 @@ static int make_root_dir(struct btrfs_root *root)
 				     0, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
 	BUG_ON(ret);
 
-	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
-				&chunk_start, &chunk_size,
-				BTRFS_BLOCK_GROUP_METADATA);
-	BUG_ON(ret);
-	ret = btrfs_make_block_group(trans, root, 0,
-				     BTRFS_BLOCK_GROUP_METADATA,
-				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-				     chunk_start, chunk_size);
-	BUG_ON(ret);
+	if (mixed) {
+		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+					&chunk_start, &chunk_size,
+					BTRFS_BLOCK_GROUP_METADATA |
+					BTRFS_BLOCK_GROUP_DATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, root, 0,
+					     BTRFS_BLOCK_GROUP_METADATA |
+					     BTRFS_BLOCK_GROUP_DATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     chunk_start, chunk_size);
+		BUG_ON(ret);
+		printf("Created a data/metadata chunk of size %llu\n", chunk_size);
+	} else {
+		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+					&chunk_start, &chunk_size,
+					BTRFS_BLOCK_GROUP_METADATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, root, 0,
+					     BTRFS_BLOCK_GROUP_METADATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     chunk_start, chunk_size);
+		BUG_ON(ret);
+	}
 
 	root->fs_info->system_allocs = 0;
 	btrfs_commit_transaction(trans, root);
 	trans = btrfs_start_transaction(root, 1);
 	BUG_ON(!trans);
 
-	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
-				&chunk_start, &chunk_size,
-				BTRFS_BLOCK_GROUP_DATA);
-	BUG_ON(ret);
-	ret = btrfs_make_block_group(trans, root, 0,
-				     BTRFS_BLOCK_GROUP_DATA,
-				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-				     chunk_start, chunk_size);
-	BUG_ON(ret);
+	if (!mixed) {
+		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+					&chunk_start, &chunk_size,
+					BTRFS_BLOCK_GROUP_DATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, root, 0,
+					     BTRFS_BLOCK_GROUP_DATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     chunk_start, chunk_size);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
 			      BTRFS_ROOT_TREE_DIR_OBJECTID);
@@ -200,7 +217,7 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans,
 
 static int create_raid_groups(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root, u64 data_profile,
-			      u64 metadata_profile)
+			      u64 metadata_profile, int mixed)
 {
 	u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
 	u64 allowed;
@@ -215,20 +232,24 @@ static int create_raid_groups(struct btrfs_trans_handle *trans,
 		allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
 
 	if (allowed & metadata_profile) {
+		u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA;
+
 		ret = create_one_raid_group(trans, root,
 					    BTRFS_BLOCK_GROUP_SYSTEM |
 					    (allowed & metadata_profile));
 		BUG_ON(ret);
 
-		ret = create_one_raid_group(trans, root,
-					    BTRFS_BLOCK_GROUP_METADATA |
+		if (mixed)
+			meta_flags |= BTRFS_BLOCK_GROUP_DATA;
+
+		ret = create_one_raid_group(trans, root, meta_flags |
 					    (allowed & metadata_profile));
 		BUG_ON(ret);
 
 		ret = recow_roots(trans, root);
 		BUG_ON(ret);
 	}
-	if (num_devices > 1 && (allowed & data_profile)) {
+	if (!mixed && num_devices > 1 && (allowed & data_profile)) {
 		ret = create_one_raid_group(trans, root,
 					    BTRFS_BLOCK_GROUP_DATA |
 					    (allowed & data_profile));
@@ -274,6 +295,7 @@ static void print_usage(void)
 	fprintf(stderr, "\t -l --leafsize size of btree leaves\n");
 	fprintf(stderr, "\t -L --label set a label\n");
 	fprintf(stderr, "\t -m --metadata metadata profile, values like data profile\n");
+	fprintf(stderr, "\t -M --mixed mix metadata and data together\n");
 	fprintf(stderr, "\t -n --nodesize size of btree nodes\n");
 	fprintf(stderr, "\t -s --sectorsize min block allocation\n");
 	fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
@@ -328,6 +350,7 @@ static struct option long_options[] = {
 	{ "leafsize", 1, NULL, 'l' },
 	{ "label", 1, NULL, 'L'},
 	{ "metadata", 1, NULL, 'm' },
+	{ "mixed", 0, NULL, 'M' },
 	{ "nodesize", 1, NULL, 'n' },
 	{ "sectorsize", 1, NULL, 's' },
 	{ "data", 1, NULL, 'd' },
@@ -358,10 +381,13 @@ int main(int ac, char **av)
 	int first_fd;
 	int ret;
 	int i;
+	int mixed = 0;
+	int data_profile_opt = 0;
+	int metadata_profile_opt = 0;
 
 	while(1) {
 		int c;
-		c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:V", long_options,
+		c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:VM", long_options,
 				&option_index);
 		if (c < 0)
 			break;
@@ -371,6 +397,7 @@ int main(int ac, char **av)
 				break;
 			case 'd':
 				data_profile = parse_profile(optarg);
+				data_profile_opt = 1;
 				break;
 			case 'l':
 				leafsize = parse_size(optarg);
@@ -380,6 +407,10 @@ int main(int ac, char **av)
 				break;
 			case 'm':
 				metadata_profile = parse_profile(optarg);
+				metadata_profile_opt = 1;
+				break;
+			case 'M':
+				mixed = 1;
 				break;
 			case 'n':
 				nodesize = parse_size(optarg);
@@ -389,12 +420,10 @@ int main(int ac, char **av)
 				break;
 			case 'b':
 				block_count = parse_size(optarg);
-				if (block_count < 256*1024*1024) {
-					fprintf(stderr, "File system size "
-						"%llu bytes is too small, "
-						"256M is required at least\n",
-						(unsigned long long)block_count);
-					exit(1);
+				if (block_count <= 1024*1024*1024) {
+					printf("SMALL VOLUME: forcing mixed "
+					       "metadata/data groups\n");
+					mixed = 1;
 				}
 				zero_end = 0;
 				break;
@@ -439,9 +468,22 @@ int main(int ac, char **av)
 	}
 	first_fd = fd;
 	first_file = file;
-	ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count);
+	ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
+				   &mixed);
 	if (block_count == 0)
 		block_count = dev_block_count;
+	if (mixed) {
+		if (!metadata_profile_opt)
+			metadata_profile = 0;
+		if (!data_profile_opt)
+			data_profile = 0;
+
+		if (metadata_profile != data_profile) {
+			fprintf(stderr, "With mixed block groups data and metadata "
+				"profiles must be the same\n");
+			exit(1);
+		}
+	}
 
 	blocks[0] = BTRFS_SUPER_INFO_OFFSET;
 	for (i = 1; i < 7; i++) {
@@ -459,7 +501,7 @@ int main(int ac, char **av)
 	root = open_ctree(file, 0, O_RDWR);
 	root->fs_info->alloc_start = alloc_start;
 
-	ret = make_root_dir(root);
+	ret = make_root_dir(root, mixed);
 	if (ret) {
 		fprintf(stderr, "failed to setup the root directory\n");
 		exit(1);
@@ -478,6 +520,8 @@ int main(int ac, char **av)
 
 	zero_end = 1;
 	while(ac-- > 0) {
+		int old_mixed = mixed;
+
 		file = av[optind++];
 		ret = check_mounted(file);
 		if (ret < 0) {
@@ -503,8 +547,8 @@ int main(int ac, char **av)
 			continue;
 		}
 		ret = btrfs_prepare_device(fd, file, zero_end,
-					   &dev_block_count);
-
+					   &dev_block_count, &mixed);
+		mixed = old_mixed;
 		BUG_ON(ret);
 
 		ret = btrfs_add_to_fsid(trans, root, fd, file, dev_block_count,
@@ -515,12 +559,20 @@ int main(int ac, char **av)
 
 raid_groups:
 	ret = create_raid_groups(trans, root, data_profile,
-				 metadata_profile);
+				 metadata_profile, mixed);
 	BUG_ON(ret);
 
 	ret = create_data_reloc_tree(trans, root);
 	BUG_ON(ret);
 
+	if (mixed) {
+		struct btrfs_super_block *super = &root->fs_info->super_copy;
+		u64 flags = btrfs_super_incompat_flags(super);
+
+		flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
+		btrfs_set_super_incompat_flags(super, flags);
+	}
+
 	printf("fs created label %s on %s\n\tnodesize %u leafsize %u "
 	    "sectorsize %u size %s\n",
 	    label, first_file, nodesize, leafsize, sectorsize,
diff --git a/utils.c b/utils.c
index 2f4c6e1..0de43df 100644
--- a/utils.c
+++ b/utils.c
@@ -507,7 +507,8 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret)
+int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret,
+			 int *mixed)
 {
 	u64 block_count;
 	u64 bytenr;
@@ -527,10 +528,9 @@ int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret)
 	}
 	zero_end = 1;
 
-	if (block_count < 256 * 1024 * 1024) {
-		fprintf(stderr, "device %s is too small "
-		        "(must be at least 256 MB)\n", file);
-		exit(1);
+	if (block_count < 1024 * 1024 * 1024 && !(*mixed)) {
+		printf("SMALL VOLUME: forcing mixed metadata/data groups\n");
+		*mixed = 1;
 	}
 	ret = zero_dev_start(fd);
 	if (ret) {
diff --git a/utils.h b/utils.h
index 7ff542b..b91140e 100644
--- a/utils.h
+++ b/utils.h
@@ -27,7 +27,7 @@ int make_btrfs(int fd, const char *device, const char *label,
 int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, u64 objectid);
 int btrfs_prepare_device(int fd, char *file, int zero_end,
-			 u64 *block_count_ret);
+			 u64 *block_count_ret, int *mixed);
 int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, int fd, char *path,
 		      u64 block_count, u32 io_width, u32 io_align,
-- 
1.6.6.1


             reply	other threads:[~2010-10-27 18:37 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-27 18:37 Josef Bacik [this message]
2010-10-29  0:20 ` [PATCH] Btrfs-progs: add support for mixed data+metadata block groups V3 Cláudio Martins
2010-10-29  1:03   ` Josef Bacik
2010-11-06 21:10     ` Mitch Harder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1288204654-2127-1-git-send-email-josef@redhat.com \
    --to=josef@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).