All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
@ 2021-07-11 16:10 Sidong Yang
  2021-07-12  1:16 ` Qu Wenruo
  2021-07-12  6:52 ` Johannes Thumshirn
  0 siblings, 2 replies; 9+ messages in thread
From: Sidong Yang @ 2021-07-11 16:10 UTC (permalink / raw)
  To: linux-btrfs, Qu Wenruo; +Cc: Sidong Yang

This patch adds an subcommand in inspect-internal. It dumps file extents of
the file that user provided. It helps to show the internal information
about file extents comprise the file.

Signed-off-by: Sidong Yang <realwakka@gmail.com>
---
v2:
 - Prints type and compression
 - Use the terms from file_extents_item like disk_bytenr not like physical"
---
 Makefile                         |   2 +-
 cmds/commands.h                  |   2 +-
 cmds/inspect-dump-file-extents.c | 165 +++++++++++++++++++++++++++++++
 cmds/inspect.c                   |   1 +
 4 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 cmds/inspect-dump-file-extents.c

diff --git a/Makefile b/Makefile
index a1cc457b..911e16de 100644
--- a/Makefile
+++ b/Makefile
@@ -156,7 +156,7 @@ cmds_objects = cmds/subvolume.o cmds/filesystem.o cmds/device.o cmds/scrub.o \
 	       cmds/restore.o cmds/rescue.o cmds/rescue-chunk-recover.o \
 	       cmds/rescue-super-recover.o \
 	       cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \
-	       cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \
+	       cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/inspect-dump-file-extents.o cmds/filesystem-du.o \
 	       mkfs/common.o check/mode-common.o check/mode-lowmem.o
 libbtrfs_objects = common/send-stream.o common/send-utils.o kernel-lib/rbtree.o btrfs-list.o \
 		   kernel-lib/radix-tree.o common/extent-cache.o kernel-shared/extent_io.o \
diff --git a/cmds/commands.h b/cmds/commands.h
index 8fa85d6c..55de248e 100644
--- a/cmds/commands.h
+++ b/cmds/commands.h
@@ -154,5 +154,5 @@ DECLARE_COMMAND(select_super);
 DECLARE_COMMAND(dump_super);
 DECLARE_COMMAND(debug_tree);
 DECLARE_COMMAND(rescue);
-
+DECLARE_COMMAND(inspect_dump_file_extents);
 #endif
diff --git a/cmds/inspect-dump-file-extents.c b/cmds/inspect-dump-file-extents.c
new file mode 100644
index 00000000..8574a1d0
--- /dev/null
+++ b/cmds/inspect-dump-file-extents.c
@@ -0,0 +1,165 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <sys/ioctl.h>
+
+#include "common/utils.h"
+#include "cmds/commands.h"
+
+static const char * const cmd_inspect_dump_file_extents_usage[] = {
+	"btrfs inspect-internal dump-extent path",
+	"Dump file extent in a textual form",
+	NULL
+};
+
+static void compress_type_to_str(u8 compress_type, char *ret)
+{
+	switch (compress_type) {
+	case BTRFS_COMPRESS_NONE:
+		strcpy(ret, "none");
+		break;
+	case BTRFS_COMPRESS_ZLIB:
+		strcpy(ret, "zlib");
+		break;
+	case BTRFS_COMPRESS_LZO:
+		strcpy(ret, "lzo");
+		break;
+	case BTRFS_COMPRESS_ZSTD:
+		strcpy(ret, "zstd");
+		break;
+	default:
+		sprintf(ret, "UNKNOWN.%d", compress_type);
+	}
+}
+
+static const char* file_extent_type_to_str(u8 type)
+{
+	switch (type) {
+	case BTRFS_FILE_EXTENT_INLINE: return "inline";
+	case BTRFS_FILE_EXTENT_PREALLOC: return "prealloc";
+	case BTRFS_FILE_EXTENT_REG: return "regular";
+	default: return "unknown";
+	}
+}
+
+static int cmd_inspect_dump_file_extents(const struct cmd_struct *cmd,
+										 int argc, char **argv)
+{
+	int fd;
+	struct stat statbuf;
+	struct btrfs_ioctl_ino_lookup_args lookup;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_file_extent_item *extent_item;
+	struct btrfs_ioctl_search_header *header;
+	u64 pos;
+	u64 buf_off;
+	u64 len;
+	u64 begin;
+	u64 disk_bytenr;
+	u64 disk_num_bytes;
+	u64 offset;
+	int ret;
+	int i;
+	char compress_str[16];
+
+	fd = open(argv[optind], O_RDONLY);
+	if (fd < 0) {
+		error("cannot open %s: %m", argv[optind]);
+		ret = 1;
+		goto out;
+	}
+
+	if (fstat(fd, &statbuf) < 0) {
+		error("failed to fstat %s: %m", argv[optind]);
+		ret = 1;
+		goto out;
+	}
+
+	lookup.treeid = 0;
+	lookup.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0) {
+		error("failed to lookup inode %s: %m", argv[optind]);
+		ret = 1;
+		goto out;
+	}
+
+	pos = 0;
+
+	sk->tree_id = lookup.treeid;
+	sk->min_objectid = statbuf.st_ino;
+	sk->max_objectid = statbuf.st_ino;
+
+	sk->max_offset = UINT64_MAX;
+	sk->min_transid = 0;
+	sk->max_transid = UINT64_MAX;
+	sk->min_type = sk->max_type = BTRFS_EXTENT_DATA_KEY;
+	sk->nr_items = 4096;
+
+	while (statbuf.st_size > pos) {
+		sk->min_offset = pos;
+		if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args)) {
+			error("failed to search tree ioctl %s: %m", argv[optind]);
+			ret = 1;
+			goto out;
+		}
+
+		buf_off = 0;
+		for(i=0; i<sk->nr_items; ++i) {
+			header = (struct btrfs_ioctl_search_header *)(args.buf + buf_off);
+
+			if (btrfs_search_header_type(header) == BTRFS_EXTENT_DATA_KEY) {
+				extent_item = (struct btrfs_file_extent_item *)(header + 1);
+				begin = btrfs_search_header_offset(header);
+
+				printf("type = %s, begin = %llu, ",
+					   file_extent_type_to_str(extent_item->type), begin);
+				switch (extent_item->type) {
+				case BTRFS_FILE_EXTENT_INLINE:
+					len = le64_to_cpu(extent_item->ram_bytes);
+					printf("end = %llu\n", begin + len);
+					break;
+				case BTRFS_FILE_EXTENT_REG:
+				case BTRFS_FILE_EXTENT_PREALLOC:
+					len = le64_to_cpu(extent_item->num_bytes);
+					disk_bytenr = le64_to_cpu(extent_item->disk_bytenr);
+					disk_num_bytes = le64_to_cpu(extent_item->disk_num_bytes);
+					offset = le64_to_cpu(extent_item->offset);
+					compress_type_to_str(extent_item->compression, compress_str);
+					printf("end = %llu, disk_bytenr = %llu, disk_num_bytes = %llu,"
+						   " offset = %llu, compression = %s\n",
+						   begin + len, disk_bytenr, disk_num_bytes, offset, compress_str);
+
+					break;
+				}
+
+			}
+			buf_off += sizeof(*header) + btrfs_search_header_len(header);
+			pos += len;
+		}
+
+	}
+	ret = 0;
+out:
+	close(fd);
+	return ret;
+}
+DEFINE_SIMPLE_COMMAND(inspect_dump_file_extents, "dump-file-extents");
diff --git a/cmds/inspect.c b/cmds/inspect.c
index 2ef5f4b6..dfb0e27b 100644
--- a/cmds/inspect.c
+++ b/cmds/inspect.c
@@ -696,6 +696,7 @@ static const struct cmd_group inspect_cmd_group = {
 		&cmd_struct_inspect_dump_tree,
 		&cmd_struct_inspect_dump_super,
 		&cmd_struct_inspect_tree_stats,
+		&cmd_struct_inspect_dump_file_extents,
 		NULL
 	}
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-11 16:10 [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents Sidong Yang
@ 2021-07-12  1:16 ` Qu Wenruo
  2021-07-12  6:40   ` Sidong Yang
  2021-07-12  6:52 ` Johannes Thumshirn
  1 sibling, 1 reply; 9+ messages in thread
From: Qu Wenruo @ 2021-07-12  1:16 UTC (permalink / raw)
  To: Sidong Yang, linux-btrfs, Qu Wenruo, David Sterba



On 2021/7/12 上午12:10, Sidong Yang wrote:
> This patch adds an subcommand in inspect-internal. It dumps file extents of
> the file that user provided. It helps to show the internal information
> about file extents comprise the file.

Despite the comments inlined below for the technical details, I'm not 
determined if we really want the tool.

On one hand, fiemap doesn't provide all detailed btrfs specific info, 
and it's common to utilize the tree search ioctl to do the work, just 
like "compsize".

But on the other hand, I'm not sure if it provides enough info compared 
to things like "btrfs ins dump-tree".

For now I don't have any objection nor preference.

Thus it's again David's call on this.

> 
> Signed-off-by: Sidong Yang <realwakka@gmail.com>
> ---
> v2:
>   - Prints type and compression
>   - Use the terms from file_extents_item like disk_bytenr not like physical"
> ---
>   Makefile                         |   2 +-
>   cmds/commands.h                  |   2 +-
>   cmds/inspect-dump-file-extents.c | 165 +++++++++++++++++++++++++++++++
>   cmds/inspect.c                   |   1 +
>   4 files changed, 168 insertions(+), 2 deletions(-)
>   create mode 100644 cmds/inspect-dump-file-extents.c
> 
> diff --git a/Makefile b/Makefile
> index a1cc457b..911e16de 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -156,7 +156,7 @@ cmds_objects = cmds/subvolume.o cmds/filesystem.o cmds/device.o cmds/scrub.o \
>   	       cmds/restore.o cmds/rescue.o cmds/rescue-chunk-recover.o \
>   	       cmds/rescue-super-recover.o \
>   	       cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \
> -	       cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \
> +	       cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/inspect-dump-file-extents.o cmds/filesystem-du.o \
>   	       mkfs/common.o check/mode-common.o check/mode-lowmem.o
>   libbtrfs_objects = common/send-stream.o common/send-utils.o kernel-lib/rbtree.o btrfs-list.o \
>   		   kernel-lib/radix-tree.o common/extent-cache.o kernel-shared/extent_io.o \
> diff --git a/cmds/commands.h b/cmds/commands.h
> index 8fa85d6c..55de248e 100644
> --- a/cmds/commands.h
> +++ b/cmds/commands.h
> @@ -154,5 +154,5 @@ DECLARE_COMMAND(select_super);
>   DECLARE_COMMAND(dump_super);
>   DECLARE_COMMAND(debug_tree);

Off-topic here.

Those "dump_super" and "debug_tree" makes me wonder, do we need to 
cleanup them?

I mean, we have inspect_dump_super for "btrfs ins dump-super", but 
what's "dump_super" here for?
And what's the "debug_tree" here for?

>   DECLARE_COMMAND(rescue);
> -
> +DECLARE_COMMAND(inspect_dump_file_extents);

I would be better to put this line where the other "inpsect" subcommands 
are.

>   #endif
> diff --git a/cmds/inspect-dump-file-extents.c b/cmds/inspect-dump-file-extents.c
> new file mode 100644
> index 00000000..8574a1d0
> --- /dev/null
> +++ b/cmds/inspect-dump-file-extents.c
> @@ -0,0 +1,165 @@
> +/*
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License v2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; if not, write to the
> + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
> + * Boston, MA 021110-1307, USA.
> + */
> +
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <fcntl.h>
> +
> +#include <sys/ioctl.h>
> +
> +#include "common/utils.h"
> +#include "cmds/commands.h"
> +
> +static const char * const cmd_inspect_dump_file_extents_usage[] = {
> +	"btrfs inspect-internal dump-extent path",
> +	"Dump file extent in a textual form",
> +	NULL
> +};
> +
> +static void compress_type_to_str(u8 compress_type, char *ret)
> +{
> +	switch (compress_type) {
> +	case BTRFS_COMPRESS_NONE:
> +		strcpy(ret, "none");
> +		break;
> +	case BTRFS_COMPRESS_ZLIB:
> +		strcpy(ret, "zlib");
> +		break;
> +	case BTRFS_COMPRESS_LZO:
> +		strcpy(ret, "lzo");
> +		break;
> +	case BTRFS_COMPRESS_ZSTD:
> +		strcpy(ret, "zstd");
> +		break;
> +	default:
> +		sprintf(ret, "UNKNOWN.%d", compress_type);
> +	}
> +}

It would be better to just export the function with the same name in 
"kernel-shared/print-tree.c" so we don't have duplicated code.

> +
> +static const char* file_extent_type_to_str(u8 type)
> +{
> +	switch (type) {
> +	case BTRFS_FILE_EXTENT_INLINE: return "inline";
> +	case BTRFS_FILE_EXTENT_PREALLOC: return "prealloc";
> +	case BTRFS_FILE_EXTENT_REG: return "regular";
> +	default: return "unknown";
> +	}
> +}

The same here.

> +
> +static int cmd_inspect_dump_file_extents(const struct cmd_struct *cmd,
> +										 int argc, char **argv)
> +{
> +	int fd;
> +	struct stat statbuf;
> +	struct btrfs_ioctl_ino_lookup_args lookup;
> +	struct btrfs_ioctl_search_args args;
> +	struct btrfs_ioctl_search_key *sk = &args.key;
> +	struct btrfs_file_extent_item *extent_item;
> +	struct btrfs_ioctl_search_header *header;
> +	u64 pos;
> +	u64 buf_off;
> +	u64 len;
> +	u64 begin;
> +	u64 disk_bytenr;
> +	u64 disk_num_bytes;
> +	u64 offset;
> +	int ret;
> +	int i;
> +	char compress_str[16];
> +
> +	fd = open(argv[optind], O_RDONLY);
> +	if (fd < 0) {
> +		error("cannot open %s: %m", argv[optind]);
> +		ret = 1;
> +		goto out;
> +	}
> +
> +	if (fstat(fd, &statbuf) < 0) {
> +		error("failed to fstat %s: %m", argv[optind]);
> +		ret = 1;
> +		goto out;
> +	}
> +
> +	lookup.treeid = 0;
> +	lookup.objectid = BTRFS_FIRST_FREE_OBJECTID;
> +
> +	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0) {
> +		error("failed to lookup inode %s: %m", argv[optind]);
> +		ret = 1;
> +		goto out;
> +	}
> +
> +	pos = 0;
> +
> +	sk->tree_id = lookup.treeid;
> +	sk->min_objectid = statbuf.st_ino;
> +	sk->max_objectid = statbuf.st_ino;
> +
> +	sk->max_offset = UINT64_MAX;
> +	sk->min_transid = 0;
> +	sk->max_transid = UINT64_MAX;
> +	sk->min_type = sk->max_type = BTRFS_EXTENT_DATA_KEY;
> +	sk->nr_items = 4096;

You may want to do the tree search ioctl in a loop, as it's pretty 
common for super large or heavily fragmented inode to have way more 
items than one ioctl can return.

> +
> +	while (statbuf.st_size > pos) {
> +		sk->min_offset = pos;
> +		if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args)) {
> +			error("failed to search tree ioctl %s: %m", argv[optind]);
> +			ret = 1;
> +			goto out;
> +		}
> +
> +		buf_off = 0;
> +		for(i=0; i<sk->nr_items; ++i) {
> +			header = (struct btrfs_ioctl_search_header *)(args.buf + buf_off);
> +
> +			if (btrfs_search_header_type(header) == BTRFS_EXTENT_DATA_KEY) {
> +				extent_item = (struct btrfs_file_extent_item *)(header + 1);
> +				begin = btrfs_search_header_offset(header);
> +
> +				printf("type = %s, begin = %llu, ",
> +					   file_extent_type_to_str(extent_item->type), begin);
> +				switch (extent_item->type) {
> +				case BTRFS_FILE_EXTENT_INLINE:
> +					len = le64_to_cpu(extent_item->ram_bytes);
> +					printf("end = %llu\n", begin + len);
> +					break;
> +				case BTRFS_FILE_EXTENT_REG:
> +				case BTRFS_FILE_EXTENT_PREALLOC:
> +					len = le64_to_cpu(extent_item->num_bytes);
> +					disk_bytenr = le64_to_cpu(extent_item->disk_bytenr);
> +					disk_num_bytes = le64_to_cpu(extent_item->disk_num_bytes);
> +					offset = le64_to_cpu(extent_item->offset);
> +					compress_type_to_str(extent_item->compression, compress_str);
> +					printf("end = %llu, disk_bytenr = %llu, disk_num_bytes = %llu,"

For "end" we normally mean inclusive end.
E.g, for @start = 1M, @len = 4K, then the @end should be 1M + 4K - 1.

Thus it would be better to just output the length, not the end.

(I know this sounds a little nitpicking, but trust me, when you have 
seen too many bugs caused by such offset-by-one behavior, you will be as 
sensitive as me on this)

Thanks,
Qu

> +						   " offset = %llu, compression = %s\n",
> +						   begin + len, disk_bytenr, disk_num_bytes, offset, compress_str); > +
> +					break;
> +				}
> +
> +			}
> +			buf_off += sizeof(*header) + btrfs_search_header_len(header);
> +			pos += len;
> +		}
> +
> +	}
> +	ret = 0;
> +out:
> +	close(fd);
> +	return ret;
> +}
> +DEFINE_SIMPLE_COMMAND(inspect_dump_file_extents, "dump-file-extents");
> diff --git a/cmds/inspect.c b/cmds/inspect.c
> index 2ef5f4b6..dfb0e27b 100644
> --- a/cmds/inspect.c
> +++ b/cmds/inspect.c
> @@ -696,6 +696,7 @@ static const struct cmd_group inspect_cmd_group = {
>   		&cmd_struct_inspect_dump_tree,
>   		&cmd_struct_inspect_dump_super,
>   		&cmd_struct_inspect_tree_stats,
> +		&cmd_struct_inspect_dump_file_extents,
>   		NULL
>   	}
>   };
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-12  1:16 ` Qu Wenruo
@ 2021-07-12  6:40   ` Sidong Yang
  2021-07-12  6:46     ` Qu Wenruo
  0 siblings, 1 reply; 9+ messages in thread
From: Sidong Yang @ 2021-07-12  6:40 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, Qu Wenruo, David Sterba

On Mon, Jul 12, 2021 at 09:16:17AM +0800, Qu Wenruo wrote:
> 
> 
> On 2021/7/12 上午12:10, Sidong Yang wrote:
> > This patch adds an subcommand in inspect-internal. It dumps file extents of
> > the file that user provided. It helps to show the internal information
> > about file extents comprise the file.
> 
> Despite the comments inlined below for the technical details, I'm not
> determined if we really want the tool.
> 
> On one hand, fiemap doesn't provide all detailed btrfs specific info, and
> it's common to utilize the tree search ioctl to do the work, just like
> "compsize".
> 
> But on the other hand, I'm not sure if it provides enough info compared to
> things like "btrfs ins dump-tree".
> 
> For now I don't have any objection nor preference.
> 
> Thus it's again David's call on this.
> 
> > 
> > Signed-off-by: Sidong Yang <realwakka@gmail.com>
> > ---
> > v2:
> >   - Prints type and compression
> >   - Use the terms from file_extents_item like disk_bytenr not like physical"
> > ---
> >   Makefile                         |   2 +-
> >   cmds/commands.h                  |   2 +-
> >   cmds/inspect-dump-file-extents.c | 165 +++++++++++++++++++++++++++++++
> >   cmds/inspect.c                   |   1 +
> >   4 files changed, 168 insertions(+), 2 deletions(-)
> >   create mode 100644 cmds/inspect-dump-file-extents.c
> > 
> > diff --git a/Makefile b/Makefile
> > index a1cc457b..911e16de 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -156,7 +156,7 @@ cmds_objects = cmds/subvolume.o cmds/filesystem.o cmds/device.o cmds/scrub.o \
> >   	       cmds/restore.o cmds/rescue.o cmds/rescue-chunk-recover.o \
> >   	       cmds/rescue-super-recover.o \
> >   	       cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \
> > -	       cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \
> > +	       cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/inspect-dump-file-extents.o cmds/filesystem-du.o \
> >   	       mkfs/common.o check/mode-common.o check/mode-lowmem.o
> >   libbtrfs_objects = common/send-stream.o common/send-utils.o kernel-lib/rbtree.o btrfs-list.o \
> >   		   kernel-lib/radix-tree.o common/extent-cache.o kernel-shared/extent_io.o \
> > diff --git a/cmds/commands.h b/cmds/commands.h
> > index 8fa85d6c..55de248e 100644
> > --- a/cmds/commands.h
> > +++ b/cmds/commands.h
> > @@ -154,5 +154,5 @@ DECLARE_COMMAND(select_super);
> >   DECLARE_COMMAND(dump_super);
> >   DECLARE_COMMAND(debug_tree);
> 
> Off-topic here.
> 
> Those "dump_super" and "debug_tree" makes me wonder, do we need to cleanup
> them?
> 
> I mean, we have inspect_dump_super for "btrfs ins dump-super", but what's
> "dump_super" here for?
> And what's the "debug_tree" here for?
there is no command dump_super and debug_tree. And these aren't need to
compile.
> 
> >   DECLARE_COMMAND(rescue);
> > -
> > +DECLARE_COMMAND(inspect_dump_file_extents);
> 
> I would be better to put this line where the other "inpsect" subcommands
> are.

Okay, I'll do it.
> 
> >   #endif
> > diff --git a/cmds/inspect-dump-file-extents.c b/cmds/inspect-dump-file-extents.c
> > new file mode 100644
> > index 00000000..8574a1d0
> > --- /dev/null
> > +++ b/cmds/inspect-dump-file-extents.c
> > @@ -0,0 +1,165 @@
> > +/*
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public
> > + * License v2 as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public
> > + * License along with this program; if not, write to the
> > + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
> > + * Boston, MA 021110-1307, USA.
> > + */
> > +
> > +#include <unistd.h>
> > +#include <stdio.h>
> > +#include <fcntl.h>
> > +
> > +#include <sys/ioctl.h>
> > +
> > +#include "common/utils.h"
> > +#include "cmds/commands.h"
> > +
> > +static const char * const cmd_inspect_dump_file_extents_usage[] = {
> > +	"btrfs inspect-internal dump-extent path",
> > +	"Dump file extent in a textual form",
> > +	NULL
> > +};
> > +
> > +static void compress_type_to_str(u8 compress_type, char *ret)
> > +{
> > +	switch (compress_type) {
> > +	case BTRFS_COMPRESS_NONE:
> > +		strcpy(ret, "none");
> > +		break;
> > +	case BTRFS_COMPRESS_ZLIB:
> > +		strcpy(ret, "zlib");
> > +		break;
> > +	case BTRFS_COMPRESS_LZO:
> > +		strcpy(ret, "lzo");
> > +		break;
> > +	case BTRFS_COMPRESS_ZSTD:
> > +		strcpy(ret, "zstd");
> > +		break;
> > +	default:
> > +		sprintf(ret, "UNKNOWN.%d", compress_type);
> > +	}
> > +}
> 
> It would be better to just export the function with the same name in
> "kernel-shared/print-tree.c" so we don't have duplicated code.

Yes, I would be better.
> 
> > +
> > +static const char* file_extent_type_to_str(u8 type)
> > +{
> > +	switch (type) {
> > +	case BTRFS_FILE_EXTENT_INLINE: return "inline";
> > +	case BTRFS_FILE_EXTENT_PREALLOC: return "prealloc";
> > +	case BTRFS_FILE_EXTENT_REG: return "regular";
> > +	default: return "unknown";
> > +	}
> > +}
> 
> The same here.
Okay.
> 
> > +
> > +static int cmd_inspect_dump_file_extents(const struct cmd_struct *cmd,
> > +										 int argc, char **argv)
> > +{
> > +	int fd;
> > +	struct stat statbuf;
> > +	struct btrfs_ioctl_ino_lookup_args lookup;
> > +	struct btrfs_ioctl_search_args args;
> > +	struct btrfs_ioctl_search_key *sk = &args.key;
> > +	struct btrfs_file_extent_item *extent_item;
> > +	struct btrfs_ioctl_search_header *header;
> > +	u64 pos;
> > +	u64 buf_off;
> > +	u64 len;
> > +	u64 begin;
> > +	u64 disk_bytenr;
> > +	u64 disk_num_bytes;
> > +	u64 offset;
> > +	int ret;
> > +	int i;
> > +	char compress_str[16];
> > +
> > +	fd = open(argv[optind], O_RDONLY);
> > +	if (fd < 0) {
> > +		error("cannot open %s: %m", argv[optind]);
> > +		ret = 1;
> > +		goto out;
> > +	}
> > +
> > +	if (fstat(fd, &statbuf) < 0) {
> > +		error("failed to fstat %s: %m", argv[optind]);
> > +		ret = 1;
> > +		goto out;
> > +	}
> > +
> > +	lookup.treeid = 0;
> > +	lookup.objectid = BTRFS_FIRST_FREE_OBJECTID;
> > +
> > +	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0) {
> > +		error("failed to lookup inode %s: %m", argv[optind]);
> > +		ret = 1;
> > +		goto out;
> > +	}
> > +
> > +	pos = 0;
> > +
> > +	sk->tree_id = lookup.treeid;
> > +	sk->min_objectid = statbuf.st_ino;
> > +	sk->max_objectid = statbuf.st_ino;
> > +
> > +	sk->max_offset = UINT64_MAX;
> > +	sk->min_transid = 0;
> > +	sk->max_transid = UINT64_MAX;
> > +	sk->min_type = sk->max_type = BTRFS_EXTENT_DATA_KEY;
> > +	sk->nr_items = 4096;
> 
> You may want to do the tree search ioctl in a loop, as it's pretty common
> for super large or heavily fragmented inode to have way more items than one
> ioctl can return.

I don't think much about this. I wonder if it's proper way to search
tree. is there any better way than this code?

> 
> > +
> > +	while (statbuf.st_size > pos) {
> > +		sk->min_offset = pos;
> > +		if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args)) {
> > +			error("failed to search tree ioctl %s: %m", argv[optind]);
> > +			ret = 1;
> > +			goto out;
> > +		}
> > +
> > +		buf_off = 0;
> > +		for(i=0; i<sk->nr_items; ++i) {
> > +			header = (struct btrfs_ioctl_search_header *)(args.buf + buf_off);
> > +
> > +			if (btrfs_search_header_type(header) == BTRFS_EXTENT_DATA_KEY) {
> > +				extent_item = (struct btrfs_file_extent_item *)(header + 1);
> > +				begin = btrfs_search_header_offset(header);
> > +
> > +				printf("type = %s, begin = %llu, ",
> > +					   file_extent_type_to_str(extent_item->type), begin);
> > +				switch (extent_item->type) {
> > +				case BTRFS_FILE_EXTENT_INLINE:
> > +					len = le64_to_cpu(extent_item->ram_bytes);
> > +					printf("end = %llu\n", begin + len);
> > +					break;
> > +				case BTRFS_FILE_EXTENT_REG:
> > +				case BTRFS_FILE_EXTENT_PREALLOC:
> > +					len = le64_to_cpu(extent_item->num_bytes);
> > +					disk_bytenr = le64_to_cpu(extent_item->disk_bytenr);
> > +					disk_num_bytes = le64_to_cpu(extent_item->disk_num_bytes);
> > +					offset = le64_to_cpu(extent_item->offset);
> > +					compress_type_to_str(extent_item->compression, compress_str);
> > +					printf("end = %llu, disk_bytenr = %llu, disk_num_bytes = %llu,"
> 
> For "end" we normally mean inclusive end.
> E.g, for @start = 1M, @len = 4K, then the @end should be 1M + 4K - 1.
> 
> Thus it would be better to just output the length, not the end.

Okay, Printing output as start and len would be more explicit.

Thanks,
Sidong
> 
> (I know this sounds a little nitpicking, but trust me, when you have seen
> too many bugs caused by such offset-by-one behavior, you will be as
> sensitive as me on this)
> 
> Thanks,
> Qu
> 
> > +						   " offset = %llu, compression = %s\n",
> > +						   begin + len, disk_bytenr, disk_num_bytes, offset, compress_str); > +
> > +					break;
> > +				}
> > +
> > +			}
> > +			buf_off += sizeof(*header) + btrfs_search_header_len(header);
> > +			pos += len;
> > +		}
> > +
> > +	}
> > +	ret = 0;
> > +out:
> > +	close(fd);
> > +	return ret;
> > +}
> > +DEFINE_SIMPLE_COMMAND(inspect_dump_file_extents, "dump-file-extents");
> > diff --git a/cmds/inspect.c b/cmds/inspect.c
> > index 2ef5f4b6..dfb0e27b 100644
> > --- a/cmds/inspect.c
> > +++ b/cmds/inspect.c
> > @@ -696,6 +696,7 @@ static const struct cmd_group inspect_cmd_group = {
> >   		&cmd_struct_inspect_dump_tree,
> >   		&cmd_struct_inspect_dump_super,
> >   		&cmd_struct_inspect_tree_stats,
> > +		&cmd_struct_inspect_dump_file_extents,
> >   		NULL
> >   	}
> >   };
> > 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-12  6:40   ` Sidong Yang
@ 2021-07-12  6:46     ` Qu Wenruo
  2021-07-13 16:45       ` Sidong Yang
  0 siblings, 1 reply; 9+ messages in thread
From: Qu Wenruo @ 2021-07-12  6:46 UTC (permalink / raw)
  To: Sidong Yang; +Cc: linux-btrfs, Qu Wenruo, David Sterba



On 2021/7/12 下午2:40, Sidong Yang wrote:
[...]
>>> +	sk->max_offset = UINT64_MAX;
>>> +	sk->min_transid = 0;
>>> +	sk->max_transid = UINT64_MAX;
>>> +	sk->min_type = sk->max_type = BTRFS_EXTENT_DATA_KEY;
>>> +	sk->nr_items = 4096;
>>
>> You may want to do the tree search ioctl in a loop, as it's pretty common
>> for super large or heavily fragmented inode to have way more items than one
>> ioctl can return.
> 
> I don't think much about this. I wonder if it's proper way to search
> tree. is there any better way than this code?

Here is one example:

https://github.com/kilobyte/compsize/blob/master/compsize.c#L179

It goes @again label to restart from last offset + 1.

Thanks,
Qu


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-11 16:10 [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents Sidong Yang
  2021-07-12  1:16 ` Qu Wenruo
@ 2021-07-12  6:52 ` Johannes Thumshirn
  2021-07-13 16:54   ` Sidong Yang
  1 sibling, 1 reply; 9+ messages in thread
From: Johannes Thumshirn @ 2021-07-12  6:52 UTC (permalink / raw)
  To: Sidong Yang, linux-btrfs, Qu Wenruo

On 11/07/2021 18:10, Sidong Yang wrote:
> +static void compress_type_to_str(u8 compress_type, char *ret)
> +{
> +	switch (compress_type) {
> +	case BTRFS_COMPRESS_NONE:
> +		strcpy(ret, "none");
> +		break;
> +	case BTRFS_COMPRESS_ZLIB:
> +		strcpy(ret, "zlib");
> +		break;
> +	case BTRFS_COMPRESS_LZO:
> +		strcpy(ret, "lzo");
> +		break;
> +	case BTRFS_COMPRESS_ZSTD:
> +		strcpy(ret, "zstd");
> +		break;
> +	default:
> +		sprintf(ret, "UNKNOWN.%d", compress_type);
> +	}
> +}

[....]
> +	char compress_str[16];

[...]

> +					compress_type_to_str(extent_item->compression, compress_str);

While this looks safe at a first glance, can we change this to:

#define COMPRESS_STR_LEN 5

[...]

switch (compress_type) {
case BTRFS_COMPRESS_NONE:
	strncpy(ret, "none", COMPRESS_STR_LEN);

[...]

char compress_str[COMPRESS_STR_LEN];

One day someone will factor out 'compress_type_to_str()' and make it public, read user
supplied input and then it's a disaster waiting to happen.

Thanks,
	Johannes

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-12  6:46     ` Qu Wenruo
@ 2021-07-13 16:45       ` Sidong Yang
  0 siblings, 0 replies; 9+ messages in thread
From: Sidong Yang @ 2021-07-13 16:45 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, Qu Wenruo, David Sterba

On Mon, Jul 12, 2021 at 02:46:39PM +0800, Qu Wenruo wrote:
> 
> 
> On 2021/7/12 下午2:40, Sidong Yang wrote:
> [...]
> > > > +	sk->max_offset = UINT64_MAX;
> > > > +	sk->min_transid = 0;
> > > > +	sk->max_transid = UINT64_MAX;
> > > > +	sk->min_type = sk->max_type = BTRFS_EXTENT_DATA_KEY;
> > > > +	sk->nr_items = 4096;
> > > 
> > > You may want to do the tree search ioctl in a loop, as it's pretty common
> > > for super large or heavily fragmented inode to have way more items than one
> > > ioctl can return.
> > 
> > I don't think much about this. I wonder if it's proper way to search
> > tree. is there any better way than this code?
> 
> Here is one example:
> 
> https://github.com/kilobyte/compsize/blob/master/compsize.c#L179
> 
> It goes @again label to restart from last offset + 1.

Thanks, I read the code you provided. It seems that compsize is also
doing to what this patch do. 

The 'do_file' function checks if nr_items is larger than 512. I wonder
that the number '512' is such an magic number. I'll check it by my own
effort. 

Thanks,
Sidong

> 
> Thanks,
> Qu
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-12  6:52 ` Johannes Thumshirn
@ 2021-07-13 16:54   ` Sidong Yang
  2021-07-13 22:16     ` Qu Wenruo
  0 siblings, 1 reply; 9+ messages in thread
From: Sidong Yang @ 2021-07-13 16:54 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: linux-btrfs, Qu Wenruo

On Mon, Jul 12, 2021 at 06:52:28AM +0000, Johannes Thumshirn wrote:

Hi, Thanks for review!

> On 11/07/2021 18:10, Sidong Yang wrote:
> > +static void compress_type_to_str(u8 compress_type, char *ret)
> > +{
> > +	switch (compress_type) {
> > +	case BTRFS_COMPRESS_NONE:
> > +		strcpy(ret, "none");
> > +		break;
> > +	case BTRFS_COMPRESS_ZLIB:
> > +		strcpy(ret, "zlib");
> > +		break;
> > +	case BTRFS_COMPRESS_LZO:
> > +		strcpy(ret, "lzo");
> > +		break;
> > +	case BTRFS_COMPRESS_ZSTD:
> > +		strcpy(ret, "zstd");
> > +		break;
> > +	default:
> > +		sprintf(ret, "UNKNOWN.%d", compress_type);
> > +	}
> > +}
> 
> [....]
> > +	char compress_str[16];
> 
> [...]
> 
> > +					compress_type_to_str(extent_item->compression, compress_str);
> 
> While this looks safe at a first glance, can we change this to:
> 
> #define COMPRESS_STR_LEN 5
> 
> [...]
> 
> switch (compress_type) {
> case BTRFS_COMPRESS_NONE:
> 	strncpy(ret, "none", COMPRESS_STR_LEN);
> 
> [...]
> 
> char compress_str[COMPRESS_STR_LEN];
> 
> One day someone will factor out 'compress_type_to_str()' and make it public, read user
> supplied input and then it's a disaster waiting to happen.

This can be happen when @ret is too short? If it so, I think it also has
problem when @ret is shorter than COMPRESS_STR_LEN. I wonder that I
understood this problem you said.

Thanks,
Sidong

> 
> Thanks,
> 	Johannes

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-13 16:54   ` Sidong Yang
@ 2021-07-13 22:16     ` Qu Wenruo
  2021-07-14  6:41       ` Sidong Yang
  0 siblings, 1 reply; 9+ messages in thread
From: Qu Wenruo @ 2021-07-13 22:16 UTC (permalink / raw)
  To: Sidong Yang, Johannes Thumshirn; +Cc: linux-btrfs



On 2021/7/14 上午12:54, Sidong Yang wrote:
> On Mon, Jul 12, 2021 at 06:52:28AM +0000, Johannes Thumshirn wrote:
>
> Hi, Thanks for review!
>
>> On 11/07/2021 18:10, Sidong Yang wrote:
>>> +static void compress_type_to_str(u8 compress_type, char *ret)
>>> +{
>>> +	switch (compress_type) {
>>> +	case BTRFS_COMPRESS_NONE:
>>> +		strcpy(ret, "none");
>>> +		break;
>>> +	case BTRFS_COMPRESS_ZLIB:
>>> +		strcpy(ret, "zlib");
>>> +		break;
>>> +	case BTRFS_COMPRESS_LZO:
>>> +		strcpy(ret, "lzo");
>>> +		break;
>>> +	case BTRFS_COMPRESS_ZSTD:
>>> +		strcpy(ret, "zstd");
>>> +		break;
>>> +	default:
>>> +		sprintf(ret, "UNKNOWN.%d", compress_type);
>>> +	}
>>> +}
>>
>> [....]
>>> +	char compress_str[16];
>>
>> [...]
>>
>>> +					compress_type_to_str(extent_item->compression, compress_str);
>>
>> While this looks safe at a first glance, can we change this to:
>>
>> #define COMPRESS_STR_LEN 5
>>
>> [...]
>>
>> switch (compress_type) {
>> case BTRFS_COMPRESS_NONE:
>> 	strncpy(ret, "none", COMPRESS_STR_LEN);
>>
>> [...]
>>
>> char compress_str[COMPRESS_STR_LEN];
>>
>> One day someone will factor out 'compress_type_to_str()' and make it public, read user
>> supplied input and then it's a disaster waiting to happen.
>
> This can be happen when @ret is too short? If it so, I think it also has
> problem when @ret is shorter than COMPRESS_STR_LEN. I wonder that I
> understood this problem you said.

I guess Johannes means that, if we support more and more new compression
algos, it can go beyond your original 16 bytes length for the
compression algo name.

While using strncpy, it will never go beyond COMPRESS_STR_LEN forever.
(Although it will truncating the output, but that would be easier to
spot the problem and then enlarge the macro)

Thanks,
Qu

>
> Thanks,
> Sidong
>
>>
>> Thanks,
>> 	Johannes

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents
  2021-07-13 22:16     ` Qu Wenruo
@ 2021-07-14  6:41       ` Sidong Yang
  0 siblings, 0 replies; 9+ messages in thread
From: Sidong Yang @ 2021-07-14  6:41 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: Johannes Thumshirn, linux-btrfs

On Wed, Jul 14, 2021 at 06:16:08AM +0800, Qu Wenruo wrote:
> 
> 
> On 2021/7/14 上午12:54, Sidong Yang wrote:
> > On Mon, Jul 12, 2021 at 06:52:28AM +0000, Johannes Thumshirn wrote:
> > 
> > Hi, Thanks for review!
> > 
> > > On 11/07/2021 18:10, Sidong Yang wrote:
> > > > +static void compress_type_to_str(u8 compress_type, char *ret)
> > > > +{
> > > > +	switch (compress_type) {
> > > > +	case BTRFS_COMPRESS_NONE:
> > > > +		strcpy(ret, "none");
> > > > +		break;
> > > > +	case BTRFS_COMPRESS_ZLIB:
> > > > +		strcpy(ret, "zlib");
> > > > +		break;
> > > > +	case BTRFS_COMPRESS_LZO:
> > > > +		strcpy(ret, "lzo");
> > > > +		break;
> > > > +	case BTRFS_COMPRESS_ZSTD:
> > > > +		strcpy(ret, "zstd");
> > > > +		break;
> > > > +	default:
> > > > +		sprintf(ret, "UNKNOWN.%d", compress_type);
> > > > +	}
> > > > +}
> > > 
> > > [....]
> > > > +	char compress_str[16];
> > > 
> > > [...]
> > > 
> > > > +					compress_type_to_str(extent_item->compression, compress_str);
> > > 
> > > While this looks safe at a first glance, can we change this to:
> > > 
> > > #define COMPRESS_STR_LEN 5
> > > 
> > > [...]
> > > 
> > > switch (compress_type) {
> > > case BTRFS_COMPRESS_NONE:
> > > 	strncpy(ret, "none", COMPRESS_STR_LEN);
> > > 
> > > [...]
> > > 
> > > char compress_str[COMPRESS_STR_LEN];
> > > 
> > > One day someone will factor out 'compress_type_to_str()' and make it public, read user
> > > supplied input and then it's a disaster waiting to happen.
> > 
> > This can be happen when @ret is too short? If it so, I think it also has
> > problem when @ret is shorter than COMPRESS_STR_LEN. I wonder that I
> > understood this problem you said.
> 
> I guess Johannes means that, if we support more and more new compression
> algos, it can go beyond your original 16 bytes length for the
> compression algo name.
> 
> While using strncpy, it will never go beyond COMPRESS_STR_LEN forever.
> (Although it will truncating the output, but that would be easier to
> spot the problem and then enlarge the macro)
Okay, I understood. I'll do this in next version.

Thanks,
Sidong
> 
> Thanks,
> Qu
> 
> > 
> > Thanks,
> > Sidong
> > 
> > > 
> > > Thanks,
> > > 	Johannes

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-07-14  6:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-11 16:10 [PATCH v2] btrfs-progs: cmds: Add subcommand that dumps file extents Sidong Yang
2021-07-12  1:16 ` Qu Wenruo
2021-07-12  6:40   ` Sidong Yang
2021-07-12  6:46     ` Qu Wenruo
2021-07-13 16:45       ` Sidong Yang
2021-07-12  6:52 ` Johannes Thumshirn
2021-07-13 16:54   ` Sidong Yang
2021-07-13 22:16     ` Qu Wenruo
2021-07-14  6:41       ` Sidong Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.