linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils
@ 2021-09-11 13:46 Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

From: mpiglet <mpiglet@outlook.com>

Add dump.erofs, a tool that lets users inspect and analyze an erofs
image file directly.

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 Makefile.am        |  2 +-
 configure.ac       |  2 ++
 dump/Makefile.am   | 10 ++++++
 dump/main.c        | 84 ++++++++++++++++++++++++++++++++++++++++++++++
 include/erofs/io.h |  3 ++
 lib/namei.c        |  4 +--
 6 files changed, 102 insertions(+), 3 deletions(-)
 create mode 100644 dump/Makefile.am
 create mode 100644 dump/main.c

diff --git a/Makefile.am b/Makefile.am
index b804aa9..fedf7b5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -3,7 +3,7 @@
 
 ACLOCAL_AMFLAGS = -I m4
 
-SUBDIRS = man lib mkfs
+SUBDIRS = man lib mkfs dump
 if ENABLE_FUSE
 SUBDIRS += fuse
 endif
diff --git a/configure.ac b/configure.ac
index f626064..f4fe548 100644
--- a/configure.ac
+++ b/configure.ac
@@ -280,6 +280,8 @@ AC_CONFIG_FILES([Makefile
 		 man/Makefile
 		 lib/Makefile
 		 mkfs/Makefile
+		 dump/Makefile
 		 fuse/Makefile])
+
 AC_OUTPUT
 
diff --git a/dump/Makefile.am b/dump/Makefile.am
new file mode 100644
index 0000000..e664799
--- /dev/null
+++ b/dump/Makefile.am
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0+
+# Makefile.am
+
+AUTOMAKE_OPTIONS = foreign
+bin_PROGRAMS     = dump.erofs
+AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}
+dump_erofs_SOURCES = main.c
+dump_erofs_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
+dump_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} ${liblz4_LIBS}
+
diff --git a/dump/main.c b/dump/main.c
new file mode 100644
index 0000000..8fbc24a
--- /dev/null
+++ b/dump/main.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * dump/main.c
+ *
+ * Copyright (C) 2021-2022 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Wang Qi <mpiglet@outlook.com>
+ *            Guo Xuenan <guoxuenan@huawei.com>
+ */
+
+#include <stdlib.h>
+#include <getopt.h>
+#include <sys/sysmacros.h>
+#include <time.h>
+#include <lz4.h>
+
+#include "erofs/print.h"
+#include "erofs/io.h"
+
+static struct option long_options[] = {
+	{"help", no_argument, 0, 1},
+	{0, 0, 0, 0},
+};
+
+/*
+ * Print the command-line synopsis and the list of supported options to
+ * stderr.  Called for -h/--help and by main() when option parsing
+ * returns -EINVAL.
+ */
+static void usage(void)
+{
+	fputs("usage: [options] erofs-image \n\n"
+		"Dump erofs layout from erofs-image, and [options] are:\n"
+		"-v/-V      print dump.erofs version info\n"
+		"-h/--help  display this help and exit\n", stderr);
+}
+/* Print "dump.erofs" followed by the string in cfg.c_version to stderr. */
+static void dumpfs_print_version(void)
+{
+	fprintf(stderr, "dump.erofs %s\n", cfg.c_version);
+}
+
+/*
+ * Parse the command line into the global configuration.
+ *
+ * -v/-V and -h/--help are served immediately and terminate the process
+ * with status 0.  Any other option letter, including those listed in the
+ * optstring but not handled below, ends up in the default case.
+ * Exactly one positional argument, the image path, is required; a copy
+ * of it is stored in cfg.c_img_path.
+ *
+ * Returns 0 on success, -EINVAL on an unhandled option or on a missing
+ * or surplus positional argument, and -ENOMEM if the path cannot be
+ * copied.
+ */
+static int dumpfs_parse_options_cfg(int argc, char **argv)
+{
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
+					long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'v':
+		case 'V':
+			dumpfs_print_version();
+			exit(0);
+		case 'h':
+		case 1:
+			usage();
+			exit(0);
+		default: /* '?' */
+			return -EINVAL;
+		}
+	}
+
+	if (optind >= argc)
+		return -EINVAL;
+
+	/* reject surplus arguments before copying the path so nothing leaks */
+	if (optind + 1 < argc) {
+		erofs_err("unexpected argument: %s", argv[optind + 1]);
+		return -EINVAL;
+	}
+
+	cfg.c_img_path = strdup(argv[optind++]);
+	if (!cfg.c_img_path)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Entry point: calls erofs_init_configure(), then parses the command
+ * line.  Returns 0 on success and -1 when parsing fails; the usage text
+ * is printed first if the failure was -EINVAL.
+ */
+int main(int argc, char **argv)
+{
+	int err = 0;
+
+	erofs_init_configure();
+	err = dumpfs_parse_options_cfg(argc, argv);
+	if (err) {
+		if (err == -EINVAL)
+			usage();
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 5574245..00e5de8 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -10,6 +10,7 @@
 #define __EROFS_IO_H
 
 #include <unistd.h>
+#include <sys/types.h>
 #include "internal.h"
 
 #ifndef O_BINARY
@@ -25,6 +26,8 @@ int dev_fillzero(u64 offset, size_t len, bool padding);
 int dev_fsync(void);
 int dev_resize(erofs_blk_t nblocks);
 u64 dev_length(void);
+dev_t erofs_new_decode_dev(u32 dev);
+int erofs_read_inode_from_disk(struct erofs_inode *vi);
 
 static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
 			    u32 nblocks)
diff --git a/lib/namei.c b/lib/namei.c
index 4e06ba4..21631f1 100644
--- a/lib/namei.c
+++ b/lib/namei.c
@@ -15,7 +15,7 @@
 #include "erofs/print.h"
 #include "erofs/io.h"
 
-static dev_t erofs_new_decode_dev(u32 dev)
+dev_t erofs_new_decode_dev(u32 dev)
 {
 	const unsigned int major = (dev & 0xfff00) >> 8;
 	const unsigned int minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
@@ -23,7 +23,7 @@ static dev_t erofs_new_decode_dev(u32 dev)
 	return makedev(major, minor);
 }
 
-static int erofs_read_inode_from_disk(struct erofs_inode *vi)
+int erofs_read_inode_from_disk(struct erofs_inode *vi)
 {
 	int ret, ifmt;
 	char buf[sizeof(struct erofs_inode_extended)];
-- 
2.25.4


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 15:58   ` Gao Xiang
  2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/dump/main.c b/dump/main.c
index 8fbc24a..25ac89f 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -17,6 +17,12 @@
 #include "erofs/print.h"
 #include "erofs/io.h"
 
+/* Actions requested on the command line. */
+struct dumpcfg {
+	bool print_superblock;	/* set by -s; makes main() dump the superblock */
+	bool print_version;	/* NOTE(review): never set or read in the visible code */
+};
+static struct dumpcfg dumpcfg;
+
 static struct option long_options[] = {
 	{"help", no_argument, 0, 1},
 	{0, 0, 0, 0},
@@ -26,6 +32,7 @@ static void usage(void)
 {
 	fputs("usage: [options] erofs-image \n\n"
 		"Dump erofs layout from erofs-image, and [options] are:\n"
+		"-s          print information about superblock\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -41,6 +48,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
 					long_options, NULL)) != -1) {
 		switch (opt) {
+		case 's':
+			dumpcfg.print_superblock = true;
+			break;
 		case 'v':
 		case 'V':
 			dumpfs_print_version();
@@ -68,6 +78,39 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 	return 0;
 }
 
+/*
+ * Print the superblock fields held in the global sbi to stderr: block
+ * count, meta/xattr start blocks, root nid, inode count, build time,
+ * uuid, and whether the lz4 0padding, big pcluster and superblock
+ * checksum features are set.
+ *
+ * The magic number printed is the EROFS_SUPER_MAGIC_V1 constant, not a
+ * value taken from sbi.
+ */
+static void dumpfs_print_superblock(void)
+{
+	time_t build_time = sbi.build_time;
+	/* ctime() returns NULL for a time it cannot represent */
+	const char *created = ctime(&build_time);
+	unsigned int i;
+
+	/* "| 0ULL" widens the 64-bit fields so %llu matches on every ABI */
+	fprintf(stderr, "Filesystem magic number:	0x%04X\n", EROFS_SUPER_MAGIC_V1);
+	fprintf(stderr, "Filesystem blocks: 		%llu\n", sbi.blocks | 0ULL);
+	fprintf(stderr, "Filesystem meta block:		%u\n", sbi.meta_blkaddr);
+	fprintf(stderr, "Filesystem xattr block:	%u\n", sbi.xattr_blkaddr);
+	fprintf(stderr, "Filesystem root nid:		%llu\n", sbi.root_nid | 0ULL);
+	fprintf(stderr, "Filesystem valid inos:		%llu\n", sbi.inos | 0ULL);
+	fprintf(stderr, "Filesystem created:		%s",
+			created ? created : "(invalid time)\n");
+	fprintf(stderr, "Filesystem uuid:		");
+	for (i = 0; i < 16; i++)
+		fprintf(stderr, "%02x", sbi.uuid[i]);
+	fprintf(stderr, "\n");
+
+	if (erofs_sb_has_lz4_0padding())
+		fprintf(stderr, "Filesystem support lz4 0padding\n");
+	else
+		fprintf(stderr, "Filesystem not support lz4 0padding\n");
+
+	if (erofs_sb_has_big_pcluster())
+		fprintf(stderr, "Filesystem support big pcluster\n");
+	else
+		fprintf(stderr, "Filesystem not support big pcluster\n");
+
+	if (erofs_sb_has_sb_chksum())
+		fprintf(stderr, "Filesystem has super block checksum feature\n");
+	else
+		fprintf(stderr, "Filesystem has no superblock checksum feature\n");
+}
+
 int main(int argc, char **argv)
 {
 	int err = 0;
@@ -80,5 +123,20 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
+	err = dev_open_ro(cfg.c_img_path);
+	if (err) {
+		erofs_err("open image file failed");
+		return -1;
+	}
+
+	err = erofs_read_superblock();
+	if (err) {
+		erofs_err("read superblock failed");
+		return -1;
+	}
+
+	if (dumpcfg.print_superblock)
+		dumpfs_print_superblock();
+
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 16:13   ` Gao Xiang
  2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="y", Size: 13813 bytes --]

From: mpiglet <mpiglet@outlook.com>

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 474 insertions(+)

diff --git a/dump/main.c b/dump/main.c
index 25ac89f..b0acc0b 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -19,10 +19,78 @@
 
 struct dumpcfg {
 	bool print_superblock;
+	bool print_statistic;
 	bool print_version;
 };
 static struct dumpcfg dumpcfg;
 
+/* printf formats for one chart row (label, count, ratio, bar) and for the chart header */
+static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
+static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
+/*
+ * File name suffixes that get_file_type() recognises.  The position of a
+ * suffix is its index into stats.file_type_stat[]; the final "others"
+ * entry is the slot for every name that matches none of the suffixes.
+ */
+static char *file_types[] = {
+	".so",
+	".png",
+	".jpg",
+	".xml",
+	".html",
+	".odex",
+	".vdex",
+	".apk",
+	".ttf",
+	".jar",
+	".json",
+	".ogg",
+	".oat",
+	".art",
+	".rc",
+	".otf",
+	".txt",
+	"others",
+};
+/*
+ * Symbolic indices into file_types[] and stats.file_type_stat[].  The
+ * order here must stay identical to the order of file_types[].
+ */
+enum {
+	SOFILETYPE = 0,
+	PNGFILETYPE,
+	JPEGFILETYPE,
+	XMLFILETYPE,
+	HTMLFILETYPE,
+	ODEXFILETYPE,
+	VDEXFILETYPE,
+	APKFILETYPE,
+	TTFFILETYPE,
+	JARFILETYPE,
+	JSONFILETYPE,
+	OGGFILETYPE,
+	OATFILETYPE,
+	ARTFILETYPE,
+	RCFILETYPE,
+	OTFFILETYPE,
+	TXTFILETYPE,
+	OTHERFILETYPE,	/* last entry: also used as the loop bound and fallback */
+};
+
+/* number of slots in each of the two file size histograms below */
+#define	FILE_SIZE_BITS	30
+/* Counters filled while read_dir() walks the image; printed by the -S code. */
+struct statistics {
+	unsigned long blocks;			/* copied from sbi.blocks */
+	unsigned long files;			/* dirents other than self/parent */
+	unsigned long files_total_size;		/* sum of on-disk sizes of regular files */
+	unsigned long files_total_origin_size;	/* sum of i_size of regular files */
+	double compress_rate;			/* 100 * total size / total original size */
+	unsigned long compressed_files;
+	unsigned long uncompressed_files;
+
+	/* one counter per dirent file type */
+	unsigned long regular_files;
+	unsigned long dir_files;
+	unsigned long chardev_files;
+	unsigned long blkdev_files;
+	unsigned long fifo_files;
+	unsigned long sock_files;
+	unsigned long symlink_files;
+
+	/* regular files per suffix, indexed like file_types[] */
+	unsigned int file_type_stat[OTHERFILETYPE + 1];
+	/* regular files per power-of-two size slot: original and on-disk size */
+	unsigned int file_org_size[FILE_SIZE_BITS];
+	unsigned int file_comp_size[FILE_SIZE_BITS];
+};
+static struct statistics stats;
+
 static struct option long_options[] = {
 	{"help", no_argument, 0, 1},
 	{0, 0, 0, 0},
@@ -33,6 +101,7 @@ static void usage(void)
 	fputs("usage: [options] erofs-image \n\n"
 		"Dump erofs layout from erofs-image, and [options] are:\n"
 		"-s          print information about superblock\n"
+		"-S      print statistic information of the erofs-image\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 		case 's':
 			dumpcfg.print_superblock = true;
 			break;
+		case 'S':
+			dumpcfg.print_statistic = true;
+			break;
 		case 'v':
 		case 'V':
 			dumpfs_print_version();
@@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 	return 0;
 }
 
+/*
+ * Estimate how many on-disk bytes the data of a file's last extent
+ * occupies.
+ *
+ * @map:                          mapping of the last extent; m_pa and
+ *                                m_plen locate the bytes to read
+ * @last_cluster_size:            number of decompressed bytes wanted
+ *                                from that extent
+ * @last_cluster_compressed_size: receives the estimate on success
+ *
+ * With the lz4 0padding feature the whole m_plen is reported.  Otherwise
+ * the data is decompressed, recompressed with LZ4_compress_destSize(),
+ * and the smaller of that result and m_plen is reported.
+ *
+ * Returns 0 on success and a negative errno value on failure.
+ */
+static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
+		erofs_off_t last_cluster_size,
+		erofs_off_t *last_cluster_compressed_size)
+{
+	int ret;
+	int decomp_len;
+	int compressed_len;
+	erofs_off_t out_cap;
+	char *decompress;
+	/* NOTE(review): raw[] is on the stack; confirm the maximum pcluster size suits that */
+	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
+
+	/* m_plen comes from the image: never read past the end of raw[] */
+	if (map->m_plen > sizeof(raw)) {
+		erofs_err("physical cluster length %llu is too large",
+			  map->m_plen | 0ULL);
+		return -EFSCORRUPTED;
+	}
+
+	ret = dev_read(raw, map->m_pa, map->m_plen);
+	if (ret < 0)
+		return -EIO;
+
+	if (erofs_sb_has_lz4_0padding()) {
+		*last_cluster_compressed_size = map->m_plen;
+		return 0;
+	}
+
+	/*
+	 * lz4 maximum compression ratio is 255, so the output can never
+	 * exceed 255 * m_plen; it is also limited to the bytes wanted.
+	 * The same value sizes the buffer and is passed to the decoder.
+	 */
+	out_cap = map->m_plen * 255;
+	if (last_cluster_size < out_cap)
+		out_cap = last_cluster_size;
+	if (out_cap > INT_MAX)
+		out_cap = INT_MAX;
+	if (!out_cap) {
+		/* nothing to decompress, hence nothing stored */
+		*last_cluster_compressed_size = 0;
+		return 0;
+	}
+
+	decompress = malloc(out_cap);
+	if (!decompress) {
+		erofs_err("allocate memory for decompress space failed");
+		return -ENOMEM;
+	}
+
+	decomp_len = LZ4_decompress_safe_partial(raw, decompress,
+			map->m_plen, out_cap, out_cap);
+	if (decomp_len < 0) {
+		erofs_err("decompress last cluster to get decompressed size failed");
+		free(decompress);
+		return -EIO;
+	}
+
+	/* LZ4_compress_destSize() reports failure as 0, not as a negative value */
+	compressed_len = LZ4_compress_destSize(decompress, raw,
+			&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
+	free(decompress);
+	if (compressed_len <= 0) {
+		erofs_err("compress to get last extent size failed");
+		return -EIO;
+	}
+
+	/*
+	 * due to the use of lz4hc (which can use a different compression
+	 * level), our normal lz4 compress result may be bigger
+	 */
+	if ((erofs_off_t)compressed_len > map->m_plen)
+		*last_cluster_compressed_size = map->m_plen;
+	else
+		*last_cluster_compressed_size = compressed_len;
+	return 0;
+}
+
+/*
+ * Compute the on-disk size of a compressed file.
+ *
+ * The extent containing the last byte is mapped; *size is then
+ * (i_blocks - blocks of that extent) * EROFS_BLKSIZ plus the size of the
+ * last extent, which is its decompressed length when the extent is not
+ * compressed and the estimate from
+ * z_erofs_get_last_cluster_size_from_disk() otherwise.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+static int z_erofs_get_compressed_size(struct erofs_inode *inode,
+		erofs_off_t *size)
+{
+	int err;
+	erofs_blk_t compressedlcs;
+	erofs_off_t last_cluster_size;
+	erofs_off_t last_cluster_compressed_size;
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+	};
+
+	/* an empty file has no last byte to map, and i_size - 1 would wrap */
+	if (!inode->i_size) {
+		*size = 0;
+		return 0;
+	}
+	map.m_la = inode->i_size - 1;
+
+	err = z_erofs_map_blocks_iter(inode, &map);
+	if (err) {
+		erofs_err("read nid %llu's last block failed",
+			  inode->nid | 0ULL);
+		return err;
+	}
+	compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
+	*size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
+	last_cluster_size = inode->i_size - map.m_la;
+
+	if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
+		*size += last_cluster_size;
+		return 0;
+	}
+
+	err = z_erofs_get_last_cluster_size_from_disk(&map,
+			last_cluster_size,
+			&last_cluster_compressed_size);
+	if (err) {
+		erofs_err("get nid %llu's last extent size failed",
+			  inode->nid | 0ULL);
+		return err;
+	}
+	*size += last_cluster_compressed_size;
+	return 0;
+}
+
+/*
+ * Store the on-disk size of @inode in *size.
+ *
+ * Flat (plain or inline) inodes report i_size; compressed inodes are
+ * measured by z_erofs_get_compressed_size().  Any other data layout
+ * leaves *size at 0.
+ *
+ * Side effect: bumps stats.uncompressed_files or stats.compressed_files.
+ *
+ * Returns 0 on success or the error from z_erofs_get_compressed_size().
+ */
+static int get_file_compressed_size(struct erofs_inode *inode,
+		erofs_off_t *size)
+{
+	int err;
+
+	*size = 0;
+	switch (inode->datalayout) {
+	case EROFS_INODE_FLAT_INLINE:
+	case EROFS_INODE_FLAT_PLAIN:
+		stats.uncompressed_files++;
+		*size = inode->i_size;
+		break;
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+	case EROFS_INODE_FLAT_COMPRESSION:
+		stats.compressed_files++;
+		err = z_erofs_get_compressed_size(inode, size);
+		if (err) {
+			erofs_err("get compressed file size failed");
+			return err;
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 static void dumpfs_print_superblock(void)
 {
 	time_t time = sbi.build_time;
@@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
 
 }
 
+/*
+ * Classify @filename by the text starting at its last '.'.
+ *
+ * Returns the index of the matching entry in file_types[], or
+ * OTHERFILETYPE when the name has no '.' or the suffix is not listed.
+ */
+static int get_file_type(const char *filename)
+{
+	char *suffix = strrchr(filename, '.');
+	int idx;
+
+	if (!suffix)
+		return OTHERFILETYPE;
+
+	for (idx = SOFILETYPE; idx < OTHERFILETYPE; idx++) {
+		if (!strcmp(suffix, file_types[idx]))
+			return idx;
+	}
+	return OTHERFILETYPE;
+}
+
+/* Map a byte count to its slot in the power-of-two KB size histograms. */
+static unsigned int dumpfs_size_bucket(erofs_off_t size)
+{
+	unsigned int bits = 0;
+
+	/* slot 0 is "< 1KB"; slot n holds sizes with n significant KB bits */
+	for (size >>= 10; size; size >>= 1)
+		bits++;
+	return bits < FILE_SIZE_BITS - 1 ? bits : FILE_SIZE_BITS - 1;
+}
+
+/*
+ * Walk directory @nid recursively and accumulate file count, file size
+ * and file type statistics in the global stats.
+ *
+ * @nid:        directory to scan
+ * @parent_nid: nid of its parent; entries whose nid equals @nid or
+ *              @parent_nid are treated as the self/parent entries and
+ *              are neither counted nor descended into
+ *
+ * Returns 0 on success, -EFSCORRUPTED on a malformed directory block, or
+ * the error of the failing read.
+ */
+static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
+{
+	struct erofs_inode vi = { .nid = nid};
+	int err;
+	char buf[EROFS_BLKSIZ];
+	/* names are validated against EROFS_NAME_LEN before being copied */
+	char filename[EROFS_NAME_LEN + 1];
+	erofs_off_t offset;
+
+	err = erofs_read_inode_from_disk(&vi);
+	if (err)
+		return err;
+
+	offset = 0;
+	while (offset < vi.i_size) {
+		erofs_off_t maxsize = min_t(erofs_off_t,
+			vi.i_size - offset, EROFS_BLKSIZ);
+		struct erofs_dirent *de = (void *)buf;
+		struct erofs_dirent *end;
+		unsigned int nameoff;
+
+		err = erofs_pread(&vi, buf, maxsize, offset);
+		if (err)
+			return err;
+
+		nameoff = le16_to_cpu(de->nameoff);
+
+		/* the dirent table must lie inside the bytes just read */
+		if (nameoff < sizeof(struct erofs_dirent) ||
+		    nameoff > maxsize) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu",
+				  nameoff, nid | 0ULL);
+			return -EFSCORRUPTED;
+		}
+		end = (void *)buf + nameoff;
+		while (de < end) {
+			const erofs_nid_t child = le64_to_cpu(de->nid);
+			const char *dname;
+			unsigned int dname_len = 0;
+			struct erofs_inode inode = { .nid = child };
+			erofs_off_t actual_size = 0;
+			erofs_off_t original_size;
+			bool bogus;
+
+			nameoff = le16_to_cpu(de->nameoff);
+			dname = (char *)buf + nameoff;
+
+			/* check nameoff first so strnlen() stays inside buf */
+			bogus = nameoff > maxsize;
+			if (!bogus) {
+				if (de + 1 >= end)
+					dname_len = strnlen(dname,
+							maxsize - nameoff);
+				else
+					dname_len =
+						le16_to_cpu(de[1].nameoff) -
+						nameoff;
+				bogus = nameoff + dname_len > maxsize ||
+					dname_len > EROFS_NAME_LEN;
+			}
+
+			/* a corrupted entry is found */
+			if (bogus) {
+				erofs_err("bogus dirent @ nid %llu",
+						child | 0ULL);
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+			if (child != nid && child != parent_nid)
+				stats.files++;
+
+			switch (de->file_type) {
+			case EROFS_FT_UNKNOWN:
+				break;
+			case EROFS_FT_REG_FILE:
+				err = erofs_read_inode_from_disk(&inode);
+				if (err) {
+					erofs_err("read file inode from disk failed!");
+					return err;
+				}
+				original_size = inode.i_size;
+				stats.files_total_origin_size += original_size;
+				stats.regular_files++;
+
+				err = get_file_compressed_size(&inode,
+						&actual_size);
+				if (err) {
+					erofs_err("get file size failed");
+					return err;
+				}
+				stats.files_total_size += actual_size;
+
+				/* dirent names are not NUL-terminated */
+				memcpy(filename, dname, dname_len);
+				filename[dname_len] = '\0';
+				stats.file_type_stat[get_file_type(filename)]++;
+
+				stats.file_org_size[dumpfs_size_bucket(original_size)]++;
+				stats.file_comp_size[dumpfs_size_bucket(actual_size)]++;
+				break;
+
+			case EROFS_FT_DIR:
+				if (child != nid && child != parent_nid) {
+					stats.dir_files++;
+					stats.uncompressed_files++;
+					err = read_dir(child, nid);
+					if (err) {
+						fprintf(stderr,
+								"parse dir nid %llu error occurred\n",
+								child | 0ULL);
+						return err;
+					}
+				}
+				break;
+			case EROFS_FT_CHRDEV:
+				stats.chardev_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_BLKDEV:
+				stats.blkdev_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_FIFO:
+				stats.fifo_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_SOCK:
+				stats.sock_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_SYMLINK:
+				stats.symlink_files++;
+				stats.uncompressed_files++;
+				break;
+			default:
+				break;
+			}
+			++de;
+		}
+		offset += maxsize;
+	}
+	return 0;
+}
+
+/* Print the per-kind file counters collected in stats to stderr, one per line. */
+static void dumpfs_print_statistic_of_filetype(void)
+{
+	const struct {
+		const char *fmt;
+		unsigned long count;
+	} rows[] = {
+		{ "Filesystem total file count:         %lu\n",
+			stats.files },
+		{ "Filesystem regular file count:       %lu\n",
+			stats.regular_files },
+		{ "Filesystem directory count:          %lu\n",
+			stats.dir_files },
+		{ "Filesystem symlink file count:       %lu\n",
+			stats.symlink_files },
+		{ "Filesystem character device count:   %lu\n",
+			stats.chardev_files },
+		{ "Filesystem block device count:       %lu\n",
+			stats.blkdev_files },
+		{ "Filesystem FIFO file count:          %lu\n",
+			stats.fifo_files },
+		{ "Filesystem SOCK file count:          %lu\n",
+			stats.sock_files },
+	};
+	unsigned int row;
+
+	for (row = 0; row < sizeof(rows) / sizeof(rows[0]); row++)
+		fprintf(stderr, rows[row].fmt, rows[row].count);
+}
+
+/*
+ * Print one chart line to stderr using chart_format.
+ *
+ * @col1: row label
+ * @col2: count
+ * @col3: ratio in percent
+ * @col4: bar text
+ *
+ * The row is written straight to the stream: formatting it into a buffer
+ * and then handing that buffer to fprintf() as the format would make the
+ * '%' that follows the ratio be parsed as a conversion specifier.
+ */
+static void dumpfs_print_chart_row(char *col1, unsigned int col2,
+		double col3, char *col4)
+{
+	/* NOTE(review): chart_format prints the unsigned count with %d */
+	fprintf(stderr, chart_format, col1, col2, col3, col4);
+}
+
+/*
+ * Print a size histogram as a text chart on stderr.
+ *
+ * @file_counts: number of regular files per size slot
+ * @len:         number of slots to print
+ *
+ * Row 0 is labelled 0 .. 1 (KB) and every following row doubles both
+ * bounds; the last row is labelled "others".  The ratio is relative to
+ * stats.regular_files and the bar holds one '#' per two percent.
+ */
+static void dumpfs_print_chart_of_file(unsigned int *file_counts,
+		unsigned int len)
+{
+	char col1[30];
+	unsigned int col2;
+	double col3;
+	char col4[400];
+	unsigned int lowerbound = 0;
+	unsigned int upperbound = 1;
+	unsigned int i;
+
+	fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
+			"ratio", "distribution");
+	for (i = 0; i < len; i++) {
+		size_t bar;
+
+		if (i == len - 1)
+			strcpy(col1, " others");
+		else
+			snprintf(col1, sizeof(col1), "%6u .. %-6u",
+				 lowerbound, upperbound);
+		col2 = file_counts[i];
+		/* no regular files at all: show 0% instead of dividing by zero */
+		col3 = stats.regular_files ?
+			100.0 * col2 / (double)stats.regular_files : 0.0;
+
+		bar = col3 / 2;
+		if (bar > sizeof(col4) - 1)
+			bar = sizeof(col4) - 1;
+		memset(col4, 0, sizeof(col4));
+		memset(col4, '#', bar);
+		dumpfs_print_chart_row(col1, col2, col3, col4);
+		lowerbound = upperbound;
+		upperbound <<= 1;
+	}
+}
+
+/*
+ * Print the per-suffix file counts as a text chart on stderr.
+ *
+ * @file_types: row labels
+ * @len:        number of rows; row i shows stats.file_type_stat[i]
+ *
+ * The ratio is relative to stats.regular_files and the bar holds one '#'
+ * per two percent.
+ */
+static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
+{
+	char col1[30];
+	unsigned int col2;
+	double col3;
+	char col4[401];
+	unsigned int i;
+
+	fprintf(stderr, header_format, "type", "count", "ratio",
+			"distribution");
+	for (i = 0; i < len; i++) {
+		size_t bar;
+
+		snprintf(col1, sizeof(col1), "%-17s", file_types[i]);
+		col2 = stats.file_type_stat[i];
+		/* no regular files at all: show 0% instead of dividing by zero */
+		col3 = stats.regular_files ?
+			100.0 * col2 / (double)stats.regular_files : 0.0;
+
+		bar = col3 / 2;
+		if (bar > sizeof(col4) - 1)
+			bar = sizeof(col4) - 1;
+		memset(col4, 0, sizeof(col4));
+		memset(col4, '#', bar);
+		dumpfs_print_chart_row(col1, col2, col3, col4);
+	}
+}
+
+/*
+ * Compute stats.compress_rate (on-disk size as a percentage of the
+ * original size) and print it to stderr together with the compressed
+ * and uncompressed file counts and both size totals.
+ *
+ * When the total original size is 0 the rate is reported as 0 rather
+ * than dividing by zero.
+ */
+static void dumpfs_print_statistic_of_compression(void)
+{
+	/* computed in floating point so 100 * size cannot wrap */
+	stats.compress_rate = stats.files_total_origin_size ?
+		100.0 * stats.files_total_size /
+			(double)stats.files_total_origin_size : 0.0;
+	fprintf(stderr, "Filesystem compressed files:         %lu\n",
+			stats.compressed_files);
+	fprintf(stderr, "Filesystem uncompressed files:       %lu\n",
+			stats.uncompressed_files);
+	fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
+			stats.files_total_origin_size);
+	fprintf(stderr, "Filesystem total file size:          %lu Bytes\n",
+			stats.files_total_size);
+	fprintf(stderr, "Filesystem compress rate:            %.2f%%\n",
+			stats.compress_rate);
+}
+
+/*
+ * Collect statistics for the whole image, starting at the root
+ * directory, and print them to stderr: the file counters, the
+ * compression summary, the two size distribution charts and the file
+ * type chart.  Nothing is printed if the directory walk fails.
+ */
+static void dumpfs_print_statistic(void)
+{
+	/* rows per size chart; the last row is labelled "others" */
+	enum { CHART_ROWS = 17 };
+	unsigned int org_rows[CHART_ROWS] = {0};
+	unsigned int comp_rows[CHART_ROWS] = {0};
+	unsigned int i;
+	int err;
+
+	stats.blocks = sbi.blocks;
+	err = read_dir(sbi.root_nid, sbi.root_nid);
+	if (err) {
+		erofs_err("read dir failed");
+		return;
+	}
+
+	/*
+	 * The histograms have FILE_SIZE_BITS slots but the charts only
+	 * CHART_ROWS rows: fold every slot past the chart into the last
+	 * row so that large files are not silently left out.
+	 */
+	for (i = 0; i < FILE_SIZE_BITS; i++) {
+		unsigned int row = i < CHART_ROWS ? i : CHART_ROWS - 1;
+
+		org_rows[row] += stats.file_org_size[i];
+		comp_rows[row] += stats.file_comp_size[i];
+	}
+
+	dumpfs_print_statistic_of_filetype();
+	dumpfs_print_statistic_of_compression();
+
+	fprintf(stderr, "\nOriginal file size distribution:\n");
+	dumpfs_print_chart_of_file(org_rows, CHART_ROWS);
+	fprintf(stderr, "\nOn-Disk file size distribution:\n");
+	dumpfs_print_chart_of_file(comp_rows, CHART_ROWS);
+	fprintf(stderr, "\nFile type distribution:\n");
+	dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
+}
+
 int main(int argc, char **argv)
 {
 	int err = 0;
@@ -138,5 +608,9 @@ int main(int argc, char **argv)
 	if (dumpcfg.print_superblock)
 		dumpfs_print_superblock();
 
+	if (dumpcfg.print_statistic)
+		dumpfs_print_statistic();
+
+
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 16:25   ` Gao Xiang
  2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
  2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

From: mpiglet <mpiglet@outlook.com>

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 200 insertions(+), 2 deletions(-)

diff --git a/dump/main.c b/dump/main.c
index b0acc0b..2389cef 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -19,8 +19,10 @@
 
 struct dumpcfg {
 	bool print_superblock;
+	bool print_inode;
 	bool print_statistic;
 	bool print_version;
+	u64 ino;
 };
 static struct dumpcfg dumpcfg;
 
@@ -100,8 +102,9 @@ static void usage(void)
 {
 	fputs("usage: [options] erofs-image \n\n"
 		"Dump erofs layout from erofs-image, and [options] are:\n"
-		"-s          print information about superblock\n"
-		"-S      print statistic information of the erofs-image\n"
+		"-s         print information about superblock\n"
+		"-S         print statistic information of the erofs-image\n"
+		"-i #       print target # inode info\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -113,6 +116,7 @@ static void dumpfs_print_version(void)
 static int dumpfs_parse_options_cfg(int argc, char **argv)
 {
 	int opt;
+	u64 i;
 
 	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
 					long_options, NULL)) != -1) {
@@ -127,6 +131,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 		case 'V':
 			dumpfs_print_version();
 			exit(0);
+		case 'i':
+			i = atoll(optarg);
+			dumpcfg.print_inode = true;
+			dumpcfg.ino = i;
+			break;
 		case 'h':
 		case 1:
 		    usage();
@@ -293,6 +302,193 @@ static void dumpfs_print_superblock(void)
 
 }
 
+/*
+ * Depth-first search for inode @target below directory @nid, building
+ * its path in @path.
+ *
+ * @nid:        directory to scan
+ * @parent_nid: nid of its parent; entries with nid @nid or @parent_nid
+ *              are not descended into
+ * @target:     nid to look for
+ * @path:       output buffer; the caller must provide PATH_MAX + 1 bytes
+ * @pos:        offset in @path at which this directory's '/' is written
+ *
+ * @path is kept NUL-terminated at every step, so characters written
+ * during an unsuccessful descent can never show up in the result.
+ *
+ * Returns 0 with the path in @path when found and a non-zero value
+ * otherwise.  An error from a subdirectory only ends the search of that
+ * subdirectory.
+ */
+static int get_path_by_nid(erofs_nid_t nid, erofs_nid_t parent_nid,
+		erofs_nid_t target, char *path, unsigned int pos)
+{
+	int err;
+	struct erofs_inode inode = {.nid = nid};
+	erofs_off_t offset;
+	char buf[EROFS_BLKSIZ];
+
+	/* room is needed for the '/' and the terminator */
+	if (pos >= PATH_MAX)
+		return -ENAMETOOLONG;
+	path[pos++] = '/';
+	path[pos] = '\0';
+	if (target == sbi.root_nid)
+		return 0;
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err) {
+		erofs_err("read inode %llu failed", nid | 0ULL);
+		return err;
+	}
+
+	offset = 0;
+	while (offset < inode.i_size) {
+		erofs_off_t maxsize = min_t(erofs_off_t,
+					inode.i_size - offset, EROFS_BLKSIZ);
+		struct erofs_dirent *de = (void *)buf;
+		struct erofs_dirent *end;
+		unsigned int nameoff;
+
+		err = erofs_pread(&inode, buf, maxsize, offset);
+		if (err)
+			return err;
+
+		nameoff = le16_to_cpu(de->nameoff);
+		/* the dirent table must lie inside the bytes just read */
+		if (nameoff < sizeof(struct erofs_dirent) ||
+		    nameoff > maxsize) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu",
+				  nameoff, nid | 0ULL);
+			return -EFSCORRUPTED;
+		}
+
+		end = (void *)buf + nameoff;
+		while (de < end) {
+			const erofs_nid_t child = le64_to_cpu(de->nid);
+			const char *dname;
+			unsigned int dname_len = 0;
+			bool bogus;
+
+			nameoff = le16_to_cpu(de->nameoff);
+			dname = (char *)buf + nameoff;
+
+			/* check nameoff first so strnlen() stays inside buf */
+			bogus = nameoff > maxsize;
+			if (!bogus) {
+				if (de + 1 >= end)
+					dname_len = strnlen(dname,
+							maxsize - nameoff);
+				else
+					dname_len = le16_to_cpu(de[1].nameoff)
+						- nameoff;
+				bogus = nameoff + dname_len > maxsize ||
+					dname_len > EROFS_NAME_LEN;
+			}
+
+			/* a corrupted entry is found */
+			if (bogus) {
+				erofs_err("bogus dirent @ nid %llu",
+						child | 0ULL);
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+
+			/* the name plus its terminator must fit in @path */
+			if (pos + dname_len > PATH_MAX) {
+				if (child == target)
+					return -ENAMETOOLONG;
+				++de;
+				continue;
+			}
+
+			if (child == target) {
+				memcpy(path + pos, dname, dname_len);
+				path[pos + dname_len] = '\0';
+				return 0;
+			}
+
+			if (de->file_type == EROFS_FT_DIR &&
+					child != parent_nid &&
+					child != nid) {
+				memcpy(path + pos, dname, dname_len);
+				err = get_path_by_nid(child, nid,
+						target, path, pos + dname_len);
+				if (!err)
+					return 0;
+				/* drop this component and whatever the descent appended */
+				path[pos] = '\0';
+			}
+			++de;
+		}
+		offset += maxsize;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Print what is known about inode dumpcfg.ino to stderr: inode layout,
+ * sizes, file type, on-disk size and compress rate, data layout, create
+ * time, owner, link count and the path found by searching from the root
+ * directory.
+ *
+ * Returns early, after an error message, if the inode cannot be read or
+ * its on-disk size cannot be determined.
+ *
+ * Side effect: get_file_compressed_size() bumps a counter in stats.
+ */
+static void dumpfs_print_inode(void)
+{
+	int err;
+	erofs_off_t size;
+	erofs_nid_t nid = dumpcfg.ino;
+	struct erofs_inode inode = {.nid = nid};
+	char path[PATH_MAX + 1] = {0};
+	const char *created;
+	time_t t;
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err) {
+		erofs_err("read inode %llu from disk failed", nid | 0ULL);
+		return;
+	}
+	/* i_ctime is only meaningful once the inode has been read */
+	t = inode.i_ctime;
+
+	/* "| 0ULL" widens the 64-bit fields so %llu matches on every ABI */
+	fprintf(stderr, "Inode %llu info:\n", dumpcfg.ino | 0ULL);
+	switch (inode.inode_isize) {
+	case 32:
+		fprintf(stderr, "	File inode is compacted layout\n");
+		break;
+	case 64:
+		fprintf(stderr, "	File inode is extended layout\n");
+		break;
+	default:
+		erofs_err("unsupported inode layout");
+	}
+	fprintf(stderr, "	File size:		%llu\n",
+			inode.i_size | 0ULL);
+	fprintf(stderr, "	File nid:		%llu\n",
+			inode.nid | 0ULL);
+	fprintf(stderr, "	File extent size:	%u\n",
+			inode.extent_isize);
+	fprintf(stderr, "	File xattr size:	%u\n",
+			inode.xattr_isize);
+	fprintf(stderr, "	File inode size:	%u\n",
+			inode.inode_isize);
+	fprintf(stderr, "	File type:		");
+	switch (inode.i_mode & S_IFMT) {
+	case S_IFREG:
+		fprintf(stderr, "regular\n");
+		break;
+	case S_IFDIR:
+		fprintf(stderr, "directory\n");
+		break;
+	case S_IFLNK:
+		fprintf(stderr, "link\n");
+		break;
+	case S_IFCHR:
+		fprintf(stderr, "character device\n");
+		break;
+	case S_IFBLK:
+		fprintf(stderr, "block device\n");
+		break;
+	case S_IFIFO:
+		fprintf(stderr, "fifo\n");
+		break;
+	case S_IFSOCK:
+		fprintf(stderr, "sock\n");
+		break;
+	default:
+		break;
+	}
+
+	err = get_file_compressed_size(&inode, &size);
+	if (err) {
+		erofs_err("get file size failed");
+		return;
+	}
+
+	fprintf(stderr, "	File original size:	%llu\n"
+			"	File on-disk size:	%llu\n",
+			inode.i_size | 0ULL, size | 0ULL);
+	/* an empty file has no ratio; report 0 instead of dividing by zero */
+	fprintf(stderr, "	File compress rate:	%.2f%%\n",
+			inode.i_size ?
+			100.0 * size / (double)(inode.i_size) : 0.0);
+
+	fprintf(stderr, "	File datalayout:	");
+	switch (inode.datalayout) {
+	case EROFS_INODE_FLAT_PLAIN:
+		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
+		break;
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION_LEGACY\n");
+		break;
+	case EROFS_INODE_FLAT_INLINE:
+		fprintf(stderr, "EROFS_INODE_FLAT_INLINE\n");
+		break;
+	case EROFS_INODE_FLAT_COMPRESSION:
+		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION\n");
+		break;
+	default:
+		break;
+	}
+
+	/* ctime() returns NULL for a time it cannot represent */
+	created = ctime(&t);
+	fprintf(stderr, "	File create time:	%s",
+			created ? created : "(invalid time)\n");
+	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
+	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);
+	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);
+
+	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
+	if (!err)
+		fprintf(stderr, "	File path:		%s\n", path);
+	else
+		fprintf(stderr, "Path not found\n");
+}
+
 static int get_file_type(const char *filename)
 {
 	char *postfix = strrchr(filename, '.');
@@ -611,6 +807,8 @@ int main(int argc, char **argv)
 	if (dumpcfg.print_statistic)
 		dumpfs_print_statistic();
 
+	if (dumpcfg.print_inode)
+		dumpfs_print_inode();
 
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
                   ` (2 preceding siblings ...)
  2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 16:29   ` Gao Xiang
  2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

From: mpiglet <mpiglet@outlook.com>

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 108 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 91 insertions(+), 17 deletions(-)

diff --git a/dump/main.c b/dump/main.c
index 2389cef..efce309 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -21,8 +21,10 @@ struct dumpcfg {
 	bool print_superblock;
 	bool print_inode;
 	bool print_statistic;
+	bool print_inode_phy;
 	bool print_version;
 	u64 ino;
+	u64 ino_phy;
 };
 static struct dumpcfg dumpcfg;
 
@@ -105,6 +107,7 @@ static void usage(void)
 		"-s         print information about superblock\n"
 		"-S         print statistic information of the erofs-image\n"
 		"-i #       print target # inode info\n"
+		"-I #       print target # inode on-disk info\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -136,6 +139,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 			dumpcfg.print_inode = true;
 			dumpcfg.ino = i;
 			break;
+		case 'I':
+			i = atoll(optarg);
+			dumpcfg.print_inode_phy = true;
+			dumpcfg.ino_phy = i;
+			break;
 		case 'h':
 		case 1:
 		    usage();
@@ -402,25 +410,25 @@ static void dumpfs_print_inode(void)
 	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
 	switch (inode.inode_isize) {
 	case 32:
-		fprintf(stderr, "	File inode is compacted layout\n");
+		fprintf(stderr, "File inode is compacted layout\n");
 		break;
 	case 64:
-		fprintf(stderr, "	File inode is extended layout\n");
+		fprintf(stderr, "File inode is extended layout\n");
 		break;
 	default:
 		erofs_err("unsupported inode layout\n");
 	}
-	fprintf(stderr, "	File size:		%lu\n",
+	fprintf(stderr, "File size:		%lu\n",
 			inode.i_size);
-	fprintf(stderr, "	File nid:		%lu\n",
+	fprintf(stderr, "File nid:		%lu\n",
 			inode.nid);
-	fprintf(stderr, "	File extent size:	%u\n",
+	fprintf(stderr, "File extent size:	%u\n",
 			inode.extent_isize);
-	fprintf(stderr, "	File xattr size:	%u\n",
+	fprintf(stderr, "File xattr size:	%u\n",
 			inode.xattr_isize);
-	fprintf(stderr, "	File inode size:	%u\n",
+	fprintf(stderr, "File inode size:	%u\n",
 			inode.inode_isize);
-	fprintf(stderr, "	File type:		");
+	fprintf(stderr, "File type:		");
 	switch (inode.i_mode & S_IFMT) {
 	case S_IFREG:
 		fprintf(stderr, "regular\n");
@@ -453,13 +461,13 @@ static void dumpfs_print_inode(void)
 		return;
 	}
 
-	fprintf(stderr, "	File original size:	%lu\n"
-			"	File on-disk size:	%lu\n",
+	fprintf(stderr, "File original size:	%lu\n"
+			"File on-disk size:	%lu\n",
 			inode.i_size, size);
-	fprintf(stderr, "	File compress rate:	%.2f%%\n",
+	fprintf(stderr, "File compress rate:	%.2f%%\n",
 			(double)(100 * size) / (double)(inode.i_size));
 
-	fprintf(stderr, "	File datalayout:	");
+	fprintf(stderr, "File datalayout:	");
 	switch (inode.datalayout) {
 	case EROFS_INODE_FLAT_PLAIN:
 		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
@@ -477,18 +485,82 @@ static void dumpfs_print_inode(void)
 		break;
 	}
 
-	fprintf(stderr, "	File create time:	%s", ctime(&t));
-	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
-	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);
-	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);
+	fprintf(stderr, "File create time:	%s", ctime(&t));
+	fprintf(stderr, "File uid:		%u\n", inode.i_uid);
+	fprintf(stderr, "File gid:		%u\n", inode.i_gid);
+	fprintf(stderr, "File hard-link count:	%u\n", inode.i_nlink);
 
 	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
 	if (!err)
-		fprintf(stderr, "	File path:		%s\n", path);
+		fprintf(stderr, "File path:		%s\n", path);
 	else
 		fprintf(stderr, "Path not found\n");
 }
 
+static void dumpfs_print_inode_phy(void)
+{
+	int err;
+	erofs_nid_t nid = dumpcfg.ino_phy;
+	struct erofs_inode inode = {.nid = nid};
+	char path[PATH_MAX + 1] = {0};
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err < 0) {
+		erofs_err("read inode %lu from disk failed", nid);
+		return;
+	}
+
+	const erofs_off_t ibase = iloc(inode.nid);
+	const erofs_off_t pos = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(
+			ibase + inode.inode_isize + inode.xattr_isize);
+	erofs_blk_t blocks = inode.u.i_blocks;
+	erofs_blk_t start = 0;
+	erofs_blk_t end = 0;
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+		.m_la = 0,
+	};
+
+	fprintf(stderr, "Inode %lu on-disk info:\n", nid);
+	switch (inode.datalayout) {
+	case EROFS_INODE_FLAT_INLINE:
+	case EROFS_INODE_FLAT_PLAIN:
+		if (inode.u.i_blkaddr == NULL_ADDR)
+			start = end = erofs_blknr(pos);
+		else {
+			start = inode.u.i_blkaddr;
+			end = start + BLK_ROUND_UP(inode.i_size) - 1;
+		}
+		fprintf(stderr, "File size:			%lu\n",
+				inode.i_size);
+		fprintf(stderr,
+				"	Plain Block Address:		%u - %u\n",
+				start, end);
+		break;
+
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+	case EROFS_INODE_FLAT_COMPRESSION:
+		err = z_erofs_map_blocks_iter(&inode, &map);
+		if (err)
+			erofs_err("get file blocks range failed");
+
+		start = erofs_blknr(map.m_pa);
+		end = start - 1 + blocks;
+		fprintf(stderr,
+				"	Compressed Block Address:	%u - %u\n",
+				start, end);
+		break;
+	}
+
+	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
+	if (!err)
+		fprintf(stderr, "File Path:			%s\n",
+				path);
+	else
+		erofs_err("path not found");
+}
+
+
 static int get_file_type(const char *filename)
 {
 	char *postfix = strrchr(filename, '.');
@@ -810,5 +882,7 @@ int main(int argc, char **argv)
 	if (dumpcfg.print_inode)
 		dumpfs_print_inode();
 
+	if (dumpcfg.print_inode_phy)
+		dumpfs_print_inode_phy();
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
                   ` (3 preceding siblings ...)
  2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
@ 2021-09-11 15:45 ` Gao Xiang
  4 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 15:45 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

Hi Xuenan,

Thanks for working on dump.erofs! Such functionality was recently
requested by some other folks, so it would be quite helpful to have
it resolved upstream.

Some comments in-line:

On Sat, Sep 11, 2021 at 09:46:31PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>

mpiglet => "Wang Qi" (according to the name in the source header)

It'd be better to use the real name if possible. ;)

> 
> Add dump-tool for erofs to facilitate users directly
> analyzing the erofs image file.
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>

Same here.

> ---
>  Makefile.am        |  2 +-
>  configure.ac       |  2 ++
>  dump/Makefile.am   | 10 ++++++
>  dump/main.c        | 84 ++++++++++++++++++++++++++++++++++++++++++++++
>  include/erofs/io.h |  3 ++
>  lib/namei.c        |  4 +--
>  6 files changed, 102 insertions(+), 3 deletions(-)
>  create mode 100644 dump/Makefile.am
>  create mode 100644 dump/main.c
> 
> diff --git a/Makefile.am b/Makefile.am
> index b804aa9..fedf7b5 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -3,7 +3,7 @@
>  
>  ACLOCAL_AMFLAGS = -I m4
>  
> -SUBDIRS = man lib mkfs
> +SUBDIRS = man lib mkfs dump
>  if ENABLE_FUSE
>  SUBDIRS += fuse
>  endif
> diff --git a/configure.ac b/configure.ac
> index f626064..f4fe548 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -280,6 +280,8 @@ AC_CONFIG_FILES([Makefile
>  		 man/Makefile
>  		 lib/Makefile
>  		 mkfs/Makefile
> +		 dump/Makefile
>  		 fuse/Makefile])
> +
>  AC_OUTPUT
>  
> diff --git a/dump/Makefile.am b/dump/Makefile.am
> new file mode 100644
> index 0000000..e664799
> --- /dev/null
> +++ b/dump/Makefile.am
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0+
> +# Makefile.am
> +
> +AUTOMAKE_OPTIONS = foreign
> +bin_PROGRAMS     = dump.erofs
> +AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}

Do we really need uuid and selinux libraries for dump.erofs?

> +dump_erofs_SOURCES = main.c
> +dump_erofs_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
> +dump_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} ${liblz4_LIBS}

Same here.

> +
> diff --git a/dump/main.c b/dump/main.c
> new file mode 100644
> index 0000000..8fbc24a
> --- /dev/null
> +++ b/dump/main.c
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * dump/main.c

It could cause some u-boot checkpatch problems...
It'd be better to get rid of the file path in this header comment.

> + *
> + * Copyright (C) 2021-2022 HUAWEI, Inc.
> + *             http://www.huawei.com/
> + * Created by Wang Qi <mpiglet@outlook.com>
> + *            Guo Xuenan <guoxuenan@huawei.com>
> + */
> +
> +#include <stdlib.h>
> +#include <getopt.h>
> +#include <sys/sysmacros.h>
> +#include <time.h>
> +#include <lz4.h>
> +
> +#include "erofs/print.h"
> +#include "erofs/io.h"
> +
> +static struct option long_options[] = {
> +	{"help", no_argument, 0, 1},
> +	{0, 0, 0, 0},
> +};
> +
> +static void usage(void)
> +{
> +	fputs("usage: [options] erofs-image \n\n"
> +		"Dump erofs layout from erofs-image, and [options] are:\n"
> +		"-v/-V      print dump.erofs version info\n"

How about leaving only one option here?
It'd be better to keep in sync with dumpe2fs, so:
https://www.man7.org/linux/man-pages/man8/dumpe2fs.8.html

       -V     print the version number of dump.erofs and exit.

> +		"-h/--help  display this help and exit\n", stderr);

-h is already used by dumpe2fs, so how about leaving only --help here?

> +}
> +static void dumpfs_print_version(void)
> +{
> +	fprintf(stderr, "dump.erofs %s\n", cfg.c_version);
> +}
> +
> +static int dumpfs_parse_options_cfg(int argc, char **argv)
> +{
> +	int opt;
> +
> +	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",

It seems that not all of these options are used in this patch.
Also, it would be better to sort them all in alphabetical order.

> +					long_options, NULL)) != -1) {
> +		switch (opt) {
> +		case 'v':
> +		case 'V':
> +			dumpfs_print_version();
> +			exit(0);
> +		case 'h':
> +		case 1:
> +		    usage();
> +		    exit(0);
> +		default: /* '?' */
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (optind >= argc)
> +		return -EINVAL;
> +
> +	cfg.c_img_path = strdup(argv[optind++]);
> +	if (!cfg.c_img_path)
> +		return -ENOMEM;
> +
> +	if (optind < argc) {
> +		erofs_err("unexpected argument: %s\n", argv[optind]);

minor nit: memory leak of c_img_path?

> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int err = 0;
> +
> +	erofs_init_configure();
> +	err = dumpfs_parse_options_cfg(argc, argv);
> +	if (err) {
> +		if (err == -EINVAL)
> +			usage();
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> diff --git a/include/erofs/io.h b/include/erofs/io.h
> index 5574245..00e5de8 100644
> --- a/include/erofs/io.h
> +++ b/include/erofs/io.h
> @@ -10,6 +10,7 @@
>  #define __EROFS_IO_H
>  
>  #include <unistd.h>
> +#include <sys/types.h>

How about removing "#include <sys/types.h>" in lib/namei.c?

Thanks,
Gao Xiang

>  #include "internal.h"
>  
>  #ifndef O_BINARY
> @@ -25,6 +26,8 @@ int dev_fillzero(u64 offset, size_t len, bool padding);
>  int dev_fsync(void);
>  int dev_resize(erofs_blk_t nblocks);
>  u64 dev_length(void);
> +dev_t erofs_new_decode_dev(u32 dev);
> +int erofs_read_inode_from_disk(struct erofs_inode *vi);
>  
>  static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
>  			    u32 nblocks)
> diff --git a/lib/namei.c b/lib/namei.c
> index 4e06ba4..21631f1 100644
> --- a/lib/namei.c
> +++ b/lib/namei.c
> @@ -15,7 +15,7 @@
>  #include "erofs/print.h"
>  #include "erofs/io.h"
>  
> -static dev_t erofs_new_decode_dev(u32 dev)
> +dev_t erofs_new_decode_dev(u32 dev)
>  {
>  	const unsigned int major = (dev & 0xfff00) >> 8;
>  	const unsigned int minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
> @@ -23,7 +23,7 @@ static dev_t erofs_new_decode_dev(u32 dev)
>  	return makedev(major, minor);
>  }
>  
> -static int erofs_read_inode_from_disk(struct erofs_inode *vi)
> +int erofs_read_inode_from_disk(struct erofs_inode *vi)
>  {
>  	int ret, ifmt;
>  	char buf[sizeof(struct erofs_inode_extended)];
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
@ 2021-09-11 15:58   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 15:58 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

On Sat, Sep 11, 2021 at 09:46:32PM +0800, Guo Xuenan wrote:
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>

Same here.

> ---
>  dump/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/dump/main.c b/dump/main.c
> index 8fbc24a..25ac89f 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -17,6 +17,12 @@
>  #include "erofs/print.h"
>  #include "erofs/io.h"
>  
> +struct dumpcfg {
> +	bool print_superblock;
> +	bool print_version;
> +};
> +static struct dumpcfg dumpcfg;
> +
>  static struct option long_options[] = {
>  	{"help", no_argument, 0, 1},
>  	{0, 0, 0, 0},
> @@ -26,6 +32,7 @@ static void usage(void)
>  {
>  	fputs("usage: [options] erofs-image \n\n"
>  		"Dump erofs layout from erofs-image, and [options] are:\n"
> +		"-s          print information about superblock\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -41,6 +48,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
>  					long_options, NULL)) != -1) {
>  		switch (opt) {
> +		case 's':
> +			dumpcfg.print_superblock = true;
> +			break;
>  		case 'v':
>  		case 'V':
>  			dumpfs_print_version();
> @@ -68,6 +78,39 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  	return 0;
>  }
>  
> +static void dumpfs_print_superblock(void)
> +{
> +	time_t time = sbi.build_time;
> +
> +	fprintf(stderr, "Filesystem magic number:	0x%04X\n", EROFS_SUPER_MAGIC_V1);
> +	fprintf(stderr, "Filesystem blocks: 		%lu\n", sbi.blocks);
> +	fprintf(stderr, "Filesystem meta block:		%u\n", sbi.meta_blkaddr);

Filesystem inode metadata start block:

> +	fprintf(stderr, "Filesystem xattr block:	%u\n", sbi.xattr_blkaddr);

Filesystem shared xattr metadata start block:

> +	fprintf(stderr, "Filesystem root nid:		%ld\n", sbi.root_nid);


> +	fprintf(stderr, "Filesystem valid inos:		%lu\n", sbi.inos);

Inode count:

> +	fprintf(stderr, "Filesystem created:		%s", ctime(&time));
> +	fprintf(stderr, "Filesystem uuid:		");

Filesystem UUID:

How about printing to stdout directly? according to
dumpe2fs:
https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/misc/dumpe2fs.c#n219

Filesystem volume name:   <none>
Last mounted on:          /
Filesystem UUID:          c46ea44a-e249-446f-af40-xxxxxxxxxxxx
Filesystem magic number:  0xEF53
Filesystem revision #:    1 (dynamic)
Filesystem features:      has_journal ext_attr resize_inode dir_index filetype needs_recovery extent 64bit flex_bg sparse_super large_file huge_file dir_nlink extra_isize metadata_csum
Filesystem flags:         signed_directory_hash 
Default mount options:    user_xattr acl
Filesystem state:         clean
Errors behavior:          Continue
Filesystem OS type:       Linux
Inode count:              8003584
Block count:              32000000
Reserved block count:     1600000
Free blocks:              18661241
Free inodes:              7681550
First block:              0
Block size:               4096
Fragment size:            4096


> +	for (int i = 0; i < 16; i++)
> +		fprintf(stderr, "%02x", sbi.uuid[i]);
> +	fprintf(stderr, "\n");

This doesn't seem to be the correct UUID format...

> +
> +	if (erofs_sb_has_lz4_0padding())
> +		fprintf(stderr, "Filesystem support lz4 0padding\n");
> +	else
> +		fprintf(stderr, "Filesystem not support lz4 0padding\n");
> +
> +	if (erofs_sb_has_big_pcluster())
> +		fprintf(stderr, "Filesystem support big pcluster\n");
> +	else
> +		fprintf(stderr, "Filesystem not support big pcluster\n");
> +
> +	if (erofs_sb_has_sb_chksum())
> +		fprintf(stderr, "Filesystem has super block checksum feature\n");
> +	else
> +		fprintf(stderr, "Filesystem has no superblock checksum feature\n");

How about showing the features in a list as above?

Thanks,
Gao Xiang

> +
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	int err = 0;
> @@ -80,5 +123,20 @@ int main(int argc, char **argv)
>  		return -1;
>  	}
>  
> +	err = dev_open_ro(cfg.c_img_path);
> +	if (err) {
> +		erofs_err("open image file failed");
> +		return -1;
> +	}
> +
> +	err = erofs_read_superblock();
> +	if (err) {
> +		erofs_err("read superblock failed");
> +		return -1;
> +	}
> +
> +	if (dumpcfg.print_superblock)
> +		dumpfs_print_superblock();
> +
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
@ 2021-09-11 16:13   ` Gao Xiang
  2021-09-13  4:30     ` Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:13 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

(+Cc Jianan.)

On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
>  dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 474 insertions(+)
> 
> diff --git a/dump/main.c b/dump/main.c
> index 25ac89f..b0acc0b 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -19,10 +19,78 @@
>  
>  struct dumpcfg {
>  	bool print_superblock;
> +	bool print_statistic;
>  	bool print_version;
>  };
>  static struct dumpcfg dumpcfg;
>  
> +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
> +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> +static char *file_types[] = {
> +	".so",
> +	".png",
> +	".jpg",
> +	".xml",
> +	".html",
> +	".odex",
> +	".vdex",
> +	".apk",
> +	".ttf",
> +	".jar",
> +	".json",
> +	".ogg",
> +	".oat",
> +	".art",
> +	".rc",
> +	".otf",
> +	".txt",
> +	"others",
> +};
> +enum {
> +	SOFILETYPE = 0,
> +	PNGFILETYPE,
> +	JPEGFILETYPE,
> +	XMLFILETYPE,
> +	HTMLFILETYPE,
> +	ODEXFILETYPE,
> +	VDEXFILETYPE,
> +	APKFILETYPE,
> +	TTFFILETYPE,
> +	JARFILETYPE,
> +	JSONFILETYPE,
> +	OGGFILETYPE,
> +	OATFILETYPE,
> +	ARTFILETYPE,
> +	RCFILETYPE,
> +	OTFFILETYPE,
> +	TXTFILETYPE,
> +	OTHERFILETYPE,
> +};

Why do we need enums here? Can't these be resolved with array indexes?

> +
> +#define	FILE_SIZE_BITS	30
> +struct statistics {
> +	unsigned long blocks;
> +	unsigned long files;
> +	unsigned long files_total_size;
> +	unsigned long files_total_origin_size;
> +	double compress_rate;
> +	unsigned long compressed_files;
> +	unsigned long uncompressed_files;
> +
> +	unsigned long regular_files;
> +	unsigned long dir_files;
> +	unsigned long chardev_files;
> +	unsigned long blkdev_files;
> +	unsigned long fifo_files;
> +	unsigned long sock_files;
> +	unsigned long symlink_files;
> +
> +	unsigned int file_type_stat[OTHERFILETYPE + 1];
> +	unsigned int file_org_size[FILE_SIZE_BITS];

What do "FILE_SIZE_BITS" and "file_org_size" mean?

> +	unsigned int file_comp_size[FILE_SIZE_BITS];
> +};
> +static struct statistics stats;
> +
>  static struct option long_options[] = {
>  	{"help", no_argument, 0, 1},
>  	{0, 0, 0, 0},
> @@ -33,6 +101,7 @@ static void usage(void)
>  	fputs("usage: [options] erofs-image \n\n"
>  		"Dump erofs layout from erofs-image, and [options] are:\n"
>  		"-s          print information about superblock\n"
> +		"-S      print statistic information of the erofs-image\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  		case 's':
>  			dumpcfg.print_superblock = true;
>  			break;
> +		case 'S':
> +			dumpcfg.print_statistic = true;
> +			break;
>  		case 'v':
>  		case 'V':
>  			dumpfs_print_version();
> @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  	return 0;
>  }
>  
> +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> +		erofs_off_t last_cluster_size,
> +		erofs_off_t *last_cluster_compressed_size)

Hmmm... do we really need the exact compressed bytes?
or just compressed blocks is enough?

"compressed blocks" can be gotten in erofs inode.

Btw, I think it would be useful for fsck though (to check if an erofs image is correct).

> +{
> +	int ret;
> +	int decomp_len;
> +	int compressed_len = 0;
> +	char *decompress;
> +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> +
> +	ret = dev_read(raw, map->m_pa, map->m_plen);
> +	if (ret < 0)
> +		return -EIO;
> +
> +	if (erofs_sb_has_lz4_0padding()) {
> +		compressed_len = map->m_plen;
> +	} else {
> +		// lz4 maximum compression ratio is 255
> +		decompress = (char *)malloc(map->m_plen * 255);
> +		if (!decompress) {
> +			erofs_err("allocate memory for decompress space failed");
> +			return -1;
> +		}
> +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> +				map->m_plen, last_cluster_size,
> +				map->m_plen * 10);
> +		if (decomp_len < 0) {
> +			erofs_err("decompress last cluster to get decompressed size failed");
> +			free(decompress);
> +			return -1;
> +		}
> +		compressed_len = LZ4_compress_destSize(decompress, raw,
> +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> +		if (compressed_len < 0) {
> +			erofs_err("compress to get last extent size failed\n");
> +			free(decompress);
> +			return -1;
> +		}
> +		free(decompress);
> +		// dut to the use of lz4hc (can use different compress level),
> +		// our normal lz4 compress result may be bigger
> +		compressed_len = compressed_len < map->m_plen ?
> +			compressed_len : map->m_plen;
> +	}
> +
> +	*last_cluster_compressed_size = compressed_len;
> +	return 0;
> +}
> +
> +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> +		erofs_off_t *size)
> +{
> +	int err;
> +	erofs_blk_t compressedlcs;
> +	erofs_off_t last_cluster_size;
> +	erofs_off_t last_cluster_compressed_size;
> +	struct erofs_map_blocks map = {
> +		.index = UINT_MAX,
> +		.m_la = inode->i_size - 1,
> +	};
> +
> +	err = z_erofs_map_blocks_iter(inode, &map);

(add Jianan here.)

Can we port the latest erofs kernel fiemap code to erofs-utils, and add
some functionality to get the file distribution as well when the fs isn't
mounted?


> +	if (err) {
> +		erofs_err("read nid %ld's last block failed\n", inode->nid);
> +		return err;
> +	}
> +	compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
> +	*size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
> +	last_cluster_size = inode->i_size - map.m_la;
> +
> +	if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
> +		*size += last_cluster_size;
> +	} else {
> +		err = z_erofs_get_last_cluster_size_from_disk(&map,
> +				last_cluster_size,
> +				&last_cluster_compressed_size);
> +		if (err) {
> +			erofs_err("get nid %ld's last extent size failed",
> +					inode->nid);
> +			return err;
> +		}
> +		*size += last_cluster_compressed_size;
> +	}
> +	return 0;
> +}
> +
> +static int get_file_compressed_size(struct erofs_inode *inode,
> +		erofs_off_t *size)

erofs_dump_get_file_occupied_blocks?

> +{
> +	int err;
> +
> +	*size = 0;
> +	switch (inode->datalayout) {
> +	case EROFS_INODE_FLAT_INLINE:
> +	case EROFS_INODE_FLAT_PLAIN:
> +		stats.uncompressed_files++;
> +		*size = inode->i_size;
> +		break;
> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> +	case EROFS_INODE_FLAT_COMPRESSION:
> +		stats.compressed_files++;
> +		err = z_erofs_get_compressed_size(inode, size);
> +		if (err) {
> +			erofs_err("get compressed file size failed\n");
> +			return err;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static void dumpfs_print_superblock(void)
>  {
>  	time_t time = sbi.build_time;
> @@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
>  
>  }
>  
> +static int get_file_type(const char *filename)
> +{
> +	char *postfix = strrchr(filename, '.');
> +	int type = SOFILETYPE;
> +
> +	if (postfix == NULL)
> +		return OTHERFILETYPE;
> +	while (type < OTHERFILETYPE) {
> +		if (strcmp(postfix, file_types[type]) == 0)
> +			break;
> +		type++;
> +	}
> +	return type;
> +}
> +
> +// file count、file size、file type

It'd be better to avoid C++ comments...

> +static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
> +{
> +	struct erofs_inode vi = { .nid = nid};
> +	int err;
> +	char buf[EROFS_BLKSIZ];
> +	char filename[PATH_MAX + 1];
> +	erofs_off_t offset;
> +
> +	err = erofs_read_inode_from_disk(&vi);
> +	if (err)
> +		return err;
> +
> +	offset = 0;
> +	while (offset < vi.i_size) {
> +		erofs_off_t maxsize = min_t(erofs_off_t,
> +			vi.i_size - offset, EROFS_BLKSIZ);
> +		struct erofs_dirent *de = (void *)buf;
> +		struct erofs_dirent *end;
> +		unsigned int nameoff;
> +
> +		err = erofs_pread(&vi, buf, maxsize, offset);
> +		if (err)
> +			return err;
> +
> +		nameoff = le16_to_cpu(de->nameoff);
> +
> +		if (nameoff < sizeof(struct erofs_dirent) ||
> +		    nameoff >= PAGE_SIZE) {
> +			erofs_err("invalid de[0].nameoff %u @ nid %llu",
> +				  nameoff, nid | 0ULL);
> +			return -EFSCORRUPTED;
> +		}
> +		end = (void *)buf + nameoff;
> +		while (de < end) {
> +			const char *dname;
> +			unsigned int dname_len;
> +			struct erofs_inode inode = { .nid = de->nid };
> +			int actual_size_mark;
> +			int original_size_mark;
> +			erofs_off_t actual_size = 0;
> +			erofs_off_t original_size;
> +
> +			nameoff = le16_to_cpu(de->nameoff);
> +			dname = (char *)buf + nameoff;
> +
> +			if (de + 1 >= end)
> +				dname_len = strnlen(dname, maxsize - nameoff);
> +			else
> +				dname_len =
> +					le16_to_cpu(de[1].nameoff) - nameoff;
> +
> +			/* a corrupted entry is found */
> +			if (nameoff + dname_len > maxsize ||
> +				dname_len > EROFS_NAME_LEN) {
> +				erofs_err("bogus dirent @ nid %llu",
> +						le64_to_cpu(de->nid) | 0ULL);
> +				DBG_BUGON(1);
> +				return -EFSCORRUPTED;
> +			}
> +			if (de->nid != nid && de->nid != parent_nid)
> +				stats.files++;
> +
> +			memset(filename, 0, PATH_MAX + 1);
> +			memcpy(filename, dname, dname_len);
> +
> +			switch (de->file_type) {
> +			case EROFS_FT_UNKNOWN:
> +				break;
> +			case EROFS_FT_REG_FILE:
> +				err = erofs_read_inode_from_disk(&inode);
> +				if (err) {
> +					erofs_err("read file inode from disk failed!");
> +					return err;
> +				}
> +				original_size = inode.i_size;
> +				stats.files_total_origin_size += original_size;
> +				stats.regular_files++;
> +
> +				err = get_file_compressed_size(&inode,
> +						&actual_size);
> +				if (err) {
> +					erofs_err("get file size failed\n");
> +					return err;
> +				}
> +				stats.files_total_size += actual_size;
> +				stats.file_type_stat[get_file_type(filename)]++;
> +
> +				original_size_mark = 0;
> +				actual_size_mark = 0;
> +				actual_size >>= 10;
> +				original_size >>= 10;
> +
> +				while (actual_size || original_size) {
> +					if (actual_size) {
> +						actual_size >>= 1;
> +						actual_size_mark++;
> +					}
> +					if (original_size) {
> +						original_size >>= 1;
> +						original_size_mark++;
> +					}
> +				}
> +
> +				if (original_size_mark >= FILE_SIZE_BITS - 1)
> +					stats.file_org_size[FILE_SIZE_BITS - 1]++;
> +				else
> +					stats.file_org_size[original_size_mark]++;
> +				if (actual_size_mark >= FILE_SIZE_BITS - 1)
> +					stats.file_comp_size[FILE_SIZE_BITS - 1]++;
> +				else
> +					stats.file_comp_size[actual_size_mark]++;
> +				break;
> +
> +			case EROFS_FT_DIR:
> +				if (de->nid != nid && de->nid != parent_nid) {



> +					stats.dir_files++;
> +					stats.uncompressed_files++;
> +					err = read_dir(de->nid, nid);
> +					if (err) {
> +						fprintf(stderr,
> +								"parse dir nid %llu error occurred\n",
> +								de->nid);
> +						return err;
> +					}
> +				}
> +				break;
> +			case EROFS_FT_CHRDEV:
> +				stats.chardev_files++;
> +				stats.uncompressed_files++;

How about using an array instead?

> +				break;
> +			case EROFS_FT_BLKDEV:
> +				stats.blkdev_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			case EROFS_FT_FIFO:
> +				stats.fifo_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			case EROFS_FT_SOCK:
> +				stats.sock_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			case EROFS_FT_SYMLINK:
> +				stats.symlink_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			}
> +			++de;
> +		}
> +		offset += maxsize;
> +	}
> +	return 0;
> +}
> +
> +static void dumpfs_print_statistic_of_filetype(void)
> +{
> +	fprintf(stderr, "Filesystem total file count:         %lu\n",
> +			stats.files);
> +	fprintf(stderr, "Filesystem regular file count:       %lu\n",
> +			stats.regular_files);
> +	fprintf(stderr, "Filesystem directory count:          %lu\n",
> +			stats.dir_files);
> +	fprintf(stderr, "Filesystem symlink file count:       %lu\n",
> +			stats.symlink_files);
> +	fprintf(stderr, "Filesystem character device count:   %lu\n",
> +			stats.chardev_files);
> +	fprintf(stderr, "Filesystem block device count:       %lu\n",
> +			stats.blkdev_files);
> +	fprintf(stderr, "Filesystem FIFO file count:          %lu\n",
> +			stats.fifo_files);
> +	fprintf(stderr, "Filesystem SOCK file count:          %lu\n",
> +			stats.sock_files);

Also a loop can be used here.

> +}
> +
> +static void dumpfs_print_chart_row(char *col1, unsigned int col2,
> +		double col3, char *col4)
> +{
> +	char row[500] = {0};
> +
> +	sprintf(row, chart_format, col1, col2, col3, col4);
> +	fprintf(stderr, row);
> +}
> +
> +static void dumpfs_print_chart_of_file(unsigned int *file_counts,
> +		unsigned int len)
> +{
> +	char col1[30];
> +	unsigned int col2;
> +	double col3;
> +	char col4[400];
> +	unsigned int lowerbound = 0;
> +	unsigned int upperbound = 1;
> +
> +	fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
> +			"ratio", "distribution");
> +	for (int i = 0; i < len; i++) {
> +		memset(col1, 0, 30);

		memset(col1, 0, sizeof(col1));

> +		memset(col4, 0, 400);

		memset(col4, 0, sizeof(col4));

Thanks,
Gao Xiang

> +		if (i == len - 1)
> +			strcpy(col1, " others");
> +		else if (i <= 6)
> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
> +		else
> +
> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
> +		col2 = file_counts[i];
> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
> +		memset(col4, '#', col3 / 2);
> +		dumpfs_print_chart_row(col1, col2, col3, col4);
> +		lowerbound = upperbound;
> +		upperbound <<= 1;
> +	}
> +}
> +
> +static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
> +{
> +	char col1[30];
> +	unsigned int col2;
> +	double col3;
> +	char col4[401];
> +
> +	fprintf(stderr, header_format, "type", "count", "ratio",
> +			"distribution");
> +	for (int i = 0; i < len; i++) {
> +		memset(col1, 0, 30);
> +		memset(col4, 0, 401);
> +		sprintf(col1, "%-17s", file_types[i]);
> +		col2 = stats.file_type_stat[i];
> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
> +		memset(col4, '#', col3 / 2);
> +		dumpfs_print_chart_row(col1, col2, col3, col4);
> +	}
> +}
> +
> +static void dumpfs_print_statistic_of_compression(void)
> +{
> +	stats.compress_rate = (double)(100 * stats.files_total_size) /
> +		(double)(stats.files_total_origin_size);
> +	fprintf(stderr, "Filesystem compressed files:         %lu\n",
> +			stats.compressed_files);
> +	fprintf(stderr, "Filesystem uncompressed files:       %lu\n",
> +			stats.uncompressed_files);
> +	fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
> +			stats.files_total_origin_size);
> +	fprintf(stderr, "Filesystem total file size:          %lu Bytes\n",
> +			stats.files_total_size);
> +	fprintf(stderr, "Filesystem compress rate:            %.2f%%\n",
> +			stats.compress_rate);
> +}
> +
> +static void dumpfs_print_statistic(void)
> +{
> +	int err;
> +
> +	stats.blocks = sbi.blocks;
> +	err = read_dir(sbi.root_nid, sbi.root_nid);
> +	if (err) {
> +		erofs_err("read dir failed");
> +		return;
> +	}
> +
> +	dumpfs_print_statistic_of_filetype();
> +	dumpfs_print_statistic_of_compression();
> +
> +	fprintf(stderr, "\nOriginal file size distribution:\n");
> +	dumpfs_print_chart_of_file(stats.file_org_size, 17);
> +	fprintf(stderr, "\nOn-Disk file size distribution:\n");
> +	dumpfs_print_chart_of_file(stats.file_comp_size, 17);
> +	fprintf(stderr, "\nFile type distribution:\n");
> +	dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	int err = 0;
> @@ -138,5 +608,9 @@ int main(int argc, char **argv)
>  	if (dumpcfg.print_superblock)
>  		dumpfs_print_superblock();
>  
> +	if (dumpcfg.print_statistic)
> +		dumpfs_print_statistic();
> +
> +
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number
  2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
@ 2021-09-11 16:25   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:25 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

On Sat, Sep 11, 2021 at 09:46:34PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
>  dump/main.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 200 insertions(+), 2 deletions(-)
> 
> diff --git a/dump/main.c b/dump/main.c
> index b0acc0b..2389cef 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -19,8 +19,10 @@
>  
>  struct dumpcfg {
>  	bool print_superblock;
> +	bool print_inode;
>  	bool print_statistic;
>  	bool print_version;
> +	u64 ino;
>  };
>  static struct dumpcfg dumpcfg;
>  
> @@ -100,8 +102,9 @@ static void usage(void)
>  {
>  	fputs("usage: [options] erofs-image \n\n"
>  		"Dump erofs layout from erofs-image, and [options] are:\n"
> -		"-s          print information about superblock\n"
> -		"-S      print statistic information of the erofs-image\n"
> +		"-s         print information about superblock\n"
> +		"-S         print statistic information of the erofs-image\n"
> +		"-i #       print target # inode info\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -113,6 +116,7 @@ static void dumpfs_print_version(void)
>  static int dumpfs_parse_options_cfg(int argc, char **argv)
>  {
>  	int opt;
> +	u64 i;
>  
>  	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
>  					long_options, NULL)) != -1) {
> @@ -127,6 +131,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  		case 'V':
>  			dumpfs_print_version();
>  			exit(0);
> +		case 'i':
> +			i = atoll(optarg);
> +			dumpcfg.print_inode = true;
> +			dumpcfg.ino = i;
> +			break;
>  		case 'h':
>  		case 1:
>  		    usage();
> @@ -293,6 +302,193 @@ static void dumpfs_print_superblock(void)
>  
>  }
>  
> +static int get_path_by_nid(erofs_nid_t nid, erofs_nid_t parent_nid,
> +		erofs_nid_t target, char *path, unsigned int pos)

Can we refactor it into a traversal function (together with the function
in the previous patch)? Also, how do we resolve hard links?

> +{
> +	int err;
> +	struct erofs_inode inode = {.nid = nid};
> +	erofs_off_t offset;
> +	char buf[EROFS_BLKSIZ];
> +
> +	path[pos++] = '/';
> +	if (target == sbi.root_nid)
> +		return 0;
> +
> +	err = erofs_read_inode_from_disk(&inode);
> +	if (err) {
> +		erofs_err("read inode %lu failed", nid);
> +		return err;
> +	}
> +
> +	offset = 0;
> +	while (offset < inode.i_size) {
> +		erofs_off_t maxsize = min_t(erofs_off_t,
> +					inode.i_size - offset, EROFS_BLKSIZ);
> +		struct erofs_dirent *de = (void *)buf;
> +		struct erofs_dirent *end;
> +		unsigned int nameoff;
> +
> +		err = erofs_pread(&inode, buf, maxsize, offset);
> +		if (err)
> +			return err;
> +
> +		nameoff = le16_to_cpu(de->nameoff);
> +		if (nameoff < sizeof(struct erofs_dirent) ||
> +		    nameoff >= PAGE_SIZE) {
> +			erofs_err("invalid de[0].nameoff %u @ nid %llu",
> +				  nameoff, nid | 0ULL);
> +			return -EFSCORRUPTED;
> +		}
> +
> +		end = (void *)buf + nameoff;
> +		while (de < end) {
> +			const char *dname;
> +			unsigned int dname_len;
> +
> +			nameoff = le16_to_cpu(de->nameoff);
> +			dname = (char *)buf + nameoff;
> +			if (de + 1 >= end)
> +				dname_len = strnlen(dname, maxsize - nameoff);
> +			else
> +				dname_len = le16_to_cpu(de[1].nameoff)
> +					- nameoff;
> +
> +			/* a corrupted entry is found */
> +			if (nameoff + dname_len > maxsize ||
> +			    dname_len > EROFS_NAME_LEN) {
> +				erofs_err("bogus dirent @ nid %llu",
> +						le64_to_cpu(de->nid) | 0ULL);
> +				DBG_BUGON(1);
> +				return -EFSCORRUPTED;
> +			}
> +
> +			if (de->nid == target) {
> +				memcpy(path + pos, dname, dname_len);
> +				return 0;
> +			}
> +
> +			if (de->file_type == EROFS_FT_DIR &&
> +					de->nid != parent_nid &&
> +					de->nid != nid) {
> +				memcpy(path + pos, dname, dname_len);
> +				err = get_path_by_nid(de->nid, nid,
> +						target, path, pos + dname_len);
> +				if (!err)
> +					return 0;
> +				memset(path + pos, 0, dname_len);
> +			}
> +			++de;
> +		}
> +		offset += maxsize;
> +	}
> +	return -1;
> +}
> +
> +static void dumpfs_print_inode(void)
> +{
> +	int err;
> +	erofs_off_t size;
> +	erofs_nid_t nid = dumpcfg.ino;
> +	struct erofs_inode inode = {.nid = nid};
> +	char path[PATH_MAX + 1] = {0};
> +	time_t t = inode.i_ctime;
> +
> +	err = erofs_read_inode_from_disk(&inode);
> +	if (err) {
> +		erofs_err("read inode %lu from disk failed", nid);
> +		return;
> +	}
> +
> +	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
> +	switch (inode.inode_isize) {
> +	case 32:
> +		fprintf(stderr, "	File inode is compacted layout\n");

It's enough to print "Inode core size: 32/64."

> +		break;
> +	case 64:
> +		fprintf(stderr, "	File inode is extended layout\n");
> +		break;
> +	default:
> +		erofs_err("unsupported inode layout\n");
> +	}
> +	fprintf(stderr, "	File size:		%lu\n",
> +			inode.i_size);
> +	fprintf(stderr, "	File nid:		%lu\n",
> +			inode.nid);
> +	fprintf(stderr, "	File extent size:	%u\n",
> +			inode.extent_isize);
> +	fprintf(stderr, "	File xattr size:	%u\n",
> +			inode.xattr_isize);
> +	fprintf(stderr, "	File inode size:	%u\n",
> +			inode.inode_isize);
> +	fprintf(stderr, "	File type:		");
> +	switch (inode.i_mode & S_IFMT) {
> +	case S_IFREG:
> +		fprintf(stderr, "regular\n");
> +		break;
> +	case S_IFDIR:
> +		fprintf(stderr, "directory\n");
> +		break;
> +	case S_IFLNK:
> +		fprintf(stderr, "link\n");
> +		break;
> +	case S_IFCHR:
> +		fprintf(stderr, "character device\n");
> +		break;
> +	case S_IFBLK:
> +		fprintf(stderr, "block device\n");
> +		break;
> +	case S_IFIFO:
> +		fprintf(stderr, "fifo\n");
> +		break;
> +	case S_IFSOCK:
> +		fprintf(stderr, "sock\n");
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	err = get_file_compressed_size(&inode, &size);
> +	if (err) {
> +		erofs_err("get file size failed\n");
> +		return;
> +	}
> +
> +	fprintf(stderr, "	File original size:	%lu\n"
> +			"	File on-disk size:	%lu\n",
> +			inode.i_size, size);
> +	fprintf(stderr, "	File compress rate:	%.2f%%\n",
> +			(double)(100 * size) / (double)(inode.i_size));

I think we could use "compressed blocks" instead...

> +
> +	fprintf(stderr, "	File datalayout:	");
> +	switch (inode.datalayout) {
> +	case EROFS_INODE_FLAT_PLAIN:
> +		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
> +		break;
> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> +		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION_LEGACY\n");
> +		break;
> +	case EROFS_INODE_FLAT_INLINE:
> +		fprintf(stderr, "EROFS_INODE_FLAT_INLINE\n");
> +		break;
> +	case EROFS_INODE_FLAT_COMPRESSION:
> +		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION\n");
> +		break;

Just using a number is fine, since there could be some new types in the
future (also I'd like to rename EROFS_INODE_FLAT_COMPRESSION_LEGACY later.)


> +	default:
> +		break;
> +	}
> +
> +	fprintf(stderr, "	File create time:	%s", ctime(&t));
> +	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
> +	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);

The access mode is missing.

> +	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);

Anyway... how about just using a "stat"-like style and adding more fields?

  File: erofs.rst
  Size: 14035     	Blocks: 32         IO Block: 4096   regular file
Device: 10303h/66307d	Inode: 7120988     Links: 1
Access: (0644/-rw-r--r--)  Uid: ( 1000/hsiangkao)   Gid: ( 1000/hsiangkao)
Access: 2021-09-11 00:42:02.748083341 +0800
Modify: 2021-09-03 02:54:32.188031546 +0800
Change: 2021-09-03 02:54:32.188031546 +0800
 Birth: -

Thanks,
Gao Xiang

> +
> +	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> +	if (!err)
> +		fprintf(stderr, "	File path:		%s\n", path);
> +	else
> +		fprintf(stderr, "Path not found\n");
> +}
> +
>  static int get_file_type(const char *filename)
>  {
>  	char *postfix = strrchr(filename, '.');
> @@ -611,6 +807,8 @@ int main(int argc, char **argv)
>  	if (dumpcfg.print_statistic)
>  		dumpfs_print_statistic();
>  
> +	if (dumpcfg.print_inode)
> +		dumpfs_print_inode();
>  
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk
  2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
@ 2021-09-11 16:29   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:29 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

On Sat, Sep 11, 2021 at 09:46:35PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
>  dump/main.c | 108 +++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 91 insertions(+), 17 deletions(-)
> 
> diff --git a/dump/main.c b/dump/main.c
> index 2389cef..efce309 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -21,8 +21,10 @@ struct dumpcfg {
>  	bool print_superblock;
>  	bool print_inode;
>  	bool print_statistic;
> +	bool print_inode_phy;
>  	bool print_version;
>  	u64 ino;
> +	u64 ino_phy;
>  };
>  static struct dumpcfg dumpcfg;
>  
> @@ -105,6 +107,7 @@ static void usage(void)
>  		"-s         print information about superblock\n"
>  		"-S         print statistic information of the erofs-image\n"
>  		"-i #       print target # inode info\n"
> +		"-I #       print target # inode on-disk info\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -136,6 +139,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  			dumpcfg.print_inode = true;
>  			dumpcfg.ino = i;
>  			break;
> +		case 'I':
> +			i = atoll(optarg);
> +			dumpcfg.print_inode_phy = true;
> +			dumpcfg.ino_phy = i;
> +			break;
>  		case 'h':
>  		case 1:
>  		    usage();
> @@ -402,25 +410,25 @@ static void dumpfs_print_inode(void)
>  	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
>  	switch (inode.inode_isize) {
>  	case 32:
> -		fprintf(stderr, "	File inode is compacted layout\n");
> +		fprintf(stderr, "File inode is compacted layout\n");
>  		break;
>  	case 64:
> -		fprintf(stderr, "	File inode is extended layout\n");
> +		fprintf(stderr, "File inode is extended layout\n");
>  		break;
>  	default:
>  		erofs_err("unsupported inode layout\n");
>  	}
> -	fprintf(stderr, "	File size:		%lu\n",
> +	fprintf(stderr, "File size:		%lu\n",

How about folding these into the previous patch?

>  			inode.i_size);
> -	fprintf(stderr, "	File nid:		%lu\n",
> +	fprintf(stderr, "File nid:		%lu\n",
>  			inode.nid);
> -	fprintf(stderr, "	File extent size:	%u\n",
> +	fprintf(stderr, "File extent size:	%u\n",
>  			inode.extent_isize);
> -	fprintf(stderr, "	File xattr size:	%u\n",
> +	fprintf(stderr, "File xattr size:	%u\n",
>  			inode.xattr_isize);
> -	fprintf(stderr, "	File inode size:	%u\n",
> +	fprintf(stderr, "File inode size:	%u\n",
>  			inode.inode_isize);
> -	fprintf(stderr, "	File type:		");
> +	fprintf(stderr, "File type:		");
>  	switch (inode.i_mode & S_IFMT) {
>  	case S_IFREG:
>  		fprintf(stderr, "regular\n");
> @@ -453,13 +461,13 @@ static void dumpfs_print_inode(void)
>  		return;
>  	}
>  
> -	fprintf(stderr, "	File original size:	%lu\n"
> -			"	File on-disk size:	%lu\n",
> +	fprintf(stderr, "File original size:	%lu\n"
> +			"File on-disk size:	%lu\n",
>  			inode.i_size, size);
> -	fprintf(stderr, "	File compress rate:	%.2f%%\n",
> +	fprintf(stderr, "File compress rate:	%.2f%%\n",
>  			(double)(100 * size) / (double)(inode.i_size));
>  
> -	fprintf(stderr, "	File datalayout:	");
> +	fprintf(stderr, "File datalayout:	");
>  	switch (inode.datalayout) {
>  	case EROFS_INODE_FLAT_PLAIN:
>  		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
> @@ -477,18 +485,82 @@ static void dumpfs_print_inode(void)
>  		break;
>  	}
>  
> -	fprintf(stderr, "	File create time:	%s", ctime(&t));
> -	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
> -	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);
> -	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);
> +	fprintf(stderr, "File create time:	%s", ctime(&t));
> +	fprintf(stderr, "File uid:		%u\n", inode.i_uid);
> +	fprintf(stderr, "File gid:		%u\n", inode.i_gid);
> +	fprintf(stderr, "File hard-link count:	%u\n", inode.i_nlink);
>  
>  	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
>  	if (!err)
> -		fprintf(stderr, "	File path:		%s\n", path);
> +		fprintf(stderr, "File path:		%s\n", path);
>  	else
>  		fprintf(stderr, "Path not found\n");
>  }
>  
> +static void dumpfs_print_inode_phy(void)
> +{
> +	int err;
> +	erofs_nid_t nid = dumpcfg.ino_phy;
> +	struct erofs_inode inode = {.nid = nid};
> +	char path[PATH_MAX + 1] = {0};
> +
> +	err = erofs_read_inode_from_disk(&inode);
> +	if (err < 0) {
> +		erofs_err("read inode %lu from disk failed", nid);
> +		return;
> +	}
> +
> +	const erofs_off_t ibase = iloc(inode.nid);
> +	const erofs_off_t pos = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(
> +			ibase + inode.inode_isize + inode.xattr_isize);
> +	erofs_blk_t blocks = inode.u.i_blocks;
> +	erofs_blk_t start = 0;
> +	erofs_blk_t end = 0;
> +	struct erofs_map_blocks map = {
> +		.index = UINT_MAX,
> +		.m_la = 0,
> +	};
> +
> +	fprintf(stderr, "Inode %lu on-disk info:\n", nid);
> +	switch (inode.datalayout) {
> +	case EROFS_INODE_FLAT_INLINE:
> +	case EROFS_INODE_FLAT_PLAIN:
> +		if (inode.u.i_blkaddr == NULL_ADDR)
> +			start = end = erofs_blknr(pos);
> +		else {
> +			start = inode.u.i_blkaddr;
> +			end = start + BLK_ROUND_UP(inode.i_size) - 1;
> +		}
> +		fprintf(stderr, "File size:			%lu\n",
> +				inode.i_size);
> +		fprintf(stderr,
> +				"	Plain Block Address:		%u - %u\n",
> +				start, end);
> +		break;
> +
> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> +	case EROFS_INODE_FLAT_COMPRESSION:
> +		err = z_erofs_map_blocks_iter(&inode, &map);
> +		if (err)
> +			erofs_err("get file blocks range failed");
> +
> +		start = erofs_blknr(map.m_pa);
> +		end = start - 1 + blocks;
> +		fprintf(stderr,
> +				"	Compressed Block Address:	%u - %u\n",
> +				start, end);

How about porting/using fiemap code directly instead?

Thanks,
Gao Xiang

> +		break;
> +	}
> +
> +	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> +	if (!err)
> +		fprintf(stderr, "File Path:			%s\n",
> +				path);
> +	else
> +		erofs_err("path not found");
> +}
> +
> +
>  static int get_file_type(const char *filename)
>  {
>  	char *postfix = strrchr(filename, '.');
> @@ -810,5 +882,7 @@ int main(int argc, char **argv)
>  	if (dumpcfg.print_inode)
>  		dumpfs_print_inode();
>  
> +	if (dumpcfg.print_inode_phy)
> +		dumpfs_print_inode_phy();
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-11 16:13   ` Gao Xiang
@ 2021-09-13  4:30     ` Huang Jianan via Linux-erofs
  2021-09-13 12:46       ` Gao Xiang
  0 siblings, 1 reply; 13+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-09-13  4:30 UTC (permalink / raw)
  To: xiang; +Cc: linux-erofs, mpiglet

在 2021/9/12 0:13, Gao Xiang 写道:
> (+Cc Jianan.)
>
> On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> From: mpiglet <mpiglet@outlook.com>
>>
>> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
>> Signed-off-by: mpiglet <mpiglet@outlook.com>
>> ---
>>   dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 474 insertions(+)
>>
>> diff --git a/dump/main.c b/dump/main.c
>> index 25ac89f..b0acc0b 100644
>> --- a/dump/main.c
>> +++ b/dump/main.c
>> @@ -19,10 +19,78 @@
>>   
>>   struct dumpcfg {
>>   	bool print_superblock;
>> +	bool print_statistic;
>>   	bool print_version;
>>   };
>>   static struct dumpcfg dumpcfg;
>>   
>> +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
>> +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> +static char *file_types[] = {
>> +	".so",
>> +	".png",
>> +	".jpg",
>> +	".xml",
>> +	".html",
>> +	".odex",
>> +	".vdex",
>> +	".apk",
>> +	".ttf",
>> +	".jar",
>> +	".json",
>> +	".ogg",
>> +	".oat",
>> +	".art",
>> +	".rc",
>> +	".otf",
>> +	".txt",
>> +	"others",
>> +};
>> +enum {
>> +	SOFILETYPE = 0,
>> +	PNGFILETYPE,
>> +	JPEGFILETYPE,
>> +	XMLFILETYPE,
>> +	HTMLFILETYPE,
>> +	ODEXFILETYPE,
>> +	VDEXFILETYPE,
>> +	APKFILETYPE,
>> +	TTFFILETYPE,
>> +	JARFILETYPE,
>> +	JSONFILETYPE,
>> +	OGGFILETYPE,
>> +	OATFILETYPE,
>> +	ARTFILETYPE,
>> +	RCFILETYPE,
>> +	OTFFILETYPE,
>> +	TXTFILETYPE,
>> +	OTHERFILETYPE,
>> +};
> Why we need enums here? Can these be resolved with some array index?
>
>> +
>> +#define	FILE_SIZE_BITS	30
>> +struct statistics {
>> +	unsigned long blocks;
>> +	unsigned long files;
>> +	unsigned long files_total_size;
>> +	unsigned long files_total_origin_size;
>> +	double compress_rate;
>> +	unsigned long compressed_files;
>> +	unsigned long uncompressed_files;
>> +
>> +	unsigned long regular_files;
>> +	unsigned long dir_files;
>> +	unsigned long chardev_files;
>> +	unsigned long blkdev_files;
>> +	unsigned long fifo_files;
>> +	unsigned long sock_files;
>> +	unsigned long symlink_files;
>> +
>> +	unsigned int file_type_stat[OTHERFILETYPE + 1];
>> +	unsigned int file_org_size[FILE_SIZE_BITS];
> What do "FILE_SIZE_BITS" and "file_org_size" mean?
>
>> +	unsigned int file_comp_size[FILE_SIZE_BITS];
>> +};
>> +static struct statistics stats;
>> +
>>   static struct option long_options[] = {
>>   	{"help", no_argument, 0, 1},
>>   	{0, 0, 0, 0},
>> @@ -33,6 +101,7 @@ static void usage(void)
>>   	fputs("usage: [options] erofs-image \n\n"
>>   		"Dump erofs layout from erofs-image, and [options] are:\n"
>>   		"-s          print information about superblock\n"
>> +		"-S      print statistic information of the erofs-image\n"
>>   		"-v/-V      print dump.erofs version info\n"
>>   		"-h/--help  display this help and exit\n", stderr);
>>   }
>> @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>>   		case 's':
>>   			dumpcfg.print_superblock = true;
>>   			break;
>> +		case 'S':
>> +			dumpcfg.print_statistic = true;
>> +			break;
>>   		case 'v':
>>   		case 'V':
>>   			dumpfs_print_version();
>> @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>>   	return 0;
>>   }
>>   
>> +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
>> +		erofs_off_t last_cluster_size,
>> +		erofs_off_t *last_cluster_compressed_size)
> Hmmm... do we really need the exact compressed bytes?
> or just compressed blocks is enough?
>
> "compressed blocks" can be gotten in erofs inode.
>
> Btw, although I think it's useful for fsck (check if an erofs is correct).
>
>> +{
>> +	int ret;
>> +	int decomp_len;
>> +	int compressed_len = 0;
>> +	char *decompress;
>> +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> +
>> +	ret = dev_read(raw, map->m_pa, map->m_plen);
>> +	if (ret < 0)
>> +		return -EIO;
>> +
>> +	if (erofs_sb_has_lz4_0padding()) {
>> +		compressed_len = map->m_plen;
>> +	} else {
>> +		// lz4 maximum compression ratio is 255
>> +		decompress = (char *)malloc(map->m_plen * 255);
>> +		if (!decompress) {
>> +			erofs_err("allocate memory for decompress space failed");
>> +			return -1;
>> +		}
>> +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> +				map->m_plen, last_cluster_size,
>> +				map->m_plen * 10);
>> +		if (decomp_len < 0) {
>> +			erofs_err("decompress last cluster to get decompressed size failed");
>> +			free(decompress);
>> +			return -1;
>> +		}
>> +		compressed_len = LZ4_compress_destSize(decompress, raw,
>> +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> +		if (compressed_len < 0) {
>> +			erofs_err("compress to get last extent size failed\n");
>> +			free(decompress);
>> +			return -1;
>> +		}
>> +		free(decompress);
>> +		// dut to the use of lz4hc (can use different compress level),
>> +		// our normal lz4 compress result may be bigger
>> +		compressed_len = compressed_len < map->m_plen ?
>> +			compressed_len : map->m_plen;
>> +	}
>> +
>> +	*last_cluster_compressed_size = compressed_len;
>> +	return 0;
>> +}
>> +
>> +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> +		erofs_off_t *size)
>> +{
>> +	int err;
>> +	erofs_blk_t compressedlcs;
>> +	erofs_off_t last_cluster_size;
>> +	erofs_off_t last_cluster_compressed_size;
>> +	struct erofs_map_blocks map = {
>> +		.index = UINT_MAX,
>> +		.m_la = inode->i_size - 1,
>> +	};
>> +
>> +	err = z_erofs_map_blocks_iter(inode, &map);
> (add Jianan here.)
>
> Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> some functionality to get the file distribution as well when the fs isn't
> mounted?
Hi Xiang,

I have sent the patch and verified it with a similar function. It would be
better to use the new interface here.

Thanks,
Jianan
>
>> +	if (err) {
>> +		erofs_err("read nid %ld's last block failed\n", inode->nid);
>> +		return err;
>> +	}
>> +	compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
>> +	*size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
>> +	last_cluster_size = inode->i_size - map.m_la;
>> +
>> +	if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
>> +		*size += last_cluster_size;
>> +	} else {
>> +		err = z_erofs_get_last_cluster_size_from_disk(&map,
>> +				last_cluster_size,
>> +				&last_cluster_compressed_size);
>> +		if (err) {
>> +			erofs_err("get nid %ld's last extent size failed",
>> +					inode->nid);
>> +			return err;
>> +		}
>> +		*size += last_cluster_compressed_size;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int get_file_compressed_size(struct erofs_inode *inode,
>> +		erofs_off_t *size)
> erofs_dump_get_file_occupied_blocks?
>
>> +{
>> +	int err;
>> +
>> +	*size = 0;
>> +	switch (inode->datalayout) {
>> +	case EROFS_INODE_FLAT_INLINE:
>> +	case EROFS_INODE_FLAT_PLAIN:
>> +		stats.uncompressed_files++;
>> +		*size = inode->i_size;
>> +		break;
>> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
>> +	case EROFS_INODE_FLAT_COMPRESSION:
>> +		stats.compressed_files++;
>> +		err = z_erofs_get_compressed_size(inode, size);
>> +		if (err) {
>> +			erofs_err("get compressed file size failed\n");
>> +			return err;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>>   static void dumpfs_print_superblock(void)
>>   {
>>   	time_t time = sbi.build_time;
>> @@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
>>   
>>   }
>>   
>> +static int get_file_type(const char *filename)
>> +{
>> +	char *postfix = strrchr(filename, '.');
>> +	int type = SOFILETYPE;
>> +
>> +	if (postfix == NULL)
>> +		return OTHERFILETYPE;
>> +	while (type < OTHERFILETYPE) {
>> +		if (strcmp(postfix, file_types[type]) == 0)
>> +			break;
>> +		type++;
>> +	}
>> +	return type;
>> +}
>> +
>> +// file count、file size、file type
> It'd be better to avoid C++ comments...
>
>> +static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
>> +{
>> +	struct erofs_inode vi = { .nid = nid};
>> +	int err;
>> +	char buf[EROFS_BLKSIZ];
>> +	char filename[PATH_MAX + 1];
>> +	erofs_off_t offset;
>> +
>> +	err = erofs_read_inode_from_disk(&vi);
>> +	if (err)
>> +		return err;
>> +
>> +	offset = 0;
>> +	while (offset < vi.i_size) {
>> +		erofs_off_t maxsize = min_t(erofs_off_t,
>> +			vi.i_size - offset, EROFS_BLKSIZ);
>> +		struct erofs_dirent *de = (void *)buf;
>> +		struct erofs_dirent *end;
>> +		unsigned int nameoff;
>> +
>> +		err = erofs_pread(&vi, buf, maxsize, offset);
>> +		if (err)
>> +			return err;
>> +
>> +		nameoff = le16_to_cpu(de->nameoff);
>> +
>> +		if (nameoff < sizeof(struct erofs_dirent) ||
>> +		    nameoff >= PAGE_SIZE) {
>> +			erofs_err("invalid de[0].nameoff %u @ nid %llu",
>> +				  nameoff, nid | 0ULL);
>> +			return -EFSCORRUPTED;
>> +		}
>> +		end = (void *)buf + nameoff;
>> +		while (de < end) {
>> +			const char *dname;
>> +			unsigned int dname_len;
>> +			struct erofs_inode inode = { .nid = de->nid };
>> +			int actual_size_mark;
>> +			int original_size_mark;
>> +			erofs_off_t actual_size = 0;
>> +			erofs_off_t original_size;
>> +
>> +			nameoff = le16_to_cpu(de->nameoff);
>> +			dname = (char *)buf + nameoff;
>> +
>> +			if (de + 1 >= end)
>> +				dname_len = strnlen(dname, maxsize - nameoff);
>> +			else
>> +				dname_len =
>> +					le16_to_cpu(de[1].nameoff) - nameoff;
>> +
>> +			/* a corrupted entry is found */
>> +			if (nameoff + dname_len > maxsize ||
>> +				dname_len > EROFS_NAME_LEN) {
>> +				erofs_err("bogus dirent @ nid %llu",
>> +						le64_to_cpu(de->nid) | 0ULL);
>> +				DBG_BUGON(1);
>> +				return -EFSCORRUPTED;
>> +			}
>> +			if (de->nid != nid && de->nid != parent_nid)
>> +				stats.files++;
>> +
>> +			memset(filename, 0, PATH_MAX + 1);
>> +			memcpy(filename, dname, dname_len);
>> +
>> +			switch (de->file_type) {
>> +			case EROFS_FT_UNKNOWN:
>> +				break;
>> +			case EROFS_FT_REG_FILE:
>> +				err = erofs_read_inode_from_disk(&inode);
>> +				if (err) {
>> +					erofs_err("read file inode from disk failed!");
>> +					return err;
>> +				}
>> +				original_size = inode.i_size;
>> +				stats.files_total_origin_size += original_size;
>> +				stats.regular_files++;
>> +
>> +				err = get_file_compressed_size(&inode,
>> +						&actual_size);
>> +				if (err) {
>> +					erofs_err("get file size failed\n");
>> +					return err;
>> +				}
>> +				stats.files_total_size += actual_size;
>> +				stats.file_type_stat[get_file_type(filename)]++;
>> +
>> +				original_size_mark = 0;
>> +				actual_size_mark = 0;
>> +				actual_size >>= 10;
>> +				original_size >>= 10;
>> +
>> +				while (actual_size || original_size) {
>> +					if (actual_size) {
>> +						actual_size >>= 1;
>> +						actual_size_mark++;
>> +					}
>> +					if (original_size) {
>> +						original_size >>= 1;
>> +						original_size_mark++;
>> +					}
>> +				}
>> +
>> +				if (original_size_mark >= FILE_SIZE_BITS - 1)
>> +					stats.file_org_size[FILE_SIZE_BITS - 1]++;
>> +				else
>> +					stats.file_org_size[original_size_mark]++;
>> +				if (actual_size_mark >= FILE_SIZE_BITS - 1)
>> +					stats.file_comp_size[FILE_SIZE_BITS - 1]++;
>> +				else
>> +					stats.file_comp_size[actual_size_mark]++;
>> +				break;
>> +
>> +			case EROFS_FT_DIR:
>> +				if (de->nid != nid && de->nid != parent_nid) {
>
>
>> +					stats.dir_files++;
>> +					stats.uncompressed_files++;
>> +					err = read_dir(de->nid, nid);
>> +					if (err) {
>> +						fprintf(stderr,
>> +								"parse dir nid %llu error occurred\n",
>> +								de->nid);
>> +						return err;
>> +					}
>> +				}
>> +				break;
>> +			case EROFS_FT_CHRDEV:
>> +				stats.chardev_files++;
>> +				stats.uncompressed_files++;
> How about using an array instead?
>
>> +				break;
>> +			case EROFS_FT_BLKDEV:
>> +				stats.blkdev_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			case EROFS_FT_FIFO:
>> +				stats.fifo_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			case EROFS_FT_SOCK:
>> +				stats.sock_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			case EROFS_FT_SYMLINK:
>> +				stats.symlink_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			}
>> +			++de;
>> +		}
>> +		offset += maxsize;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static void dumpfs_print_statistic_of_filetype(void)
>> +{
>> +	fprintf(stderr, "Filesystem total file count:         %lu\n",
>> +			stats.files);
>> +	fprintf(stderr, "Filesystem regular file count:       %lu\n",
>> +			stats.regular_files);
>> +	fprintf(stderr, "Filesystem directory count:          %lu\n",
>> +			stats.dir_files);
>> +	fprintf(stderr, "Filesystem symlink file count:       %lu\n",
>> +			stats.symlink_files);
>> +	fprintf(stderr, "Filesystem character device count:   %lu\n",
>> +			stats.chardev_files);
>> +	fprintf(stderr, "Filesystem block device count:       %lu\n",
>> +			stats.blkdev_files);
>> +	fprintf(stderr, "Filesystem FIFO file count:          %lu\n",
>> +			stats.fifo_files);
>> +	fprintf(stderr, "Filesystem SOCK file count:          %lu\n",
>> +			stats.sock_files);
> Also a loop can be used here.
>
>> +}
>> +
>> +static void dumpfs_print_chart_row(char *col1, unsigned int col2,
>> +		double col3, char *col4)
>> +{
>> +	char row[500] = {0};
>> +
>> +	sprintf(row, chart_format, col1, col2, col3, col4);
>> +	fprintf(stderr, row);
>> +}
>> +
>> +static void dumpfs_print_chart_of_file(unsigned int *file_counts,
>> +		unsigned int len)
>> +{
>> +	char col1[30];
>> +	unsigned int col2;
>> +	double col3;
>> +	char col4[400];
>> +	unsigned int lowerbound = 0;
>> +	unsigned int upperbound = 1;
>> +
>> +	fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
>> +			"ratio", "distribution");
>> +	for (int i = 0; i < len; i++) {
>> +		memset(col1, 0, 30);
> 		memset(col1, 0, sizeof(col1));
>
>> +		memset(col4, 0, 400);
> 		memset(col4, 0, sizeof(col4));
>
> Thanks,
> Gao Xiang
>
>> +		if (i == len - 1)
>> +			strcpy(col1, " others");
>> +		else if (i <= 6)
>> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
>> +		else
>> +
>> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
>> +		col2 = file_counts[i];
>> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
>> +		memset(col4, '#', col3 / 2);
>> +		dumpfs_print_chart_row(col1, col2, col3, col4);
>> +		lowerbound = upperbound;
>> +		upperbound <<= 1;
>> +	}
>> +}
>> +
>> +static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
>> +{
>> +	char col1[30];
>> +	unsigned int col2;
>> +	double col3;
>> +	char col4[401];
>> +
>> +	fprintf(stderr, header_format, "type", "count", "ratio",
>> +			"distribution");
>> +	for (int i = 0; i < len; i++) {
>> +		memset(col1, 0, 30);
>> +		memset(col4, 0, 401);
>> +		sprintf(col1, "%-17s", file_types[i]);
>> +		col2 = stats.file_type_stat[i];
>> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
>> +		memset(col4, '#', col3 / 2);
>> +		dumpfs_print_chart_row(col1, col2, col3, col4);
>> +	}
>> +}
>> +
>> +static void dumpfs_print_statistic_of_compression(void)
>> +{
>> +	stats.compress_rate = (double)(100 * stats.files_total_size) /
>> +		(double)(stats.files_total_origin_size);
>> +	fprintf(stderr, "Filesystem compressed files:         %lu\n",
>> +			stats.compressed_files);
>> +	fprintf(stderr, "Filesystem uncompressed files:       %lu\n",
>> +			stats.uncompressed_files);
>> +	fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
>> +			stats.files_total_origin_size);
>> +	fprintf(stderr, "Filesystem total file size:          %lu Bytes\n",
>> +			stats.files_total_size);
>> +	fprintf(stderr, "Filesystem compress rate:            %.2f%%\n",
>> +			stats.compress_rate);
>> +}
>> +
>> +static void dumpfs_print_statistic(void)
>> +{
>> +	int err;
>> +
>> +	stats.blocks = sbi.blocks;
>> +	err = read_dir(sbi.root_nid, sbi.root_nid);
>> +	if (err) {
>> +		erofs_err("read dir failed");
>> +		return;
>> +	}
>> +
>> +	dumpfs_print_statistic_of_filetype();
>> +	dumpfs_print_statistic_of_compression();
>> +
>> +	fprintf(stderr, "\nOriginal file size distribution:\n");
>> +	dumpfs_print_chart_of_file(stats.file_org_size, 17);
>> +	fprintf(stderr, "\nOn-Disk file size distribution:\n");
>> +	dumpfs_print_chart_of_file(stats.file_comp_size, 17);
>> +	fprintf(stderr, "\nFile type distribution:\n");
>> +	dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
>> +}
>> +
>>   int main(int argc, char **argv)
>>   {
>>   	int err = 0;
>> @@ -138,5 +608,9 @@ int main(int argc, char **argv)
>>   	if (dumpcfg.print_superblock)
>>   		dumpfs_print_superblock();
>>   
>> +	if (dumpcfg.print_statistic)
>> +		dumpfs_print_statistic();
>> +
>> +
>>   	return 0;
>>   }
>> -- 
>> 2.25.4
>>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-13  4:30     ` Huang Jianan via Linux-erofs
@ 2021-09-13 12:46       ` Gao Xiang
  2021-09-14  2:31         ` Guo Xuenan
  0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-09-13 12:46 UTC (permalink / raw)
  To: Huang Jianan; +Cc: linux-erofs, mpiglet

On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
> 在 2021/9/12 0:13, Gao Xiang 写道:
> > (+Cc Jianan.)
> > 
> > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> > > From: mpiglet <mpiglet@outlook.com>
> > > 
> > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
> > > ---
> > >   dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >   1 file changed, 474 insertions(+)
> > > 
> > > diff --git a/dump/main.c b/dump/main.c
> > > index 25ac89f..b0acc0b 100644
> > > --- a/dump/main.c
> > > +++ b/dump/main.c
> > > @@ -19,10 +19,78 @@
> > >   struct dumpcfg {
> > >   	bool print_superblock;
> > > +	bool print_statistic;
> > >   	bool print_version;
> > >   };
> > >   static struct dumpcfg dumpcfg;
> > > +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> > > +static char *file_types[] = {
> > > +	".so",
> > > +	".png",
> > > +	".jpg",
> > > +	".xml",
> > > +	".html",
> > > +	".odex",
> > > +	".vdex",
> > > +	".apk",
> > > +	".ttf",
> > > +	".jar",
> > > +	".json",
> > > +	".ogg",
> > > +	".oat",
> > > +	".art",
> > > +	".rc",
> > > +	".otf",
> > > +	".txt",
> > > +	"others",
> > > +};
> > > +enum {
> > > +	SOFILETYPE = 0,
> > > +	PNGFILETYPE,
> > > +	JPEGFILETYPE,
> > > +	XMLFILETYPE,
> > > +	HTMLFILETYPE,
> > > +	ODEXFILETYPE,
> > > +	VDEXFILETYPE,
> > > +	APKFILETYPE,
> > > +	TTFFILETYPE,
> > > +	JARFILETYPE,
> > > +	JSONFILETYPE,
> > > +	OGGFILETYPE,
> > > +	OATFILETYPE,
> > > +	ARTFILETYPE,
> > > +	RCFILETYPE,
> > > +	OTFFILETYPE,
> > > +	TXTFILETYPE,
> > > +	OTHERFILETYPE,
> > > +};
> > Why we need enums here? Can these be resolved with some array index?
> > 
> > > +
> > > +#define	FILE_SIZE_BITS	30
> > > +struct statistics {
> > > +	unsigned long blocks;
> > > +	unsigned long files;
> > > +	unsigned long files_total_size;
> > > +	unsigned long files_total_origin_size;
> > > +	double compress_rate;
> > > +	unsigned long compressed_files;
> > > +	unsigned long uncompressed_files;
> > > +
> > > +	unsigned long regular_files;
> > > +	unsigned long dir_files;
> > > +	unsigned long chardev_files;
> > > +	unsigned long blkdev_files;
> > > +	unsigned long fifo_files;
> > > +	unsigned long sock_files;
> > > +	unsigned long symlink_files;
> > > +
> > > +	unsigned int file_type_stat[OTHERFILETYPE + 1];
> > > +	unsigned int file_org_size[FILE_SIZE_BITS];
> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
> > 
> > > +	unsigned int file_comp_size[FILE_SIZE_BITS];
> > > +};
> > > +static struct statistics stats;
> > > +
> > >   static struct option long_options[] = {
> > >   	{"help", no_argument, 0, 1},
> > >   	{0, 0, 0, 0},
> > > @@ -33,6 +101,7 @@ static void usage(void)
> > >   	fputs("usage: [options] erofs-image \n\n"
> > >   		"Dump erofs layout from erofs-image, and [options] are:\n"
> > >   		"-s          print information about superblock\n"
> > > +		"-S      print statistic information of the erofs-image\n"
> > >   		"-v/-V      print dump.erofs version info\n"
> > >   		"-h/--help  display this help and exit\n", stderr);
> > >   }
> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > >   		case 's':
> > >   			dumpcfg.print_superblock = true;
> > >   			break;
> > > +		case 'S':
> > > +			dumpcfg.print_statistic = true;
> > > +			break;
> > >   		case 'v':
> > >   		case 'V':
> > >   			dumpfs_print_version();
> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > >   	return 0;
> > >   }
> > > +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> > > +		erofs_off_t last_cluster_size,
> > > +		erofs_off_t *last_cluster_compressed_size)
> > Hmmm... do we really need the exact compressed bytes?
> > or just compressed blocks is enough?
> > 
> > "compressed blocks" can be gotten in erofs inode.
> > 
> > Btw, although I think it's useful for fsck (check if an erofs is correct).
> > 
> > > +{
> > > +	int ret;
> > > +	int decomp_len;
> > > +	int compressed_len = 0;
> > > +	char *decompress;
> > > +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> > > +
> > > +	ret = dev_read(raw, map->m_pa, map->m_plen);
> > > +	if (ret < 0)
> > > +		return -EIO;
> > > +
> > > +	if (erofs_sb_has_lz4_0padding()) {
> > > +		compressed_len = map->m_plen;
> > > +	} else {
> > > +		// lz4 maximum compression ratio is 255
> > > +		decompress = (char *)malloc(map->m_plen * 255);
> > > +		if (!decompress) {
> > > +			erofs_err("allocate memory for decompress space failed");
> > > +			return -1;
> > > +		}
> > > +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> > > +				map->m_plen, last_cluster_size,
> > > +				map->m_plen * 10);
> > > +		if (decomp_len < 0) {
> > > +			erofs_err("decompress last cluster to get decompressed size failed");
> > > +			free(decompress);
> > > +			return -1;
> > > +		}
> > > +		compressed_len = LZ4_compress_destSize(decompress, raw,
> > > +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> > > +		if (compressed_len < 0) {
> > > +			erofs_err("compress to get last extent size failed\n");
> > > +			free(decompress);
> > > +			return -1;
> > > +		}
> > > +		free(decompress);
> > > +		// dut to the use of lz4hc (can use different compress level),
> > > +		// our normal lz4 compress result may be bigger
> > > +		compressed_len = compressed_len < map->m_plen ?
> > > +			compressed_len : map->m_plen;
> > > +	}
> > > +
> > > +	*last_cluster_compressed_size = compressed_len;
> > > +	return 0;
> > > +}
> > > +
> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> > > +		erofs_off_t *size)
> > > +{
> > > +	int err;
> > > +	erofs_blk_t compressedlcs;
> > > +	erofs_off_t last_cluster_size;
> > > +	erofs_off_t last_cluster_compressed_size;
> > > +	struct erofs_map_blocks map = {
> > > +		.index = UINT_MAX,
> > > +		.m_la = inode->i_size - 1,
> > > +	};
> > > +
> > > +	err = z_erofs_map_blocks_iter(inode, &map);
> > (add Jianan here.)
> > 
> > Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> > some functionality to get the file distribution as well when the fs isn't
> > mounted?
> Hi Xiang,
> 
> I have sent the patch and verified it with a similar function. Better to use
> the
> new interface here.

Yeah, thanks for the patch:
https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/

Hopefully Xuenan can build on this work.

Thanks,
Gao Xiang

> 
> Thanks,
> Jianan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-13 12:46       ` Gao Xiang
@ 2021-09-14  2:31         ` Guo Xuenan
  0 siblings, 0 replies; 13+ messages in thread
From: Guo Xuenan @ 2021-09-14  2:31 UTC (permalink / raw)
  To: Gao Xiang, Huang Jianan; +Cc: linux-erofs, mpiglet

OK, I will send out patch v2 today, and it will be based on Jianan's
work.

在 2021/9/13 20:46, Gao Xiang 写道:
> On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
>> 在 2021/9/12 0:13, Gao Xiang 写道:
>> > (+Cc Jianan.)
>> > > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> > > From: mpiglet <mpiglet@outlook.com>
>> > > > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
>> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
>> > > ---
>> > >   dump/main.c | 474 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> > >   1 file changed, 474 insertions(+)
>> > > > > diff --git a/dump/main.c b/dump/main.c
>> > > index 25ac89f..b0acc0b 100644
>> > > --- a/dump/main.c
>> > > +++ b/dump/main.c
>> > > @@ -19,10 +19,78 @@
>> > >   struct dumpcfg {
>> > >       bool print_superblock;
>> > > +    bool print_statistic;
>> > >       bool print_version;
>> > >   };
>> > >   static struct dumpcfg dumpcfg;
>> > > +static const char chart_format[] = "%-16s    %-11d %8.2f%% 
>> |%-50s|\n";
>> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> > > +static char *file_types[] = {
>> > > +    ".so",
>> > > +    ".png",
>> > > +    ".jpg",
>> > > +    ".xml",
>> > > +    ".html",
>> > > +    ".odex",
>> > > +    ".vdex",
>> > > +    ".apk",
>> > > +    ".ttf",
>> > > +    ".jar",
>> > > +    ".json",
>> > > +    ".ogg",
>> > > +    ".oat",
>> > > +    ".art",
>> > > +    ".rc",
>> > > +    ".otf",
>> > > +    ".txt",
>> > > +    "others",
>> > > +};
>> > > +enum {
>> > > +    SOFILETYPE = 0,
>> > > +    PNGFILETYPE,
>> > > +    JPEGFILETYPE,
>> > > +    XMLFILETYPE,
>> > > +    HTMLFILETYPE,
>> > > +    ODEXFILETYPE,
>> > > +    VDEXFILETYPE,
>> > > +    APKFILETYPE,
>> > > +    TTFFILETYPE,
>> > > +    JARFILETYPE,
>> > > +    JSONFILETYPE,
>> > > +    OGGFILETYPE,
>> > > +    OATFILETYPE,
>> > > +    ARTFILETYPE,
>> > > +    RCFILETYPE,
>> > > +    OTFFILETYPE,
>> > > +    TXTFILETYPE,
>> > > +    OTHERFILETYPE,
>> > > +};
>> > Why we need enums here? Can these be resolved with some array index?
>> > > > +
>> > > +#define    FILE_SIZE_BITS    30
>> > > +struct statistics {
>> > > +    unsigned long blocks;
>> > > +    unsigned long files;
>> > > +    unsigned long files_total_size;
>> > > +    unsigned long files_total_origin_size;
>> > > +    double compress_rate;
>> > > +    unsigned long compressed_files;
>> > > +    unsigned long uncompressed_files;
>> > > +
>> > > +    unsigned long regular_files;
>> > > +    unsigned long dir_files;
>> > > +    unsigned long chardev_files;
>> > > +    unsigned long blkdev_files;
>> > > +    unsigned long fifo_files;
>> > > +    unsigned long sock_files;
>> > > +    unsigned long symlink_files;
>> > > +
>> > > +    unsigned int file_type_stat[OTHERFILETYPE + 1];
>> > > +    unsigned int file_org_size[FILE_SIZE_BITS];
>> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
>> > > > +    unsigned int file_comp_size[FILE_SIZE_BITS];
>> > > +};
>> > > +static struct statistics stats;
>> > > +
>> > >   static struct option long_options[] = {
>> > >       {"help", no_argument, 0, 1},
>> > >       {0, 0, 0, 0},
>> > > @@ -33,6 +101,7 @@ static void usage(void)
>> > >       fputs("usage: [options] erofs-image \n\n"
>> > >           "Dump erofs layout from erofs-image, and [options] are:\n"
>> > >           "-s          print information about superblock\n"
>> > > +        "-S      print statistic information of the erofs-image\n"
>> > >           "-v/-V      print dump.erofs version info\n"
>> > >           "-h/--help  display this help and exit\n", stderr);
>> > >   }
>> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, 
>> char **argv)
>> > >           case 's':
>> > >               dumpcfg.print_superblock = true;
>> > >               break;
>> > > +        case 'S':
>> > > +            dumpcfg.print_statistic = true;
>> > > +            break;
>> > >           case 'v':
>> > >           case 'V':
>> > >               dumpfs_print_version();
>> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int 
>> argc, char **argv)
>> > >       return 0;
>> > >   }
>> > > +static int z_erofs_get_last_cluster_size_from_disk(struct 
>> erofs_map_blocks *map,
>> > > +        erofs_off_t last_cluster_size,
>> > > +        erofs_off_t *last_cluster_compressed_size)
>> > Hmmm... do we really need the exact compressed bytes?
>> > or just compressed blocks is enough?
>> > > "compressed blocks" can be gotten in erofs inode.
>> > > Btw, although I think it's useful for fsck (check if an erofs is 
>> correct).
>> > > > +{
>> > > +    int ret;
>> > > +    int decomp_len;
>> > > +    int compressed_len = 0;
>> > > +    char *decompress;
>> > > +    char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> > > +
>> > > +    ret = dev_read(raw, map->m_pa, map->m_plen);
>> > > +    if (ret < 0)
>> > > +        return -EIO;
>> > > +
>> > > +    if (erofs_sb_has_lz4_0padding()) {
>> > > +        compressed_len = map->m_plen;
>> > > +    } else {
>> > > +        // lz4 maximum compression ratio is 255
>> > > +        decompress = (char *)malloc(map->m_plen * 255);
>> > > +        if (!decompress) {
>> > > +            erofs_err("allocate memory for decompress space 
>> failed");
>> > > +            return -1;
>> > > +        }
>> > > +        decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> > > +                map->m_plen, last_cluster_size,
>> > > +                map->m_plen * 10);
>> > > +        if (decomp_len < 0) {
>> > > +            erofs_err("decompress last cluster to get 
>> decompressed size failed");
>> > > +            free(decompress);
>> > > +            return -1;
>> > > +        }
>> > > +        compressed_len = LZ4_compress_destSize(decompress, raw,
>> > > +                &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> > > +        if (compressed_len < 0) {
>> > > +            erofs_err("compress to get last extent size failed\n");
>> > > +            free(decompress);
>> > > +            return -1;
>> > > +        }
>> > > +        free(decompress);
>> > > +        // dut to the use of lz4hc (can use different compress 
>> level),
>> > > +        // our normal lz4 compress result may be bigger
>> > > +        compressed_len = compressed_len < map->m_plen ?
>> > > +            compressed_len : map->m_plen;
>> > > +    }
>> > > +
>> > > +    *last_cluster_compressed_size = compressed_len;
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> > > +        erofs_off_t *size)
>> > > +{
>> > > +    int err;
>> > > +    erofs_blk_t compressedlcs;
>> > > +    erofs_off_t last_cluster_size;
>> > > +    erofs_off_t last_cluster_compressed_size;
>> > > +    struct erofs_map_blocks map = {
>> > > +        .index = UINT_MAX,
>> > > +        .m_la = inode->i_size - 1,
>> > > +    };
>> > > +
>> > > +    err = z_erofs_map_blocks_iter(inode, &map);
>> > (add Jianan here.)
>> > > Can we port the latest erofs kernel fiemap code to erofs-utils, 
>> and add
>> > some functionality to get the file distribution as well when the fs 
>> isn't
>> > mounted?
>> Hi Xiang,
>>
>> I have sent the patch and verified it with a similar function. Better 
>> to use
>> the
>> new interface here.
>
> Yeah, thanks for the patch:
> https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/ 
>
>
> Hopefully Xuenan could base on this work.
>
> Thanks,
> Gao XIang
>
>>
>> Thanks,
>> Jianan

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-09-14  2:32 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
2021-09-11 15:58   ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
2021-09-11 16:13   ` Gao Xiang
2021-09-13  4:30     ` Huang Jianan via Linux-erofs
2021-09-13 12:46       ` Gao Xiang
2021-09-14  2:31         ` Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
2021-09-11 16:25   ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
2021-09-11 16:29   ` Gao Xiang
2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).