All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils
@ 2021-09-11 13:46 Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

From: mpiglet <mpiglet@outlook.com>

Add a dump tool for erofs so that users can analyze
an erofs image file directly.

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 Makefile.am        |  2 +-
 configure.ac       |  2 ++
 dump/Makefile.am   | 10 ++++++
 dump/main.c        | 84 ++++++++++++++++++++++++++++++++++++++++++++++
 include/erofs/io.h |  3 ++
 lib/namei.c        |  4 +--
 6 files changed, 102 insertions(+), 3 deletions(-)
 create mode 100644 dump/Makefile.am
 create mode 100644 dump/main.c

diff --git a/Makefile.am b/Makefile.am
index b804aa9..fedf7b5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -3,7 +3,7 @@
 
 ACLOCAL_AMFLAGS = -I m4
 
-SUBDIRS = man lib mkfs
+SUBDIRS = man lib mkfs dump
 if ENABLE_FUSE
 SUBDIRS += fuse
 endif
diff --git a/configure.ac b/configure.ac
index f626064..f4fe548 100644
--- a/configure.ac
+++ b/configure.ac
@@ -280,6 +280,8 @@ AC_CONFIG_FILES([Makefile
 		 man/Makefile
 		 lib/Makefile
 		 mkfs/Makefile
+		 dump/Makefile
 		 fuse/Makefile])
+
 AC_OUTPUT
 
diff --git a/dump/Makefile.am b/dump/Makefile.am
new file mode 100644
index 0000000..e664799
--- /dev/null
+++ b/dump/Makefile.am
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0+
+# Makefile.am
+
+AUTOMAKE_OPTIONS = foreign
+bin_PROGRAMS     = dump.erofs
+AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}
+dump_erofs_SOURCES = main.c
+dump_erofs_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
+dump_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} ${liblz4_LIBS}
+
diff --git a/dump/main.c b/dump/main.c
new file mode 100644
index 0000000..8fbc24a
--- /dev/null
+++ b/dump/main.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * dump/main.c
+ *
+ * Copyright (C) 2021-2022 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Wang Qi <mpiglet@outlook.com>
+ *            Guo Xuenan <guoxuenan@huawei.com>
+ */
+
+#include <stdlib.h>
+#include <getopt.h>
+#include <sys/sysmacros.h>
+#include <time.h>
+#include <lz4.h>
+
+#include "erofs/print.h"
+#include "erofs/io.h"
+
+static struct option long_options[] = {
+	{"help", no_argument, 0, 1},
+	{0, 0, 0, 0},
+};
+
+static void usage(void)
+{
+	fputs("usage: [options] erofs-image \n\n"
+		"Dump erofs layout from erofs-image, and [options] are:\n"
+		"-v/-V      print dump.erofs version info\n"
+		"-h/--help  display this help and exit\n", stderr);
+}
+static void dumpfs_print_version(void)
+{
+	fprintf(stderr, "dump.erofs %s\n", cfg.c_version);
+}
+
+static int dumpfs_parse_options_cfg(int argc, char **argv)
+{
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
+					long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'v':
+		case 'V':
+			dumpfs_print_version();
+			exit(0);
+		case 'h':
+		case 1:
+		    usage();
+		    exit(0);
+		default: /* '?' */
+			return -EINVAL;
+		}
+	}
+
+	if (optind >= argc)
+		return -EINVAL;
+
+	cfg.c_img_path = strdup(argv[optind++]);
+	if (!cfg.c_img_path)
+		return -ENOMEM;
+
+	if (optind < argc) {
+		erofs_err("unexpected argument: %s\n", argv[optind]);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int err = 0;
+
+	erofs_init_configure();
+	err = dumpfs_parse_options_cfg(argc, argv);
+	if (err) {
+		if (err == -EINVAL)
+			usage();
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 5574245..00e5de8 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -10,6 +10,7 @@
 #define __EROFS_IO_H
 
 #include <unistd.h>
+#include <sys/types.h>
 #include "internal.h"
 
 #ifndef O_BINARY
@@ -25,6 +26,8 @@ int dev_fillzero(u64 offset, size_t len, bool padding);
 int dev_fsync(void);
 int dev_resize(erofs_blk_t nblocks);
 u64 dev_length(void);
+dev_t erofs_new_decode_dev(u32 dev);
+int erofs_read_inode_from_disk(struct erofs_inode *vi);
 
 static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
 			    u32 nblocks)
diff --git a/lib/namei.c b/lib/namei.c
index 4e06ba4..21631f1 100644
--- a/lib/namei.c
+++ b/lib/namei.c
@@ -15,7 +15,7 @@
 #include "erofs/print.h"
 #include "erofs/io.h"
 
-static dev_t erofs_new_decode_dev(u32 dev)
+dev_t erofs_new_decode_dev(u32 dev)
 {
 	const unsigned int major = (dev & 0xfff00) >> 8;
 	const unsigned int minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
@@ -23,7 +23,7 @@ static dev_t erofs_new_decode_dev(u32 dev)
 	return makedev(major, minor);
 }
 
-static int erofs_read_inode_from_disk(struct erofs_inode *vi)
+int erofs_read_inode_from_disk(struct erofs_inode *vi)
 {
 	int ret, ifmt;
 	char buf[sizeof(struct erofs_inode_extended)];
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 15:58   ` Gao Xiang
  2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/dump/main.c b/dump/main.c
index 8fbc24a..25ac89f 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -17,6 +17,12 @@
 #include "erofs/print.h"
 #include "erofs/io.h"
 
+struct dumpcfg {
+	bool print_superblock;
+	bool print_version;
+};
+static struct dumpcfg dumpcfg;
+
 static struct option long_options[] = {
 	{"help", no_argument, 0, 1},
 	{0, 0, 0, 0},
@@ -26,6 +32,7 @@ static void usage(void)
 {
 	fputs("usage: [options] erofs-image \n\n"
 		"Dump erofs layout from erofs-image, and [options] are:\n"
+		"-s          print information about superblock\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -41,6 +48,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
 					long_options, NULL)) != -1) {
 		switch (opt) {
+		case 's':
+			dumpcfg.print_superblock = true;
+			break;
 		case 'v':
 		case 'V':
 			dumpfs_print_version();
@@ -68,6 +78,39 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 	return 0;
 }
 
+static void dumpfs_print_superblock(void)
+{
+	time_t time = sbi.build_time;
+
+	fprintf(stderr, "Filesystem magic number:	0x%04X\n", EROFS_SUPER_MAGIC_V1);
+	fprintf(stderr, "Filesystem blocks: 		%lu\n", sbi.blocks);
+	fprintf(stderr, "Filesystem meta block:		%u\n", sbi.meta_blkaddr);
+	fprintf(stderr, "Filesystem xattr block:	%u\n", sbi.xattr_blkaddr);
+	fprintf(stderr, "Filesystem root nid:		%ld\n", sbi.root_nid);
+	fprintf(stderr, "Filesystem valid inos:		%lu\n", sbi.inos);
+	fprintf(stderr, "Filesystem created:		%s", ctime(&time));
+	fprintf(stderr, "Filesystem uuid:		");
+	for (int i = 0; i < 16; i++)
+		fprintf(stderr, "%02x", sbi.uuid[i]);
+	fprintf(stderr, "\n");
+
+	if (erofs_sb_has_lz4_0padding())
+		fprintf(stderr, "Filesystem support lz4 0padding\n");
+	else
+		fprintf(stderr, "Filesystem not support lz4 0padding\n");
+
+	if (erofs_sb_has_big_pcluster())
+		fprintf(stderr, "Filesystem support big pcluster\n");
+	else
+		fprintf(stderr, "Filesystem not support big pcluster\n");
+
+	if (erofs_sb_has_sb_chksum())
+		fprintf(stderr, "Filesystem has super block checksum feature\n");
+	else
+		fprintf(stderr, "Filesystem has no superblock checksum feature\n");
+
+}
+
 int main(int argc, char **argv)
 {
 	int err = 0;
@@ -80,5 +123,20 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
+	err = dev_open_ro(cfg.c_img_path);
+	if (err) {
+		erofs_err("open image file failed");
+		return -1;
+	}
+
+	err = erofs_read_superblock();
+	if (err) {
+		erofs_err("read superblock failed");
+		return -1;
+	}
+
+	if (dumpcfg.print_superblock)
+		dumpfs_print_superblock();
+
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 16:13   ` Gao Xiang
  2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="y", Size: 13813 bytes --]

From: mpiglet <mpiglet@outlook.com>

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 474 insertions(+)

diff --git a/dump/main.c b/dump/main.c
index 25ac89f..b0acc0b 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -19,10 +19,78 @@
 
 struct dumpcfg {
 	bool print_superblock;
+	bool print_statistic;
 	bool print_version;
 };
 static struct dumpcfg dumpcfg;
 
+static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
+static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
+static char *file_types[] = {
+	".so",
+	".png",
+	".jpg",
+	".xml",
+	".html",
+	".odex",
+	".vdex",
+	".apk",
+	".ttf",
+	".jar",
+	".json",
+	".ogg",
+	".oat",
+	".art",
+	".rc",
+	".otf",
+	".txt",
+	"others",
+};
+enum {
+	SOFILETYPE = 0,
+	PNGFILETYPE,
+	JPEGFILETYPE,
+	XMLFILETYPE,
+	HTMLFILETYPE,
+	ODEXFILETYPE,
+	VDEXFILETYPE,
+	APKFILETYPE,
+	TTFFILETYPE,
+	JARFILETYPE,
+	JSONFILETYPE,
+	OGGFILETYPE,
+	OATFILETYPE,
+	ARTFILETYPE,
+	RCFILETYPE,
+	OTFFILETYPE,
+	TXTFILETYPE,
+	OTHERFILETYPE,
+};
+
+#define	FILE_SIZE_BITS	30
+struct statistics {
+	unsigned long blocks;
+	unsigned long files;
+	unsigned long files_total_size;
+	unsigned long files_total_origin_size;
+	double compress_rate;
+	unsigned long compressed_files;
+	unsigned long uncompressed_files;
+
+	unsigned long regular_files;
+	unsigned long dir_files;
+	unsigned long chardev_files;
+	unsigned long blkdev_files;
+	unsigned long fifo_files;
+	unsigned long sock_files;
+	unsigned long symlink_files;
+
+	unsigned int file_type_stat[OTHERFILETYPE + 1];
+	unsigned int file_org_size[FILE_SIZE_BITS];
+	unsigned int file_comp_size[FILE_SIZE_BITS];
+};
+static struct statistics stats;
+
 static struct option long_options[] = {
 	{"help", no_argument, 0, 1},
 	{0, 0, 0, 0},
@@ -33,6 +101,7 @@ static void usage(void)
 	fputs("usage: [options] erofs-image \n\n"
 		"Dump erofs layout from erofs-image, and [options] are:\n"
 		"-s          print information about superblock\n"
+		"-S      print statistic information of the erofs-image\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 		case 's':
 			dumpcfg.print_superblock = true;
 			break;
+		case 'S':
+			dumpcfg.print_statistic = true;
+			break;
 		case 'v':
 		case 'V':
 			dumpfs_print_version();
@@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 	return 0;
 }
 
+/* Measure the compressed length of the last extent described by @map. */
+static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
+		erofs_off_t last_cluster_size,
+		erofs_off_t *last_cluster_compressed_size)
+{
+	int decomp_len;
+	int compressed_len = 0;
+	char *decompress;
+	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
+
+	if (map->m_plen > sizeof(raw) ||	/* keep the read inside raw[] */
+	    dev_read(raw, map->m_pa, map->m_plen) < 0)
+		return -EIO;
+
+	if (erofs_sb_has_lz4_0padding()) {
+		compressed_len = map->m_plen;
+	} else {
+		/* lz4 maximum compression ratio is 255: buffer and capacity alike */
+		decompress = malloc(map->m_plen * 255);
+		if (!decompress) {
+			erofs_err("allocate memory for decompress space failed");
+			return -ENOMEM;
+		}
+		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
+				map->m_plen, last_cluster_size,
+				map->m_plen * 255);
+		if (decomp_len < 0) {
+			erofs_err("decompress last cluster to get decompressed size failed");
+			free(decompress);
+			return -1;
+		}
+		compressed_len = LZ4_compress_destSize(decompress, raw,
+				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
+		if (compressed_len <= 0) {	/* lz4 reports failure as 0 */
+			erofs_err("compress to get last extent size failed\n");
+			free(decompress);
+			return -1;
+		}
+		free(decompress);
+		/* due to the use of lz4hc (can use different compress level), */
+		/* our normal lz4 compress result may be bigger */
+		compressed_len = compressed_len < map->m_plen ?
+			compressed_len : map->m_plen;
+	}
+
+	*last_cluster_compressed_size = compressed_len;
+	return 0;
+}
+
+static int z_erofs_get_compressed_size(struct erofs_inode *inode,
+		erofs_off_t *size)
+{
+	int err;
+	erofs_blk_t compressedlcs;
+	erofs_off_t last_cluster_size;
+	erofs_off_t last_cluster_compressed_size;
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+		.m_la = inode->i_size - 1,
+	};
+
+	err = z_erofs_map_blocks_iter(inode, &map);
+	if (err) {
+		erofs_err("read nid %ld's last block failed\n", inode->nid);
+		return err;
+	}
+	compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
+	*size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
+	last_cluster_size = inode->i_size - map.m_la;
+
+	if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
+		*size += last_cluster_size;
+	} else {
+		err = z_erofs_get_last_cluster_size_from_disk(&map,
+				last_cluster_size,
+				&last_cluster_compressed_size);
+		if (err) {
+			erofs_err("get nid %ld's last extent size failed",
+					inode->nid);
+			return err;
+		}
+		*size += last_cluster_compressed_size;
+	}
+	return 0;
+}
+
+static int get_file_compressed_size(struct erofs_inode *inode,
+		erofs_off_t *size)
+{
+	int err;
+
+	*size = 0;
+	switch (inode->datalayout) {
+	case EROFS_INODE_FLAT_INLINE:
+	case EROFS_INODE_FLAT_PLAIN:
+		stats.uncompressed_files++;
+		*size = inode->i_size;
+		break;
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+	case EROFS_INODE_FLAT_COMPRESSION:
+		stats.compressed_files++;
+		err = z_erofs_get_compressed_size(inode, size);
+		if (err) {
+			erofs_err("get compressed file size failed\n");
+			return err;
+		}
+	}
+	return 0;
+}
+
 static void dumpfs_print_superblock(void)
 {
 	time_t time = sbi.build_time;
@@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
 
 }
 
+static int get_file_type(const char *filename)
+{
+	char *postfix = strrchr(filename, '.');
+	int type = SOFILETYPE;
+
+	if (postfix == NULL)
+		return OTHERFILETYPE;
+	while (type < OTHERFILETYPE) {
+		if (strcmp(postfix, file_types[type]) == 0)
+			break;
+		type++;
+	}
+	return type;
+}
+
+// file count、file size、file type
+static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
+{
+	struct erofs_inode vi = { .nid = nid};
+	int err;
+	char buf[EROFS_BLKSIZ];
+	char filename[PATH_MAX + 1];
+	erofs_off_t offset;
+
+	err = erofs_read_inode_from_disk(&vi);
+	if (err)
+		return err;
+
+	offset = 0;
+	while (offset < vi.i_size) {
+		erofs_off_t maxsize = min_t(erofs_off_t,
+			vi.i_size - offset, EROFS_BLKSIZ);
+		struct erofs_dirent *de = (void *)buf;
+		struct erofs_dirent *end;
+		unsigned int nameoff;
+
+		err = erofs_pread(&vi, buf, maxsize, offset);
+		if (err)
+			return err;
+
+		nameoff = le16_to_cpu(de->nameoff);
+
+		if (nameoff < sizeof(struct erofs_dirent) ||
+		    nameoff >= PAGE_SIZE) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu",
+				  nameoff, nid | 0ULL);
+			return -EFSCORRUPTED;
+		}
+		end = (void *)buf + nameoff;
+		while (de < end) {
+			const char *dname;
+			unsigned int dname_len;
+			struct erofs_inode inode = { .nid = de->nid };
+			int actual_size_mark;
+			int original_size_mark;
+			erofs_off_t actual_size = 0;
+			erofs_off_t original_size;
+
+			nameoff = le16_to_cpu(de->nameoff);
+			dname = (char *)buf + nameoff;
+
+			if (de + 1 >= end)
+				dname_len = strnlen(dname, maxsize - nameoff);
+			else
+				dname_len =
+					le16_to_cpu(de[1].nameoff) - nameoff;
+
+			/* a corrupted entry is found */
+			if (nameoff + dname_len > maxsize ||
+				dname_len > EROFS_NAME_LEN) {
+				erofs_err("bogus dirent @ nid %llu",
+						le64_to_cpu(de->nid) | 0ULL);
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+			if (de->nid != nid && de->nid != parent_nid)
+				stats.files++;
+
+			memset(filename, 0, PATH_MAX + 1);
+			memcpy(filename, dname, dname_len);
+
+			switch (de->file_type) {
+			case EROFS_FT_UNKNOWN:
+				break;
+			case EROFS_FT_REG_FILE:
+				err = erofs_read_inode_from_disk(&inode);
+				if (err) {
+					erofs_err("read file inode from disk failed!");
+					return err;
+				}
+				original_size = inode.i_size;
+				stats.files_total_origin_size += original_size;
+				stats.regular_files++;
+
+				err = get_file_compressed_size(&inode,
+						&actual_size);
+				if (err) {
+					erofs_err("get file size failed\n");
+					return err;
+				}
+				stats.files_total_size += actual_size;
+				stats.file_type_stat[get_file_type(filename)]++;
+
+				original_size_mark = 0;
+				actual_size_mark = 0;
+				actual_size >>= 10;
+				original_size >>= 10;
+
+				while (actual_size || original_size) {
+					if (actual_size) {
+						actual_size >>= 1;
+						actual_size_mark++;
+					}
+					if (original_size) {
+						original_size >>= 1;
+						original_size_mark++;
+					}
+				}
+
+				if (original_size_mark >= FILE_SIZE_BITS - 1)
+					stats.file_org_size[FILE_SIZE_BITS - 1]++;
+				else
+					stats.file_org_size[original_size_mark]++;
+				if (actual_size_mark >= FILE_SIZE_BITS - 1)
+					stats.file_comp_size[FILE_SIZE_BITS - 1]++;
+				else
+					stats.file_comp_size[actual_size_mark]++;
+				break;
+
+			case EROFS_FT_DIR:
+				if (de->nid != nid && de->nid != parent_nid) {
+					stats.dir_files++;
+					stats.uncompressed_files++;
+					err = read_dir(de->nid, nid);
+					if (err) {
+						fprintf(stderr,
+								"parse dir nid %llu error occurred\n",
+								de->nid);
+						return err;
+					}
+				}
+				break;
+			case EROFS_FT_CHRDEV:
+				stats.chardev_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_BLKDEV:
+				stats.blkdev_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_FIFO:
+				stats.fifo_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_SOCK:
+				stats.sock_files++;
+				stats.uncompressed_files++;
+				break;
+			case EROFS_FT_SYMLINK:
+				stats.symlink_files++;
+				stats.uncompressed_files++;
+				break;
+			}
+			++de;
+		}
+		offset += maxsize;
+	}
+	return 0;
+}
+
+static void dumpfs_print_statistic_of_filetype(void)
+{
+	fprintf(stderr, "Filesystem total file count:         %lu\n",
+			stats.files);
+	fprintf(stderr, "Filesystem regular file count:       %lu\n",
+			stats.regular_files);
+	fprintf(stderr, "Filesystem directory count:          %lu\n",
+			stats.dir_files);
+	fprintf(stderr, "Filesystem symlink file count:       %lu\n",
+			stats.symlink_files);
+	fprintf(stderr, "Filesystem character device count:   %lu\n",
+			stats.chardev_files);
+	fprintf(stderr, "Filesystem block device count:       %lu\n",
+			stats.blkdev_files);
+	fprintf(stderr, "Filesystem FIFO file count:          %lu\n",
+			stats.fifo_files);
+	fprintf(stderr, "Filesystem SOCK file count:          %lu\n",
+			stats.sock_files);
+}
+
+/* Print one chart line (label, count, percentage, bar) to stderr. */
+static void dumpfs_print_chart_row(char *col1, unsigned int col2,
+		double col3, char *col4)
+{
+	/* Format straight to the stream: the rendered row contains a literal */
+	/* '%', so it must never be passed back to fprintf() as the format. */
+	fprintf(stderr, chart_format, col1, col2, col3, col4);
+}
+
+/* Chart @len power-of-two size buckets (KB); the last row is "others". */
+static void dumpfs_print_chart_of_file(unsigned int *file_counts,
+		unsigned int len)
+{
+	char col1[30];
+	unsigned int col2;
+	double col3;
+	char col4[400];
+	unsigned int lowerbound = 0;
+	unsigned int upperbound = 1;
+
+	fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
+			"ratio", "distribution");
+	for (unsigned int i = 0; i < len; i++) {
+		memset(col1, 0, sizeof(col1));
+		memset(col4, 0, sizeof(col4));
+		if (i == len - 1)
+			strcpy(col1, " others");
+		else
+			sprintf(col1, "%6u .. %-6u", lowerbound, upperbound);
+		col2 = file_counts[i];
+		/* no regular files: report 0% instead of dividing by zero */
+		col3 = stats.regular_files ?
+			100.0 * col2 / (double)stats.regular_files : 0.0;
+		memset(col4, '#', col3 / 2);
+		dumpfs_print_chart_row(col1, col2, col3, col4);
+		lowerbound = upperbound;
+		upperbound <<= 1;
+	}
+}
+
+/* Chart per-suffix counts of regular files (0% when there are none). */
+static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
+{
+	char col1[30], col4[401];
+	unsigned int col2;
+	double col3;
+
+	fprintf(stderr, header_format, "type", "count", "ratio",
+			"distribution");
+	for (unsigned int i = 0; i < len; i++) {
+		memset(col4, 0, sizeof(col4));
+		snprintf(col1, sizeof(col1), "%-17s", file_types[i]);
+		col2 = stats.file_type_stat[i];
+		col3 = stats.regular_files ?
+			100.0 * col2 / (double)stats.regular_files : 0.0;
+		memset(col4, '#', col3 / 2);
+		dumpfs_print_chart_row(col1, col2, col3, col4);
+	}
+}
+
+static void dumpfs_print_statistic_of_compression(void)
+{
+	stats.compress_rate = (double)(100 * stats.files_total_size) /
+		(double)(stats.files_total_origin_size);
+	fprintf(stderr, "Filesystem compressed files:         %lu\n",
+			stats.compressed_files);
+	fprintf(stderr, "Filesystem uncompressed files:       %lu\n",
+			stats.uncompressed_files);
+	fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
+			stats.files_total_origin_size);
+	fprintf(stderr, "Filesystem total file size:          %lu Bytes\n",
+			stats.files_total_size);
+	fprintf(stderr, "Filesystem compress rate:            %.2f%%\n",
+			stats.compress_rate);
+}
+
+static void dumpfs_print_statistic(void)
+{
+	int err;
+
+	stats.blocks = sbi.blocks;
+	err = read_dir(sbi.root_nid, sbi.root_nid);
+	if (err) {
+		erofs_err("read dir failed");
+		return;
+	}
+
+	dumpfs_print_statistic_of_filetype();
+	dumpfs_print_statistic_of_compression();
+
+	fprintf(stderr, "\nOriginal file size distribution:\n");
+	dumpfs_print_chart_of_file(stats.file_org_size, 17);
+	fprintf(stderr, "\nOn-Disk file size distribution:\n");
+	dumpfs_print_chart_of_file(stats.file_comp_size, 17);
+	fprintf(stderr, "\nFile type distribution:\n");
+	dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
+}
+
 int main(int argc, char **argv)
 {
 	int err = 0;
@@ -138,5 +608,9 @@ int main(int argc, char **argv)
 	if (dumpcfg.print_superblock)
 		dumpfs_print_superblock();
 
+	if (dumpcfg.print_statistic)
+		dumpfs_print_statistic();
+
+
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
  2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 16:25   ` Gao Xiang
  2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
  2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

From: mpiglet <mpiglet@outlook.com>

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 200 insertions(+), 2 deletions(-)

diff --git a/dump/main.c b/dump/main.c
index b0acc0b..2389cef 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -19,8 +19,10 @@
 
 struct dumpcfg {
 	bool print_superblock;
+	bool print_inode;
 	bool print_statistic;
 	bool print_version;
+	u64 ino;
 };
 static struct dumpcfg dumpcfg;
 
@@ -100,8 +102,9 @@ static void usage(void)
 {
 	fputs("usage: [options] erofs-image \n\n"
 		"Dump erofs layout from erofs-image, and [options] are:\n"
-		"-s          print information about superblock\n"
-		"-S      print statistic information of the erofs-image\n"
+		"-s         print information about superblock\n"
+		"-S         print statistic information of the erofs-image\n"
+		"-i #       print target # inode info\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -113,6 +116,7 @@ static void dumpfs_print_version(void)
 static int dumpfs_parse_options_cfg(int argc, char **argv)
 {
 	int opt;
+	u64 i;
 
 	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
 					long_options, NULL)) != -1) {
@@ -127,6 +131,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 		case 'V':
 			dumpfs_print_version();
 			exit(0);
+		case 'i':
+			i = atoll(optarg);
+			dumpcfg.print_inode = true;
+			dumpcfg.ino = i;
+			break;
 		case 'h':
 		case 1:
 		    usage();
@@ -293,6 +302,193 @@ static void dumpfs_print_superblock(void)
 
 }
 
+/* DFS under dir @nid for @target, building its path in @path from @pos;
+ * @path must be zero-filled, PATH_MAX + 1 bytes.  Returns 0 when found. */
+static int get_path_by_nid(erofs_nid_t nid, erofs_nid_t parent_nid,
+		erofs_nid_t target, char *path, unsigned int pos)
+{
+	int err;
+	struct erofs_inode inode = {.nid = nid};
+	erofs_off_t offset = 0;
+	char buf[EROFS_BLKSIZ];
+
+	path[pos++] = '/';
+	if (target == sbi.root_nid)
+		return 0;
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err) {
+		erofs_err("read inode %lu failed", nid);
+		return err;
+	}
+
+	while (offset < inode.i_size) {
+		erofs_off_t maxsize = min_t(erofs_off_t,
+					inode.i_size - offset, EROFS_BLKSIZ);
+		struct erofs_dirent *de = (void *)buf;
+		struct erofs_dirent *end;
+		unsigned int nameoff;
+
+		err = erofs_pread(&inode, buf, maxsize, offset);
+		if (err)
+			return err;
+		nameoff = le16_to_cpu(de->nameoff);
+		if (nameoff < sizeof(struct erofs_dirent) ||
+		    nameoff >= PAGE_SIZE) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu",
+				  nameoff, nid | 0ULL);
+			return -EFSCORRUPTED;
+		}
+		end = (void *)buf + nameoff;
+		for (; de < end; ++de) {
+			const erofs_nid_t de_nid = le64_to_cpu(de->nid);
+			const char *dname;
+			unsigned int dname_len;
+
+			nameoff = le16_to_cpu(de->nameoff);
+			dname = (char *)buf + nameoff;
+			if (de + 1 >= end)
+				dname_len = strnlen(dname, maxsize - nameoff);
+			else
+				dname_len = le16_to_cpu(de[1].nameoff)
+					- nameoff;
+			/* a corrupted entry is found */
+			if (nameoff + dname_len > maxsize ||
+			    dname_len > EROFS_NAME_LEN) {
+				erofs_err("bogus dirent @ nid %llu",
+						de_nid | 0ULL);
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+			/* skip names that would not fit in path[] */
+			if (pos + dname_len >= PATH_MAX)
+				continue;
+			if (de_nid == target) {
+				memcpy(path + pos, dname, dname_len);
+				return 0;
+			}
+			if (de->file_type == EROFS_FT_DIR &&
+					de_nid != parent_nid &&
+					de_nid != nid) {
+				memcpy(path + pos, dname, dname_len);
+				err = get_path_by_nid(de_nid, nid,
+						target, path, pos + dname_len);
+				if (!err)
+					return 0;
+				/* also wipe the '/' the callee appended */
+				memset(path + pos, 0, dname_len + 1);
+			}
+		}
+		offset += maxsize;
+	}
+	return -1;
+}
+
+/* -i: print the attributes, on-disk size and path of inode dumpcfg.ino. */
+static void dumpfs_print_inode(void)
+{
+	int err;
+	erofs_off_t size;
+	erofs_nid_t nid = dumpcfg.ino;
+	struct erofs_inode inode = {.nid = nid};
+	char path[PATH_MAX + 1] = {0};
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err) {
+		erofs_err("read inode %lu from disk failed", nid);
+		return;
+	}
+	time_t t = inode.i_ctime;	/* still zero before the read above */
+	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
+	switch (inode.inode_isize) {
+	case 32:
+		fprintf(stderr, "	File inode is compacted layout\n");
+		break;
+	case 64:
+		fprintf(stderr, "	File inode is extended layout\n");
+		break;
+	default:
+		erofs_err("unsupported inode layout\n");
+	}
+	fprintf(stderr, "	File size:		%lu\n",
+			inode.i_size);
+	fprintf(stderr, "	File nid:		%lu\n",
+			inode.nid);
+	fprintf(stderr, "	File extent size:	%u\n",
+			inode.extent_isize);
+	fprintf(stderr, "	File xattr size:	%u\n",
+			inode.xattr_isize);
+	fprintf(stderr, "	File inode size:	%u\n",
+			inode.inode_isize);
+	fprintf(stderr, "	File type:		");
+	switch (inode.i_mode & S_IFMT) {
+	case S_IFREG:
+		fprintf(stderr, "regular\n");
+		break;
+	case S_IFDIR:
+		fprintf(stderr, "directory\n");
+		break;
+	case S_IFLNK:
+		fprintf(stderr, "link\n");
+		break;
+	case S_IFCHR:
+		fprintf(stderr, "character device\n");
+		break;
+	case S_IFBLK:
+		fprintf(stderr, "block device\n");
+		break;
+	case S_IFIFO:
+		fprintf(stderr, "fifo\n");
+		break;
+	case S_IFSOCK:
+		fprintf(stderr, "sock\n");
+		break;
+	default:
+		break;
+	}
+
+	err = get_file_compressed_size(&inode, &size);
+	if (err) {
+		erofs_err("get file size failed\n");
+		return;
+	}
+
+	fprintf(stderr, "	File original size:	%lu\n"
+			"	File on-disk size:	%lu\n",
+			inode.i_size, size);
+	fprintf(stderr, "	File compress rate:	%.2f%%\n",
+			(double)(100 * size) / (double)(inode.i_size));
+
+	fprintf(stderr, "	File datalayout:	");
+	switch (inode.datalayout) {
+	case EROFS_INODE_FLAT_PLAIN:
+		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
+		break;
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION_LEGACY\n");
+		break;
+	case EROFS_INODE_FLAT_INLINE:
+		fprintf(stderr, "EROFS_INODE_FLAT_INLINE\n");
+		break;
+	case EROFS_INODE_FLAT_COMPRESSION:
+		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION\n");
+		break;
+	default:
+		break;
+	}
+
+	fprintf(stderr, "	File create time:	%s", ctime(&t));
+	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
+	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);
+	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);
+
+	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
+	if (!err)
+		fprintf(stderr, "	File path:		%s\n", path);
+	else
+		fprintf(stderr, "Path not found\n");
+}
+
 static int get_file_type(const char *filename)
 {
 	char *postfix = strrchr(filename, '.');
@@ -611,6 +807,8 @@ int main(int argc, char **argv)
 	if (dumpcfg.print_statistic)
 		dumpfs_print_statistic();
 
+	if (dumpcfg.print_inode)
+		dumpfs_print_inode();
 
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
                   ` (2 preceding siblings ...)
  2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
  2021-09-11 16:29   ` Gao Xiang
  2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang
  4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
  To: xiang, linux-erofs; +Cc: mpiglet

From: mpiglet <mpiglet@outlook.com>

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
 dump/main.c | 108 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 91 insertions(+), 17 deletions(-)

diff --git a/dump/main.c b/dump/main.c
index 2389cef..efce309 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -21,8 +21,10 @@ struct dumpcfg {
 	bool print_superblock;
 	bool print_inode;
 	bool print_statistic;
+	bool print_inode_phy;
 	bool print_version;
 	u64 ino;
+	u64 ino_phy;
 };
 static struct dumpcfg dumpcfg;
 
@@ -105,6 +107,7 @@ static void usage(void)
 		"-s         print information about superblock\n"
 		"-S         print statistic information of the erofs-image\n"
 		"-i #       print target # inode info\n"
+		"-I #       print target # inode on-disk info\n"
 		"-v/-V      print dump.erofs version info\n"
 		"-h/--help  display this help and exit\n", stderr);
 }
@@ -136,6 +139,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
 			dumpcfg.print_inode = true;
 			dumpcfg.ino = i;
 			break;
+		case 'I':
+			i = atoll(optarg);
+			dumpcfg.print_inode_phy = true;
+			dumpcfg.ino_phy = i;
+			break;
 		case 'h':
 		case 1:
 		    usage();
@@ -402,25 +410,25 @@ static void dumpfs_print_inode(void)
 	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
 	switch (inode.inode_isize) {
 	case 32:
-		fprintf(stderr, "	File inode is compacted layout\n");
+		fprintf(stderr, "File inode is compacted layout\n");
 		break;
 	case 64:
-		fprintf(stderr, "	File inode is extended layout\n");
+		fprintf(stderr, "File inode is extended layout\n");
 		break;
 	default:
 		erofs_err("unsupported inode layout\n");
 	}
-	fprintf(stderr, "	File size:		%lu\n",
+	fprintf(stderr, "File size:		%lu\n",
 			inode.i_size);
-	fprintf(stderr, "	File nid:		%lu\n",
+	fprintf(stderr, "File nid:		%lu\n",
 			inode.nid);
-	fprintf(stderr, "	File extent size:	%u\n",
+	fprintf(stderr, "File extent size:	%u\n",
 			inode.extent_isize);
-	fprintf(stderr, "	File xattr size:	%u\n",
+	fprintf(stderr, "File xattr size:	%u\n",
 			inode.xattr_isize);
-	fprintf(stderr, "	File inode size:	%u\n",
+	fprintf(stderr, "File inode size:	%u\n",
 			inode.inode_isize);
-	fprintf(stderr, "	File type:		");
+	fprintf(stderr, "File type:		");
 	switch (inode.i_mode & S_IFMT) {
 	case S_IFREG:
 		fprintf(stderr, "regular\n");
@@ -453,13 +461,13 @@ static void dumpfs_print_inode(void)
 		return;
 	}
 
-	fprintf(stderr, "	File original size:	%lu\n"
-			"	File on-disk size:	%lu\n",
+	fprintf(stderr, "File original size:	%lu\n"
+			"File on-disk size:	%lu\n",
 			inode.i_size, size);
-	fprintf(stderr, "	File compress rate:	%.2f%%\n",
+	fprintf(stderr, "File compress rate:	%.2f%%\n",
 			(double)(100 * size) / (double)(inode.i_size));
 
-	fprintf(stderr, "	File datalayout:	");
+	fprintf(stderr, "File datalayout:	");
 	switch (inode.datalayout) {
 	case EROFS_INODE_FLAT_PLAIN:
 		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
@@ -477,18 +485,82 @@ static void dumpfs_print_inode(void)
 		break;
 	}
 
-	fprintf(stderr, "	File create time:	%s", ctime(&t));
-	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
-	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);
-	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);
+	fprintf(stderr, "File create time:	%s", ctime(&t));
+	fprintf(stderr, "File uid:		%u\n", inode.i_uid);
+	fprintf(stderr, "File gid:		%u\n", inode.i_gid);
+	fprintf(stderr, "File hard-link count:	%u\n", inode.i_nlink);
 
 	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
 	if (!err)
-		fprintf(stderr, "	File path:		%s\n", path);
+		fprintf(stderr, "File path:		%s\n", path);
 	else
 		fprintf(stderr, "Path not found\n");
 }
 
+static void dumpfs_print_inode_phy(void)
+{
+	int err;
+	erofs_nid_t nid = dumpcfg.ino_phy;
+	struct erofs_inode inode = {.nid = nid};
+	char path[PATH_MAX + 1] = {0};
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err < 0) {
+		erofs_err("read inode %lu from disk failed", nid);
+		return;
+	}
+
+	const erofs_off_t ibase = iloc(inode.nid);
+	const erofs_off_t pos = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(
+			ibase + inode.inode_isize + inode.xattr_isize);
+	erofs_blk_t blocks = inode.u.i_blocks;
+	erofs_blk_t start = 0;
+	erofs_blk_t end = 0;
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+		.m_la = 0,
+	};
+
+	fprintf(stderr, "Inode %lu on-disk info:\n", nid);
+	switch (inode.datalayout) {
+	case EROFS_INODE_FLAT_INLINE:
+	case EROFS_INODE_FLAT_PLAIN:
+		if (inode.u.i_blkaddr == NULL_ADDR)
+			start = end = erofs_blknr(pos);
+		else {
+			start = inode.u.i_blkaddr;
+			end = start + BLK_ROUND_UP(inode.i_size) - 1;
+		}
+		fprintf(stderr, "File size:			%lu\n",
+				inode.i_size);
+		fprintf(stderr,
+				"	Plain Block Address:		%u - %u\n",
+				start, end);
+		break;
+
+	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+	case EROFS_INODE_FLAT_COMPRESSION:
+		err = z_erofs_map_blocks_iter(&inode, &map);
+		if (err)
+			erofs_err("get file blocks range failed");
+
+		start = erofs_blknr(map.m_pa);
+		end = start - 1 + blocks;
+		fprintf(stderr,
+				"	Compressed Block Address:	%u - %u\n",
+				start, end);
+		break;
+	}
+
+	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
+	if (!err)
+		fprintf(stderr, "File Path:			%s\n",
+				path);
+	else
+		erofs_err("path not found");
+}
+
+
 static int get_file_type(const char *filename)
 {
 	char *postfix = strrchr(filename, '.');
@@ -810,5 +882,7 @@ int main(int argc, char **argv)
 	if (dumpcfg.print_inode)
 		dumpfs_print_inode();
 
+	if (dumpcfg.print_inode_phy)
+		dumpfs_print_inode_phy();
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils
  2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
                   ` (3 preceding siblings ...)
  2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
@ 2021-09-11 15:45 ` Gao Xiang
  4 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 15:45 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

Hi Xuenan,

Thanks for working on dump.erofs! Such functionality was recently
requested by some other folks, so it would be quite helpful to have it
upstream.

Some comments in-line:

On Sat, Sep 11, 2021 at 09:46:31PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>

mpiglet => "Wang Qi" (according to the name in the source header)

It'd be better to use the real name if possible. ;)

> 
> Add dump-tool for erofs to facilitate users directly
> analyzing the erofs image file.
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>

Same here.

> ---
>  Makefile.am        |  2 +-
>  configure.ac       |  2 ++
>  dump/Makefile.am   | 10 ++++++
>  dump/main.c        | 84 ++++++++++++++++++++++++++++++++++++++++++++++
>  include/erofs/io.h |  3 ++
>  lib/namei.c        |  4 +--
>  6 files changed, 102 insertions(+), 3 deletions(-)
>  create mode 100644 dump/Makefile.am
>  create mode 100644 dump/main.c
> 
> diff --git a/Makefile.am b/Makefile.am
> index b804aa9..fedf7b5 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -3,7 +3,7 @@
>  
>  ACLOCAL_AMFLAGS = -I m4
>  
> -SUBDIRS = man lib mkfs
> +SUBDIRS = man lib mkfs dump
>  if ENABLE_FUSE
>  SUBDIRS += fuse
>  endif
> diff --git a/configure.ac b/configure.ac
> index f626064..f4fe548 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -280,6 +280,8 @@ AC_CONFIG_FILES([Makefile
>  		 man/Makefile
>  		 lib/Makefile
>  		 mkfs/Makefile
> +		 dump/Makefile
>  		 fuse/Makefile])
> +
>  AC_OUTPUT
>  
> diff --git a/dump/Makefile.am b/dump/Makefile.am
> new file mode 100644
> index 0000000..e664799
> --- /dev/null
> +++ b/dump/Makefile.am
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0+
> +# Makefile.am
> +
> +AUTOMAKE_OPTIONS = foreign
> +bin_PROGRAMS     = dump.erofs
> +AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}

Do we really need uuid and selinux libraries for dump.erofs?

> +dump_erofs_SOURCES = main.c
> +dump_erofs_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
> +dump_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} ${liblz4_LIBS}

Same here.

> +
> diff --git a/dump/main.c b/dump/main.c
> new file mode 100644
> index 0000000..8fbc24a
> --- /dev/null
> +++ b/dump/main.c
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * dump/main.c

It could cause some u-boot checkpatch problem...
It'd be better to get rid of the path.

> + *
> + * Copyright (C) 2021-2022 HUAWEI, Inc.
> + *             http://www.huawei.com/
> + * Created by Wang Qi <mpiglet@outlook.com>
> + *            Guo Xuenan <guoxuenan@huawei.com>
> + */
> +
> +#include <stdlib.h>
> +#include <getopt.h>
> +#include <sys/sysmacros.h>
> +#include <time.h>
> +#include <lz4.h>
> +
> +#include "erofs/print.h"
> +#include "erofs/io.h"
> +
> +static struct option long_options[] = {
> +	{"help", no_argument, 0, 1},
> +	{0, 0, 0, 0},
> +};
> +
> +static void usage(void)
> +{
> +	fputs("usage: [options] erofs-image \n\n"
> +		"Dump erofs layout from erofs-image, and [options] are:\n"
> +		"-v/-V      print dump.erofs version info\n"

How about leaving only one argument here?
It'd be better to keep in sync with dumpe2fs, so:
https://www.man7.org/linux/man-pages/man8/dumpe2fs.8.html

       -V     print the version number of dump.erofs and exit.

> +		"-h/--help  display this help and exit\n", stderr);

-h is already used by dumpe2fs, so how about leaving only --help here?

> +}
> +static void dumpfs_print_version(void)
> +{
> +	fprintf(stderr, "dump.erofs %s\n", cfg.c_version);
> +}
> +
> +static int dumpfs_parse_options_cfg(int argc, char **argv)
> +{
> +	int opt;
> +
> +	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",

It seems that not all options are used in this patch.
Also, it would be better to sort them all in alphabetical order.

> +					long_options, NULL)) != -1) {
> +		switch (opt) {
> +		case 'v':
> +		case 'V':
> +			dumpfs_print_version();
> +			exit(0);
> +		case 'h':
> +		case 1:
> +		    usage();
> +		    exit(0);
> +		default: /* '?' */
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (optind >= argc)
> +		return -EINVAL;
> +
> +	cfg.c_img_path = strdup(argv[optind++]);
> +	if (!cfg.c_img_path)
> +		return -ENOMEM;
> +
> +	if (optind < argc) {
> +		erofs_err("unexpected argument: %s\n", argv[optind]);

minor nit: memory leak of c_img_path?

> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int err = 0;
> +
> +	erofs_init_configure();
> +	err = dumpfs_parse_options_cfg(argc, argv);
> +	if (err) {
> +		if (err == -EINVAL)
> +			usage();
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> diff --git a/include/erofs/io.h b/include/erofs/io.h
> index 5574245..00e5de8 100644
> --- a/include/erofs/io.h
> +++ b/include/erofs/io.h
> @@ -10,6 +10,7 @@
>  #define __EROFS_IO_H
>  
>  #include <unistd.h>
> +#include <sys/types.h>

How about removing "#include <sys/types.h>" in lib/namei.c?

Thanks,
Gao Xiang

>  #include "internal.h"
>  
>  #ifndef O_BINARY
> @@ -25,6 +26,8 @@ int dev_fillzero(u64 offset, size_t len, bool padding);
>  int dev_fsync(void);
>  int dev_resize(erofs_blk_t nblocks);
>  u64 dev_length(void);
> +dev_t erofs_new_decode_dev(u32 dev);
> +int erofs_read_inode_from_disk(struct erofs_inode *vi);
>  
>  static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
>  			    u32 nblocks)
> diff --git a/lib/namei.c b/lib/namei.c
> index 4e06ba4..21631f1 100644
> --- a/lib/namei.c
> +++ b/lib/namei.c
> @@ -15,7 +15,7 @@
>  #include "erofs/print.h"
>  #include "erofs/io.h"
>  
> -static dev_t erofs_new_decode_dev(u32 dev)
> +dev_t erofs_new_decode_dev(u32 dev)
>  {
>  	const unsigned int major = (dev & 0xfff00) >> 8;
>  	const unsigned int minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
> @@ -23,7 +23,7 @@ static dev_t erofs_new_decode_dev(u32 dev)
>  	return makedev(major, minor);
>  }
>  
> -static int erofs_read_inode_from_disk(struct erofs_inode *vi)
> +int erofs_read_inode_from_disk(struct erofs_inode *vi)
>  {
>  	int ret, ifmt;
>  	char buf[sizeof(struct erofs_inode_extended)];
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information
  2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
@ 2021-09-11 15:58   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 15:58 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

On Sat, Sep 11, 2021 at 09:46:32PM +0800, Guo Xuenan wrote:
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>

Same here.

> ---
>  dump/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/dump/main.c b/dump/main.c
> index 8fbc24a..25ac89f 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -17,6 +17,12 @@
>  #include "erofs/print.h"
>  #include "erofs/io.h"
>  
> +struct dumpcfg {
> +	bool print_superblock;
> +	bool print_version;
> +};
> +static struct dumpcfg dumpcfg;
> +
>  static struct option long_options[] = {
>  	{"help", no_argument, 0, 1},
>  	{0, 0, 0, 0},
> @@ -26,6 +32,7 @@ static void usage(void)
>  {
>  	fputs("usage: [options] erofs-image \n\n"
>  		"Dump erofs layout from erofs-image, and [options] are:\n"
> +		"-s          print information about superblock\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -41,6 +48,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
>  					long_options, NULL)) != -1) {
>  		switch (opt) {
> +		case 's':
> +			dumpcfg.print_superblock = true;
> +			break;
>  		case 'v':
>  		case 'V':
>  			dumpfs_print_version();
> @@ -68,6 +78,39 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  	return 0;
>  }
>  
> +static void dumpfs_print_superblock(void)
> +{
> +	time_t time = sbi.build_time;
> +
> +	fprintf(stderr, "Filesystem magic number:	0x%04X\n", EROFS_SUPER_MAGIC_V1);
> +	fprintf(stderr, "Filesystem blocks: 		%lu\n", sbi.blocks);
> +	fprintf(stderr, "Filesystem meta block:		%u\n", sbi.meta_blkaddr);

Filesystem inode metadata start block:

> +	fprintf(stderr, "Filesystem xattr block:	%u\n", sbi.xattr_blkaddr);

Filesystem shared xattr metadata start block:

> +	fprintf(stderr, "Filesystem root nid:		%ld\n", sbi.root_nid);


> +	fprintf(stderr, "Filesystem valid inos:		%lu\n", sbi.inos);

Inode count:

> +	fprintf(stderr, "Filesystem created:		%s", ctime(&time));
> +	fprintf(stderr, "Filesystem uuid:		");

Filesystem UUID:

How about printing to stdout directly? according to
dumpe2fs:
https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/misc/dumpe2fs.c#n219

Filesystem volume name:   <none>
Last mounted on:          /
Filesystem UUID:          c46ea44a-e249-446f-af40-xxxxxxxxxxxx
Filesystem magic number:  0xEF53
Filesystem revision #:    1 (dynamic)
Filesystem features:      has_journal ext_attr resize_inode dir_index filetype needs_recovery extent 64bit flex_bg sparse_super large_file huge_file dir_nlink extra_isize metadata_csum
Filesystem flags:         signed_directory_hash 
Default mount options:    user_xattr acl
Filesystem state:         clean
Errors behavior:          Continue
Filesystem OS type:       Linux
Inode count:              8003584
Block count:              32000000
Reserved block count:     1600000
Free blocks:              18661241
Free inodes:              7681550
First block:              0
Block size:               4096
Fragment size:            4096


> +	for (int i = 0; i < 16; i++)
> +		fprintf(stderr, "%02x", sbi.uuid[i]);
> +	fprintf(stderr, "\n");

This doesn't seem to be the correct UUID format...

> +
> +	if (erofs_sb_has_lz4_0padding())
> +		fprintf(stderr, "Filesystem support lz4 0padding\n");
> +	else
> +		fprintf(stderr, "Filesystem not support lz4 0padding\n");
> +
> +	if (erofs_sb_has_big_pcluster())
> +		fprintf(stderr, "Filesystem support big pcluster\n");
> +	else
> +		fprintf(stderr, "Filesystem not support big pcluster\n");
> +
> +	if (erofs_sb_has_sb_chksum())
> +		fprintf(stderr, "Filesystem has super block checksum feature\n");
> +	else
> +		fprintf(stderr, "Filesystem has no superblock checksum feature\n");

How about showing the features in a list as above?

Thanks,
Gao Xiang

> +
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	int err = 0;
> @@ -80,5 +123,20 @@ int main(int argc, char **argv)
>  		return -1;
>  	}
>  
> +	err = dev_open_ro(cfg.c_img_path);
> +	if (err) {
> +		erofs_err("open image file failed");
> +		return -1;
> +	}
> +
> +	err = erofs_read_superblock();
> +	if (err) {
> +		erofs_err("read superblock failed");
> +		return -1;
> +	}
> +
> +	if (dumpcfg.print_superblock)
> +		dumpfs_print_superblock();
> +
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
@ 2021-09-11 16:13   ` Gao Xiang
  2021-09-13  4:30     ` Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:13 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

(+Cc Jianan.)

On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
>  dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 474 insertions(+)
> 
> diff --git a/dump/main.c b/dump/main.c
> index 25ac89f..b0acc0b 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -19,10 +19,78 @@
>  
>  struct dumpcfg {
>  	bool print_superblock;
> +	bool print_statistic;
>  	bool print_version;
>  };
>  static struct dumpcfg dumpcfg;
>  
> +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
> +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> +static char *file_types[] = {
> +	".so",
> +	".png",
> +	".jpg",
> +	".xml",
> +	".html",
> +	".odex",
> +	".vdex",
> +	".apk",
> +	".ttf",
> +	".jar",
> +	".json",
> +	".ogg",
> +	".oat",
> +	".art",
> +	".rc",
> +	".otf",
> +	".txt",
> +	"others",
> +};
> +enum {
> +	SOFILETYPE = 0,
> +	PNGFILETYPE,
> +	JPEGFILETYPE,
> +	XMLFILETYPE,
> +	HTMLFILETYPE,
> +	ODEXFILETYPE,
> +	VDEXFILETYPE,
> +	APKFILETYPE,
> +	TTFFILETYPE,
> +	JARFILETYPE,
> +	JSONFILETYPE,
> +	OGGFILETYPE,
> +	OATFILETYPE,
> +	ARTFILETYPE,
> +	RCFILETYPE,
> +	OTFFILETYPE,
> +	TXTFILETYPE,
> +	OTHERFILETYPE,
> +};

Why do we need enums here? Can these be resolved with some array index?

> +
> +#define	FILE_SIZE_BITS	30
> +struct statistics {
> +	unsigned long blocks;
> +	unsigned long files;
> +	unsigned long files_total_size;
> +	unsigned long files_total_origin_size;
> +	double compress_rate;
> +	unsigned long compressed_files;
> +	unsigned long uncompressed_files;
> +
> +	unsigned long regular_files;
> +	unsigned long dir_files;
> +	unsigned long chardev_files;
> +	unsigned long blkdev_files;
> +	unsigned long fifo_files;
> +	unsigned long sock_files;
> +	unsigned long symlink_files;
> +
> +	unsigned int file_type_stat[OTHERFILETYPE + 1];
> +	unsigned int file_org_size[FILE_SIZE_BITS];

What do "FILE_SIZE_BITS" and "file_org_size" mean?

> +	unsigned int file_comp_size[FILE_SIZE_BITS];
> +};
> +static struct statistics stats;
> +
>  static struct option long_options[] = {
>  	{"help", no_argument, 0, 1},
>  	{0, 0, 0, 0},
> @@ -33,6 +101,7 @@ static void usage(void)
>  	fputs("usage: [options] erofs-image \n\n"
>  		"Dump erofs layout from erofs-image, and [options] are:\n"
>  		"-s          print information about superblock\n"
> +		"-S      print statistic information of the erofs-image\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  		case 's':
>  			dumpcfg.print_superblock = true;
>  			break;
> +		case 'S':
> +			dumpcfg.print_statistic = true;
> +			break;
>  		case 'v':
>  		case 'V':
>  			dumpfs_print_version();
> @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  	return 0;
>  }
>  
> +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> +		erofs_off_t last_cluster_size,
> +		erofs_off_t *last_cluster_compressed_size)

Hmmm... do we really need the exact compressed bytes,
or is the number of compressed blocks enough?

"compressed blocks" can be obtained from the erofs inode.

Btw, I think it would still be useful for fsck (to check whether an erofs image is correct).

> +{
> +	int ret;
> +	int decomp_len;
> +	int compressed_len = 0;
> +	char *decompress;
> +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> +
> +	ret = dev_read(raw, map->m_pa, map->m_plen);
> +	if (ret < 0)
> +		return -EIO;
> +
> +	if (erofs_sb_has_lz4_0padding()) {
> +		compressed_len = map->m_plen;
> +	} else {
> +		// lz4 maximum compression ratio is 255
> +		decompress = (char *)malloc(map->m_plen * 255);
> +		if (!decompress) {
> +			erofs_err("allocate memory for decompress space failed");
> +			return -1;
> +		}
> +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> +				map->m_plen, last_cluster_size,
> +				map->m_plen * 10);
> +		if (decomp_len < 0) {
> +			erofs_err("decompress last cluster to get decompressed size failed");
> +			free(decompress);
> +			return -1;
> +		}
> +		compressed_len = LZ4_compress_destSize(decompress, raw,
> +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> +		if (compressed_len < 0) {
> +			erofs_err("compress to get last extent size failed\n");
> +			free(decompress);
> +			return -1;
> +		}
> +		free(decompress);
> +		// dut to the use of lz4hc (can use different compress level),
> +		// our normal lz4 compress result may be bigger
> +		compressed_len = compressed_len < map->m_plen ?
> +			compressed_len : map->m_plen;
> +	}
> +
> +	*last_cluster_compressed_size = compressed_len;
> +	return 0;
> +}
> +
> +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> +		erofs_off_t *size)
> +{
> +	int err;
> +	erofs_blk_t compressedlcs;
> +	erofs_off_t last_cluster_size;
> +	erofs_off_t last_cluster_compressed_size;
> +	struct erofs_map_blocks map = {
> +		.index = UINT_MAX,
> +		.m_la = inode->i_size - 1,
> +	};
> +
> +	err = z_erofs_map_blocks_iter(inode, &map);

(add Jianan here.)

Can we port the latest erofs kernel fiemap code to erofs-utils, and add
some functionality to get the file distribution as well when the fs isn't
mounted?


> +	if (err) {
> +		erofs_err("read nid %ld's last block failed\n", inode->nid);
> +		return err;
> +	}
> +	compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
> +	*size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
> +	last_cluster_size = inode->i_size - map.m_la;
> +
> +	if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
> +		*size += last_cluster_size;
> +	} else {
> +		err = z_erofs_get_last_cluster_size_from_disk(&map,
> +				last_cluster_size,
> +				&last_cluster_compressed_size);
> +		if (err) {
> +			erofs_err("get nid %ld's last extent size failed",
> +					inode->nid);
> +			return err;
> +		}
> +		*size += last_cluster_compressed_size;
> +	}
> +	return 0;
> +}
> +
> +static int get_file_compressed_size(struct erofs_inode *inode,
> +		erofs_off_t *size)

erofs_dump_get_file_occupied_blocks?

> +{
> +	int err;
> +
> +	*size = 0;
> +	switch (inode->datalayout) {
> +	case EROFS_INODE_FLAT_INLINE:
> +	case EROFS_INODE_FLAT_PLAIN:
> +		stats.uncompressed_files++;
> +		*size = inode->i_size;
> +		break;
> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> +	case EROFS_INODE_FLAT_COMPRESSION:
> +		stats.compressed_files++;
> +		err = z_erofs_get_compressed_size(inode, size);
> +		if (err) {
> +			erofs_err("get compressed file size failed\n");
> +			return err;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static void dumpfs_print_superblock(void)
>  {
>  	time_t time = sbi.build_time;
> @@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
>  
>  }
>  
> +static int get_file_type(const char *filename)
> +{
> +	char *postfix = strrchr(filename, '.');
> +	int type = SOFILETYPE;
> +
> +	if (postfix == NULL)
> +		return OTHERFILETYPE;
> +	while (type < OTHERFILETYPE) {
> +		if (strcmp(postfix, file_types[type]) == 0)
> +			break;
> +		type++;
> +	}
> +	return type;
> +}
> +
> +// file count、file size、file type

It'd be better to avoid C++ comments...

> +static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
> +{
> +	struct erofs_inode vi = { .nid = nid};
> +	int err;
> +	char buf[EROFS_BLKSIZ];
> +	char filename[PATH_MAX + 1];
> +	erofs_off_t offset;
> +
> +	err = erofs_read_inode_from_disk(&vi);
> +	if (err)
> +		return err;
> +
> +	offset = 0;
> +	while (offset < vi.i_size) {
> +		erofs_off_t maxsize = min_t(erofs_off_t,
> +			vi.i_size - offset, EROFS_BLKSIZ);
> +		struct erofs_dirent *de = (void *)buf;
> +		struct erofs_dirent *end;
> +		unsigned int nameoff;
> +
> +		err = erofs_pread(&vi, buf, maxsize, offset);
> +		if (err)
> +			return err;
> +
> +		nameoff = le16_to_cpu(de->nameoff);
> +
> +		if (nameoff < sizeof(struct erofs_dirent) ||
> +		    nameoff >= PAGE_SIZE) {
> +			erofs_err("invalid de[0].nameoff %u @ nid %llu",
> +				  nameoff, nid | 0ULL);
> +			return -EFSCORRUPTED;
> +		}
> +		end = (void *)buf + nameoff;
> +		while (de < end) {
> +			const char *dname;
> +			unsigned int dname_len;
> +			struct erofs_inode inode = { .nid = de->nid };
> +			int actual_size_mark;
> +			int original_size_mark;
> +			erofs_off_t actual_size = 0;
> +			erofs_off_t original_size;
> +
> +			nameoff = le16_to_cpu(de->nameoff);
> +			dname = (char *)buf + nameoff;
> +
> +			if (de + 1 >= end)
> +				dname_len = strnlen(dname, maxsize - nameoff);
> +			else
> +				dname_len =
> +					le16_to_cpu(de[1].nameoff) - nameoff;
> +
> +			/* a corrupted entry is found */
> +			if (nameoff + dname_len > maxsize ||
> +				dname_len > EROFS_NAME_LEN) {
> +				erofs_err("bogus dirent @ nid %llu",
> +						le64_to_cpu(de->nid) | 0ULL);
> +				DBG_BUGON(1);
> +				return -EFSCORRUPTED;
> +			}
> +			if (de->nid != nid && de->nid != parent_nid)
> +				stats.files++;
> +
> +			memset(filename, 0, PATH_MAX + 1);
> +			memcpy(filename, dname, dname_len);
> +
> +			switch (de->file_type) {
> +			case EROFS_FT_UNKNOWN:
> +				break;
> +			case EROFS_FT_REG_FILE:
> +				err = erofs_read_inode_from_disk(&inode);
> +				if (err) {
> +					erofs_err("read file inode from disk failed!");
> +					return err;
> +				}
> +				original_size = inode.i_size;
> +				stats.files_total_origin_size += original_size;
> +				stats.regular_files++;
> +
> +				err = get_file_compressed_size(&inode,
> +						&actual_size);
> +				if (err) {
> +					erofs_err("get file size failed\n");
> +					return err;
> +				}
> +				stats.files_total_size += actual_size;
> +				stats.file_type_stat[get_file_type(filename)]++;
> +
> +				original_size_mark = 0;
> +				actual_size_mark = 0;
> +				actual_size >>= 10;
> +				original_size >>= 10;
> +
> +				while (actual_size || original_size) {
> +					if (actual_size) {
> +						actual_size >>= 1;
> +						actual_size_mark++;
> +					}
> +					if (original_size) {
> +						original_size >>= 1;
> +						original_size_mark++;
> +					}
> +				}
> +
> +				if (original_size_mark >= FILE_SIZE_BITS - 1)
> +					stats.file_org_size[FILE_SIZE_BITS - 1]++;
> +				else
> +					stats.file_org_size[original_size_mark]++;
> +				if (actual_size_mark >= FILE_SIZE_BITS - 1)
> +					stats.file_comp_size[FILE_SIZE_BITS - 1]++;
> +				else
> +					stats.file_comp_size[actual_size_mark]++;
> +				break;
> +
> +			case EROFS_FT_DIR:
> +				if (de->nid != nid && de->nid != parent_nid) {



> +					stats.dir_files++;
> +					stats.uncompressed_files++;
> +					err = read_dir(de->nid, nid);
> +					if (err) {
> +						fprintf(stderr,
> +								"parse dir nid %llu error occurred\n",
> +								de->nid);
> +						return err;
> +					}
> +				}
> +				break;
> +			case EROFS_FT_CHRDEV:
> +				stats.chardev_files++;
> +				stats.uncompressed_files++;

How about using an array instead?

> +				break;
> +			case EROFS_FT_BLKDEV:
> +				stats.blkdev_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			case EROFS_FT_FIFO:
> +				stats.fifo_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			case EROFS_FT_SOCK:
> +				stats.sock_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			case EROFS_FT_SYMLINK:
> +				stats.symlink_files++;
> +				stats.uncompressed_files++;
> +				break;
> +			}
> +			++de;
> +		}
> +		offset += maxsize;
> +	}
> +	return 0;
> +}
> +
> +static void dumpfs_print_statistic_of_filetype(void)
> +{
> +	fprintf(stderr, "Filesystem total file count:         %lu\n",
> +			stats.files);
> +	fprintf(stderr, "Filesystem regular file count:       %lu\n",
> +			stats.regular_files);
> +	fprintf(stderr, "Filesystem directory count:          %lu\n",
> +			stats.dir_files);
> +	fprintf(stderr, "Filesystem symlink file count:       %lu\n",
> +			stats.symlink_files);
> +	fprintf(stderr, "Filesystem character device count:   %lu\n",
> +			stats.chardev_files);
> +	fprintf(stderr, "Filesystem block device count:       %lu\n",
> +			stats.blkdev_files);
> +	fprintf(stderr, "Filesystem FIFO file count:          %lu\n",
> +			stats.fifo_files);
> +	fprintf(stderr, "Filesystem SOCK file count:          %lu\n",
> +			stats.sock_files);

Also a loop can be used here.

> +}
> +
> +static void dumpfs_print_chart_row(char *col1, unsigned int col2,
> +		double col3, char *col4)
> +{
> +	char row[500] = {0};
> +
> +	sprintf(row, chart_format, col1, col2, col3, col4);
> +	fprintf(stderr, row);
> +}
> +
> +static void dumpfs_print_chart_of_file(unsigned int *file_counts,
> +		unsigned int len)
> +{
> +	char col1[30];
> +	unsigned int col2;
> +	double col3;
> +	char col4[400];
> +	unsigned int lowerbound = 0;
> +	unsigned int upperbound = 1;
> +
> +	fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
> +			"ratio", "distribution");
> +	for (int i = 0; i < len; i++) {
> +		memset(col1, 0, 30);

		memset(col1, 0, sizeof(col1));

> +		memset(col4, 0, 400);

		memset(col4, 0, sizeof(col4));

Thanks,
Gao Xiang

> +		if (i == len - 1)
> +			strcpy(col1, " others");
> +		else if (i <= 6)
> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
> +		else
> +
> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
> +		col2 = file_counts[i];
> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
> +		memset(col4, '#', col3 / 2);
> +		dumpfs_print_chart_row(col1, col2, col3, col4);
> +		lowerbound = upperbound;
> +		upperbound <<= 1;
> +	}
> +}
> +
> +static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
> +{
> +	char col1[30];
> +	unsigned int col2;
> +	double col3;
> +	char col4[401];
> +
> +	fprintf(stderr, header_format, "type", "count", "ratio",
> +			"distribution");
> +	for (int i = 0; i < len; i++) {
> +		memset(col1, 0, 30);
> +		memset(col4, 0, 401);
> +		sprintf(col1, "%-17s", file_types[i]);
> +		col2 = stats.file_type_stat[i];
> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
> +		memset(col4, '#', col3 / 2);
> +		dumpfs_print_chart_row(col1, col2, col3, col4);
> +	}
> +}
> +
> +static void dumpfs_print_statistic_of_compression(void)
> +{
> +	stats.compress_rate = (double)(100 * stats.files_total_size) /
> +		(double)(stats.files_total_origin_size);
> +	fprintf(stderr, "Filesystem compressed files:         %lu\n",
> +			stats.compressed_files);
> +	fprintf(stderr, "Filesystem uncompressed files:       %lu\n",
> +			stats.uncompressed_files);
> +	fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
> +			stats.files_total_origin_size);
> +	fprintf(stderr, "Filesystem total file size:          %lu Bytes\n",
> +			stats.files_total_size);
> +	fprintf(stderr, "Filesystem compress rate:            %.2f%%\n",
> +			stats.compress_rate);
> +}
> +
> +static void dumpfs_print_statistic(void)
> +{
> +	int err;
> +
> +	stats.blocks = sbi.blocks;
> +	err = read_dir(sbi.root_nid, sbi.root_nid);
> +	if (err) {
> +		erofs_err("read dir failed");
> +		return;
> +	}
> +
> +	dumpfs_print_statistic_of_filetype();
> +	dumpfs_print_statistic_of_compression();
> +
> +	fprintf(stderr, "\nOriginal file size distribution:\n");
> +	dumpfs_print_chart_of_file(stats.file_org_size, 17);
> +	fprintf(stderr, "\nOn-Disk file size distribution:\n");
> +	dumpfs_print_chart_of_file(stats.file_comp_size, 17);
> +	fprintf(stderr, "\nFile type distribution:\n");
> +	dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	int err = 0;
> @@ -138,5 +608,9 @@ int main(int argc, char **argv)
>  	if (dumpcfg.print_superblock)
>  		dumpfs_print_superblock();
>  
> +	if (dumpcfg.print_statistic)
> +		dumpfs_print_statistic();
> +
> +
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number
  2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
@ 2021-09-11 16:25   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:25 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

On Sat, Sep 11, 2021 at 09:46:34PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
>  dump/main.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 200 insertions(+), 2 deletions(-)
> 
> diff --git a/dump/main.c b/dump/main.c
> index b0acc0b..2389cef 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -19,8 +19,10 @@
>  
>  struct dumpcfg {
>  	bool print_superblock;
> +	bool print_inode;
>  	bool print_statistic;
>  	bool print_version;
> +	u64 ino;
>  };
>  static struct dumpcfg dumpcfg;
>  
> @@ -100,8 +102,9 @@ static void usage(void)
>  {
>  	fputs("usage: [options] erofs-image \n\n"
>  		"Dump erofs layout from erofs-image, and [options] are:\n"
> -		"-s          print information about superblock\n"
> -		"-S      print statistic information of the erofs-image\n"
> +		"-s         print information about superblock\n"
> +		"-S         print statistic information of the erofs-image\n"
> +		"-i #       print target # inode info\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -113,6 +116,7 @@ static void dumpfs_print_version(void)
>  static int dumpfs_parse_options_cfg(int argc, char **argv)
>  {
>  	int opt;
> +	u64 i;
>  
>  	while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
>  					long_options, NULL)) != -1) {
> @@ -127,6 +131,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  		case 'V':
>  			dumpfs_print_version();
>  			exit(0);
> +		case 'i':
> +			i = atoll(optarg);
> +			dumpcfg.print_inode = true;
> +			dumpcfg.ino = i;
> +			break;
>  		case 'h':
>  		case 1:
>  		    usage();
> @@ -293,6 +302,193 @@ static void dumpfs_print_superblock(void)
>  
>  }
>  
> +static int get_path_by_nid(erofs_nid_t nid, erofs_nid_t parent_nid,
> +		erofs_nid_t target, char *path, unsigned int pos)

Can we refactor it as a traversal function (together with a function
in the previous patch)? Also, how should hard links be resolved?

> +{
> +	int err;
> +	struct erofs_inode inode = {.nid = nid};
> +	erofs_off_t offset;
> +	char buf[EROFS_BLKSIZ];
> +
> +	path[pos++] = '/';
> +	if (target == sbi.root_nid)
> +		return 0;
> +
> +	err = erofs_read_inode_from_disk(&inode);
> +	if (err) {
> +		erofs_err("read inode %lu failed", nid);
> +		return err;
> +	}
> +
> +	offset = 0;
> +	while (offset < inode.i_size) {
> +		erofs_off_t maxsize = min_t(erofs_off_t,
> +					inode.i_size - offset, EROFS_BLKSIZ);
> +		struct erofs_dirent *de = (void *)buf;
> +		struct erofs_dirent *end;
> +		unsigned int nameoff;
> +
> +		err = erofs_pread(&inode, buf, maxsize, offset);
> +		if (err)
> +			return err;
> +
> +		nameoff = le16_to_cpu(de->nameoff);
> +		if (nameoff < sizeof(struct erofs_dirent) ||
> +		    nameoff >= PAGE_SIZE) {
> +			erofs_err("invalid de[0].nameoff %u @ nid %llu",
> +				  nameoff, nid | 0ULL);
> +			return -EFSCORRUPTED;
> +		}
> +
> +		end = (void *)buf + nameoff;
> +		while (de < end) {
> +			const char *dname;
> +			unsigned int dname_len;
> +
> +			nameoff = le16_to_cpu(de->nameoff);
> +			dname = (char *)buf + nameoff;
> +			if (de + 1 >= end)
> +				dname_len = strnlen(dname, maxsize - nameoff);
> +			else
> +				dname_len = le16_to_cpu(de[1].nameoff)
> +					- nameoff;
> +
> +			/* a corrupted entry is found */
> +			if (nameoff + dname_len > maxsize ||
> +			    dname_len > EROFS_NAME_LEN) {
> +				erofs_err("bogus dirent @ nid %llu",
> +						le64_to_cpu(de->nid) | 0ULL);
> +				DBG_BUGON(1);
> +				return -EFSCORRUPTED;
> +			}
> +
> +			if (de->nid == target) {
> +				memcpy(path + pos, dname, dname_len);
> +				return 0;
> +			}
> +
> +			if (de->file_type == EROFS_FT_DIR &&
> +					de->nid != parent_nid &&
> +					de->nid != nid) {
> +				memcpy(path + pos, dname, dname_len);
> +				err = get_path_by_nid(de->nid, nid,
> +						target, path, pos + dname_len);
> +				if (!err)
> +					return 0;
> +				memset(path + pos, 0, dname_len);
> +			}
> +			++de;
> +		}
> +		offset += maxsize;
> +	}
> +	return -1;
> +}
> +
> +static void dumpfs_print_inode(void)
> +{
> +	int err;
> +	erofs_off_t size;
> +	erofs_nid_t nid = dumpcfg.ino;
> +	struct erofs_inode inode = {.nid = nid};
> +	char path[PATH_MAX + 1] = {0};
> +	time_t t = inode.i_ctime;
> +
> +	err = erofs_read_inode_from_disk(&inode);
> +	if (err) {
> +		erofs_err("read inode %lu from disk failed", nid);
> +		return;
> +	}
> +
> +	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
> +	switch (inode.inode_isize) {
> +	case 32:
> +		fprintf(stderr, "	File inode is compacted layout\n");

It's enough to print "Inode core size: 32/64."

> +		break;
> +	case 64:
> +		fprintf(stderr, "	File inode is extended layout\n");
> +		break;
> +	default:
> +		erofs_err("unsupported inode layout\n");
> +	}
> +	fprintf(stderr, "	File size:		%lu\n",
> +			inode.i_size);
> +	fprintf(stderr, "	File nid:		%lu\n",
> +			inode.nid);
> +	fprintf(stderr, "	File extent size:	%u\n",
> +			inode.extent_isize);
> +	fprintf(stderr, "	File xattr size:	%u\n",
> +			inode.xattr_isize);
> +	fprintf(stderr, "	File inode size:	%u\n",
> +			inode.inode_isize);
> +	fprintf(stderr, "	File type:		");
> +	switch (inode.i_mode & S_IFMT) {
> +	case S_IFREG:
> +		fprintf(stderr, "regular\n");
> +		break;
> +	case S_IFDIR:
> +		fprintf(stderr, "directory\n");
> +		break;
> +	case S_IFLNK:
> +		fprintf(stderr, "link\n");
> +		break;
> +	case S_IFCHR:
> +		fprintf(stderr, "character device\n");
> +		break;
> +	case S_IFBLK:
> +		fprintf(stderr, "block device\n");
> +		break;
> +	case S_IFIFO:
> +		fprintf(stderr, "fifo\n");
> +		break;
> +	case S_IFSOCK:
> +		fprintf(stderr, "sock\n");
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	err = get_file_compressed_size(&inode, &size);
> +	if (err) {
> +		erofs_err("get file size failed\n");
> +		return;
> +	}
> +
> +	fprintf(stderr, "	File original size:	%lu\n"
> +			"	File on-disk size:	%lu\n",
> +			inode.i_size, size);
> +	fprintf(stderr, "	File compress rate:	%.2f%%\n",
> +			(double)(100 * size) / (double)(inode.i_size));

I think we could use "compressed blocks" instead...

> +
> +	fprintf(stderr, "	File datalayout:	");
> +	switch (inode.datalayout) {
> +	case EROFS_INODE_FLAT_PLAIN:
> +		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
> +		break;
> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> +		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION_LEGACY\n");
> +		break;
> +	case EROFS_INODE_FLAT_INLINE:
> +		fprintf(stderr, "EROFS_INODE_FLAT_INLINE\n");
> +		break;
> +	case EROFS_INODE_FLAT_COMPRESSION:
> +		fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION\n");
> +		break;

Just using a number is fine, since there could be some new types in the
future (also I'd like to rename EROFS_INODE_FLAT_COMPRESSION_LEGACY later.)


> +	default:
> +		break;
> +	}
> +
> +	fprintf(stderr, "	File create time:	%s", ctime(&t));
> +	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
> +	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);

The access mode is missing here.

> +	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);

Anyway... How about just using a "stat"-like style and adding more fields?

  File: erofs.rst
  Size: 14035     	Blocks: 32         IO Block: 4096   regular file
Device: 10303h/66307d	Inode: 7120988     Links: 1
Access: (0644/-rw-r--r--)  Uid: ( 1000/hsiangkao)   Gid: ( 1000/hsiangkao)
Access: 2021-09-11 00:42:02.748083341 +0800
Modify: 2021-09-03 02:54:32.188031546 +0800
Change: 2021-09-03 02:54:32.188031546 +0800
 Birth: -

Thanks,
Gao Xiang

> +
> +	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> +	if (!err)
> +		fprintf(stderr, "	File path:		%s\n", path);
> +	else
> +		fprintf(stderr, "Path not found\n");
> +}
> +
>  static int get_file_type(const char *filename)
>  {
>  	char *postfix = strrchr(filename, '.');
> @@ -611,6 +807,8 @@ int main(int argc, char **argv)
>  	if (dumpcfg.print_statistic)
>  		dumpfs_print_statistic();
>  
> +	if (dumpcfg.print_inode)
> +		dumpfs_print_inode();
>  
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk
  2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
@ 2021-09-11 16:29   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:29 UTC (permalink / raw)
  To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet

On Sat, Sep 11, 2021 at 09:46:35PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
> 
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
>  dump/main.c | 108 +++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 91 insertions(+), 17 deletions(-)
> 
> diff --git a/dump/main.c b/dump/main.c
> index 2389cef..efce309 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -21,8 +21,10 @@ struct dumpcfg {
>  	bool print_superblock;
>  	bool print_inode;
>  	bool print_statistic;
> +	bool print_inode_phy;
>  	bool print_version;
>  	u64 ino;
> +	u64 ino_phy;
>  };
>  static struct dumpcfg dumpcfg;
>  
> @@ -105,6 +107,7 @@ static void usage(void)
>  		"-s         print information about superblock\n"
>  		"-S         print statistic information of the erofs-image\n"
>  		"-i #       print target # inode info\n"
> +		"-I #       print target # inode on-disk info\n"
>  		"-v/-V      print dump.erofs version info\n"
>  		"-h/--help  display this help and exit\n", stderr);
>  }
> @@ -136,6 +139,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>  			dumpcfg.print_inode = true;
>  			dumpcfg.ino = i;
>  			break;
> +		case 'I':
> +			i = atoll(optarg);
> +			dumpcfg.print_inode_phy = true;
> +			dumpcfg.ino_phy = i;
> +			break;
>  		case 'h':
>  		case 1:
>  		    usage();
> @@ -402,25 +410,25 @@ static void dumpfs_print_inode(void)
>  	fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
>  	switch (inode.inode_isize) {
>  	case 32:
> -		fprintf(stderr, "	File inode is compacted layout\n");
> +		fprintf(stderr, "File inode is compacted layout\n");
>  		break;
>  	case 64:
> -		fprintf(stderr, "	File inode is extended layout\n");
> +		fprintf(stderr, "File inode is extended layout\n");
>  		break;
>  	default:
>  		erofs_err("unsupported inode layout\n");
>  	}
> -	fprintf(stderr, "	File size:		%lu\n",
> +	fprintf(stderr, "File size:		%lu\n",

How about folding these into the previous patch?

>  			inode.i_size);
> -	fprintf(stderr, "	File nid:		%lu\n",
> +	fprintf(stderr, "File nid:		%lu\n",
>  			inode.nid);
> -	fprintf(stderr, "	File extent size:	%u\n",
> +	fprintf(stderr, "File extent size:	%u\n",
>  			inode.extent_isize);
> -	fprintf(stderr, "	File xattr size:	%u\n",
> +	fprintf(stderr, "File xattr size:	%u\n",
>  			inode.xattr_isize);
> -	fprintf(stderr, "	File inode size:	%u\n",
> +	fprintf(stderr, "File inode size:	%u\n",
>  			inode.inode_isize);
> -	fprintf(stderr, "	File type:		");
> +	fprintf(stderr, "File type:		");
>  	switch (inode.i_mode & S_IFMT) {
>  	case S_IFREG:
>  		fprintf(stderr, "regular\n");
> @@ -453,13 +461,13 @@ static void dumpfs_print_inode(void)
>  		return;
>  	}
>  
> -	fprintf(stderr, "	File original size:	%lu\n"
> -			"	File on-disk size:	%lu\n",
> +	fprintf(stderr, "File original size:	%lu\n"
> +			"File on-disk size:	%lu\n",
>  			inode.i_size, size);
> -	fprintf(stderr, "	File compress rate:	%.2f%%\n",
> +	fprintf(stderr, "File compress rate:	%.2f%%\n",
>  			(double)(100 * size) / (double)(inode.i_size));
>  
> -	fprintf(stderr, "	File datalayout:	");
> +	fprintf(stderr, "File datalayout:	");
>  	switch (inode.datalayout) {
>  	case EROFS_INODE_FLAT_PLAIN:
>  		fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
> @@ -477,18 +485,82 @@ static void dumpfs_print_inode(void)
>  		break;
>  	}
>  
> -	fprintf(stderr, "	File create time:	%s", ctime(&t));
> -	fprintf(stderr, "	File uid:		%u\n", inode.i_uid);
> -	fprintf(stderr, "	File gid:		%u\n", inode.i_gid);
> -	fprintf(stderr, "	File hard-link count:	%u\n", inode.i_nlink);
> +	fprintf(stderr, "File create time:	%s", ctime(&t));
> +	fprintf(stderr, "File uid:		%u\n", inode.i_uid);
> +	fprintf(stderr, "File gid:		%u\n", inode.i_gid);
> +	fprintf(stderr, "File hard-link count:	%u\n", inode.i_nlink);
>  
>  	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
>  	if (!err)
> -		fprintf(stderr, "	File path:		%s\n", path);
> +		fprintf(stderr, "File path:		%s\n", path);
>  	else
>  		fprintf(stderr, "Path not found\n");
>  }
>  
> +static void dumpfs_print_inode_phy(void)
> +{
> +	int err;
> +	erofs_nid_t nid = dumpcfg.ino_phy;
> +	struct erofs_inode inode = {.nid = nid};
> +	char path[PATH_MAX + 1] = {0};
> +
> +	err = erofs_read_inode_from_disk(&inode);
> +	if (err < 0) {
> +		erofs_err("read inode %lu from disk failed", nid);
> +		return;
> +	}
> +
> +	const erofs_off_t ibase = iloc(inode.nid);
> +	const erofs_off_t pos = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(
> +			ibase + inode.inode_isize + inode.xattr_isize);
> +	erofs_blk_t blocks = inode.u.i_blocks;
> +	erofs_blk_t start = 0;
> +	erofs_blk_t end = 0;
> +	struct erofs_map_blocks map = {
> +		.index = UINT_MAX,
> +		.m_la = 0,
> +	};
> +
> +	fprintf(stderr, "Inode %lu on-disk info:\n", nid);
> +	switch (inode.datalayout) {
> +	case EROFS_INODE_FLAT_INLINE:
> +	case EROFS_INODE_FLAT_PLAIN:
> +		if (inode.u.i_blkaddr == NULL_ADDR)
> +			start = end = erofs_blknr(pos);
> +		else {
> +			start = inode.u.i_blkaddr;
> +			end = start + BLK_ROUND_UP(inode.i_size) - 1;
> +		}
> +		fprintf(stderr, "File size:			%lu\n",
> +				inode.i_size);
> +		fprintf(stderr,
> +				"	Plain Block Address:		%u - %u\n",
> +				start, end);
> +		break;
> +
> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> +	case EROFS_INODE_FLAT_COMPRESSION:
> +		err = z_erofs_map_blocks_iter(&inode, &map);
> +		if (err)
> +			erofs_err("get file blocks range failed");
> +
> +		start = erofs_blknr(map.m_pa);
> +		end = start - 1 + blocks;
> +		fprintf(stderr,
> +				"	Compressed Block Address:	%u - %u\n",
> +				start, end);

How about porting/using fiemap code directly instead?

Thanks,
Gao Xiang

> +		break;
> +	}
> +
> +	err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> +	if (!err)
> +		fprintf(stderr, "File Path:			%s\n",
> +				path);
> +	else
> +		erofs_err("path not found");
> +}
> +
> +
>  static int get_file_type(const char *filename)
>  {
>  	char *postfix = strrchr(filename, '.');
> @@ -810,5 +882,7 @@ int main(int argc, char **argv)
>  	if (dumpcfg.print_inode)
>  		dumpfs_print_inode();
>  
> +	if (dumpcfg.print_inode_phy)
> +		dumpfs_print_inode_phy();
>  	return 0;
>  }
> -- 
> 2.25.4
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-11 16:13   ` Gao Xiang
@ 2021-09-13  4:30     ` Huang Jianan via Linux-erofs
  2021-09-13 12:46       ` Gao Xiang
  0 siblings, 1 reply; 13+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-09-13  4:30 UTC (permalink / raw)
  To: xiang; +Cc: linux-erofs, mpiglet

在 2021/9/12 0:13, Gao Xiang 写道:
> (+Cc Jianan.)
>
> On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> From: mpiglet <mpiglet@outlook.com>
>>
>> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
>> Signed-off-by: mpiglet <mpiglet@outlook.com>
>> ---
>>   dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 474 insertions(+)
>>
>> diff --git a/dump/main.c b/dump/main.c
>> index 25ac89f..b0acc0b 100644
>> --- a/dump/main.c
>> +++ b/dump/main.c
>> @@ -19,10 +19,78 @@
>>   
>>   struct dumpcfg {
>>   	bool print_superblock;
>> +	bool print_statistic;
>>   	bool print_version;
>>   };
>>   static struct dumpcfg dumpcfg;
>>   
>> +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
>> +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> +static char *file_types[] = {
>> +	".so",
>> +	".png",
>> +	".jpg",
>> +	".xml",
>> +	".html",
>> +	".odex",
>> +	".vdex",
>> +	".apk",
>> +	".ttf",
>> +	".jar",
>> +	".json",
>> +	".ogg",
>> +	".oat",
>> +	".art",
>> +	".rc",
>> +	".otf",
>> +	".txt",
>> +	"others",
>> +};
>> +enum {
>> +	SOFILETYPE = 0,
>> +	PNGFILETYPE,
>> +	JPEGFILETYPE,
>> +	XMLFILETYPE,
>> +	HTMLFILETYPE,
>> +	ODEXFILETYPE,
>> +	VDEXFILETYPE,
>> +	APKFILETYPE,
>> +	TTFFILETYPE,
>> +	JARFILETYPE,
>> +	JSONFILETYPE,
>> +	OGGFILETYPE,
>> +	OATFILETYPE,
>> +	ARTFILETYPE,
>> +	RCFILETYPE,
>> +	OTFFILETYPE,
>> +	TXTFILETYPE,
>> +	OTHERFILETYPE,
>> +};
> Why we need enums here? Can these be resolved with some array index?
>
>> +
>> +#define	FILE_SIZE_BITS	30
>> +struct statistics {
>> +	unsigned long blocks;
>> +	unsigned long files;
>> +	unsigned long files_total_size;
>> +	unsigned long files_total_origin_size;
>> +	double compress_rate;
>> +	unsigned long compressed_files;
>> +	unsigned long uncompressed_files;
>> +
>> +	unsigned long regular_files;
>> +	unsigned long dir_files;
>> +	unsigned long chardev_files;
>> +	unsigned long blkdev_files;
>> +	unsigned long fifo_files;
>> +	unsigned long sock_files;
>> +	unsigned long symlink_files;
>> +
>> +	unsigned int file_type_stat[OTHERFILETYPE + 1];
>> +	unsigned int file_org_size[FILE_SIZE_BITS];
> What do "FILE_SIZE_BITS" and "file_org_size" mean?
>
>> +	unsigned int file_comp_size[FILE_SIZE_BITS];
>> +};
>> +static struct statistics stats;
>> +
>>   static struct option long_options[] = {
>>   	{"help", no_argument, 0, 1},
>>   	{0, 0, 0, 0},
>> @@ -33,6 +101,7 @@ static void usage(void)
>>   	fputs("usage: [options] erofs-image \n\n"
>>   		"Dump erofs layout from erofs-image, and [options] are:\n"
>>   		"-s          print information about superblock\n"
>> +		"-S      print statistic information of the erofs-image\n"
>>   		"-v/-V      print dump.erofs version info\n"
>>   		"-h/--help  display this help and exit\n", stderr);
>>   }
>> @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>>   		case 's':
>>   			dumpcfg.print_superblock = true;
>>   			break;
>> +		case 'S':
>> +			dumpcfg.print_statistic = true;
>> +			break;
>>   		case 'v':
>>   		case 'V':
>>   			dumpfs_print_version();
>> @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>>   	return 0;
>>   }
>>   
>> +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
>> +		erofs_off_t last_cluster_size,
>> +		erofs_off_t *last_cluster_compressed_size)
> Hmmm... do we really need the exact compressed bytes?
> or just compressed blocks is enough?
>
> "compressed blocks" can be gotten in erofs inode.
>
> Btw, although I think it's useful for fsck (check if an erofs is correct).
>
>> +{
>> +	int ret;
>> +	int decomp_len;
>> +	int compressed_len = 0;
>> +	char *decompress;
>> +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> +
>> +	ret = dev_read(raw, map->m_pa, map->m_plen);
>> +	if (ret < 0)
>> +		return -EIO;
>> +
>> +	if (erofs_sb_has_lz4_0padding()) {
>> +		compressed_len = map->m_plen;
>> +	} else {
>> +		// lz4 maximum compression ratio is 255
>> +		decompress = (char *)malloc(map->m_plen * 255);
>> +		if (!decompress) {
>> +			erofs_err("allocate memory for decompress space failed");
>> +			return -1;
>> +		}
>> +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> +				map->m_plen, last_cluster_size,
>> +				map->m_plen * 10);
>> +		if (decomp_len < 0) {
>> +			erofs_err("decompress last cluster to get decompressed size failed");
>> +			free(decompress);
>> +			return -1;
>> +		}
>> +		compressed_len = LZ4_compress_destSize(decompress, raw,
>> +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> +		if (compressed_len < 0) {
>> +			erofs_err("compress to get last extent size failed\n");
>> +			free(decompress);
>> +			return -1;
>> +		}
>> +		free(decompress);
>> +		// dut to the use of lz4hc (can use different compress level),
>> +		// our normal lz4 compress result may be bigger
>> +		compressed_len = compressed_len < map->m_plen ?
>> +			compressed_len : map->m_plen;
>> +	}
>> +
>> +	*last_cluster_compressed_size = compressed_len;
>> +	return 0;
>> +}
>> +
>> +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> +		erofs_off_t *size)
>> +{
>> +	int err;
>> +	erofs_blk_t compressedlcs;
>> +	erofs_off_t last_cluster_size;
>> +	erofs_off_t last_cluster_compressed_size;
>> +	struct erofs_map_blocks map = {
>> +		.index = UINT_MAX,
>> +		.m_la = inode->i_size - 1,
>> +	};
>> +
>> +	err = z_erofs_map_blocks_iter(inode, &map);
> (add Jianan here.)
>
> Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> some functionality to get the file distribution as well when the fs isn't
> mounted?
Hi Xiang,

I have sent the patch and verified it with a similar function. It would be
better to use the new interface here.

Thanks,
Jianan
>
>> +	if (err) {
>> +		erofs_err("read nid %ld's last block failed\n", inode->nid);
>> +		return err;
>> +	}
>> +	compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
>> +	*size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
>> +	last_cluster_size = inode->i_size - map.m_la;
>> +
>> +	if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
>> +		*size += last_cluster_size;
>> +	} else {
>> +		err = z_erofs_get_last_cluster_size_from_disk(&map,
>> +				last_cluster_size,
>> +				&last_cluster_compressed_size);
>> +		if (err) {
>> +			erofs_err("get nid %ld's last extent size failed",
>> +					inode->nid);
>> +			return err;
>> +		}
>> +		*size += last_cluster_compressed_size;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int get_file_compressed_size(struct erofs_inode *inode,
>> +		erofs_off_t *size)
> erofs_dump_get_file_occupied_blocks?
>
>> +{
>> +	int err;
>> +
>> +	*size = 0;
>> +	switch (inode->datalayout) {
>> +	case EROFS_INODE_FLAT_INLINE:
>> +	case EROFS_INODE_FLAT_PLAIN:
>> +		stats.uncompressed_files++;
>> +		*size = inode->i_size;
>> +		break;
>> +	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
>> +	case EROFS_INODE_FLAT_COMPRESSION:
>> +		stats.compressed_files++;
>> +		err = z_erofs_get_compressed_size(inode, size);
>> +		if (err) {
>> +			erofs_err("get compressed file size failed\n");
>> +			return err;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>>   static void dumpfs_print_superblock(void)
>>   {
>>   	time_t time = sbi.build_time;
>> @@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
>>   
>>   }
>>   
>> +static int get_file_type(const char *filename)
>> +{
>> +	char *postfix = strrchr(filename, '.');
>> +	int type = SOFILETYPE;
>> +
>> +	if (postfix == NULL)
>> +		return OTHERFILETYPE;
>> +	while (type < OTHERFILETYPE) {
>> +		if (strcmp(postfix, file_types[type]) == 0)
>> +			break;
>> +		type++;
>> +	}
>> +	return type;
>> +}
>> +
>> +// file count、file size、file type
> It'd be better to avoid C++ comments...
>
>> +static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
>> +{
>> +	struct erofs_inode vi = { .nid = nid};
>> +	int err;
>> +	char buf[EROFS_BLKSIZ];
>> +	char filename[PATH_MAX + 1];
>> +	erofs_off_t offset;
>> +
>> +	err = erofs_read_inode_from_disk(&vi);
>> +	if (err)
>> +		return err;
>> +
>> +	offset = 0;
>> +	while (offset < vi.i_size) {
>> +		erofs_off_t maxsize = min_t(erofs_off_t,
>> +			vi.i_size - offset, EROFS_BLKSIZ);
>> +		struct erofs_dirent *de = (void *)buf;
>> +		struct erofs_dirent *end;
>> +		unsigned int nameoff;
>> +
>> +		err = erofs_pread(&vi, buf, maxsize, offset);
>> +		if (err)
>> +			return err;
>> +
>> +		nameoff = le16_to_cpu(de->nameoff);
>> +
>> +		if (nameoff < sizeof(struct erofs_dirent) ||
>> +		    nameoff >= PAGE_SIZE) {
>> +			erofs_err("invalid de[0].nameoff %u @ nid %llu",
>> +				  nameoff, nid | 0ULL);
>> +			return -EFSCORRUPTED;
>> +		}
>> +		end = (void *)buf + nameoff;
>> +		while (de < end) {
>> +			const char *dname;
>> +			unsigned int dname_len;
>> +			struct erofs_inode inode = { .nid = de->nid };
>> +			int actual_size_mark;
>> +			int original_size_mark;
>> +			erofs_off_t actual_size = 0;
>> +			erofs_off_t original_size;
>> +
>> +			nameoff = le16_to_cpu(de->nameoff);
>> +			dname = (char *)buf + nameoff;
>> +
>> +			if (de + 1 >= end)
>> +				dname_len = strnlen(dname, maxsize - nameoff);
>> +			else
>> +				dname_len =
>> +					le16_to_cpu(de[1].nameoff) - nameoff;
>> +
>> +			/* a corrupted entry is found */
>> +			if (nameoff + dname_len > maxsize ||
>> +				dname_len > EROFS_NAME_LEN) {
>> +				erofs_err("bogus dirent @ nid %llu",
>> +						le64_to_cpu(de->nid) | 0ULL);
>> +				DBG_BUGON(1);
>> +				return -EFSCORRUPTED;
>> +			}
>> +			if (de->nid != nid && de->nid != parent_nid)
>> +				stats.files++;
>> +
>> +			memset(filename, 0, PATH_MAX + 1);
>> +			memcpy(filename, dname, dname_len);
>> +
>> +			switch (de->file_type) {
>> +			case EROFS_FT_UNKNOWN:
>> +				break;
>> +			case EROFS_FT_REG_FILE:
>> +				err = erofs_read_inode_from_disk(&inode);
>> +				if (err) {
>> +					erofs_err("read file inode from disk failed!");
>> +					return err;
>> +				}
>> +				original_size = inode.i_size;
>> +				stats.files_total_origin_size += original_size;
>> +				stats.regular_files++;
>> +
>> +				err = get_file_compressed_size(&inode,
>> +						&actual_size);
>> +				if (err) {
>> +					erofs_err("get file size failed\n");
>> +					return err;
>> +				}
>> +				stats.files_total_size += actual_size;
>> +				stats.file_type_stat[get_file_type(filename)]++;
>> +
>> +				original_size_mark = 0;
>> +				actual_size_mark = 0;
>> +				actual_size >>= 10;
>> +				original_size >>= 10;
>> +
>> +				while (actual_size || original_size) {
>> +					if (actual_size) {
>> +						actual_size >>= 1;
>> +						actual_size_mark++;
>> +					}
>> +					if (original_size) {
>> +						original_size >>= 1;
>> +						original_size_mark++;
>> +					}
>> +				}
>> +
>> +				if (original_size_mark >= FILE_SIZE_BITS - 1)
>> +					stats.file_org_size[FILE_SIZE_BITS - 1]++;
>> +				else
>> +					stats.file_org_size[original_size_mark]++;
>> +				if (actual_size_mark >= FILE_SIZE_BITS - 1)
>> +					stats.file_comp_size[FILE_SIZE_BITS - 1]++;
>> +				else
>> +					stats.file_comp_size[actual_size_mark]++;
>> +				break;
>> +
>> +			case EROFS_FT_DIR:
>> +				if (de->nid != nid && de->nid != parent_nid) {
>
>
>> +					stats.dir_files++;
>> +					stats.uncompressed_files++;
>> +					err = read_dir(de->nid, nid);
>> +					if (err) {
>> +						fprintf(stderr,
>> +								"parse dir nid %llu error occurred\n",
>> +								de->nid);
>> +						return err;
>> +					}
>> +				}
>> +				break;
>> +			case EROFS_FT_CHRDEV:
>> +				stats.chardev_files++;
>> +				stats.uncompressed_files++;
> How about using an array instead?
>
>> +				break;
>> +			case EROFS_FT_BLKDEV:
>> +				stats.blkdev_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			case EROFS_FT_FIFO:
>> +				stats.fifo_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			case EROFS_FT_SOCK:
>> +				stats.sock_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			case EROFS_FT_SYMLINK:
>> +				stats.symlink_files++;
>> +				stats.uncompressed_files++;
>> +				break;
>> +			}
>> +			++de;
>> +		}
>> +		offset += maxsize;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static void dumpfs_print_statistic_of_filetype(void)
>> +{
>> +	fprintf(stderr, "Filesystem total file count:         %lu\n",
>> +			stats.files);
>> +	fprintf(stderr, "Filesystem regular file count:       %lu\n",
>> +			stats.regular_files);
>> +	fprintf(stderr, "Filesystem directory count:          %lu\n",
>> +			stats.dir_files);
>> +	fprintf(stderr, "Filesystem symlink file count:       %lu\n",
>> +			stats.symlink_files);
>> +	fprintf(stderr, "Filesystem character device count:   %lu\n",
>> +			stats.chardev_files);
>> +	fprintf(stderr, "Filesystem block device count:       %lu\n",
>> +			stats.blkdev_files);
>> +	fprintf(stderr, "Filesystem FIFO file count:          %lu\n",
>> +			stats.fifo_files);
>> +	fprintf(stderr, "Filesystem SOCK file count:          %lu\n",
>> +			stats.sock_files);
> Also a loop can be used here.
>
>> +}
>> +
>> +static void dumpfs_print_chart_row(char *col1, unsigned int col2,
>> +		double col3, char *col4)
>> +{
>> +	char row[500] = {0};
>> +
>> +	sprintf(row, chart_format, col1, col2, col3, col4);
>> +	fprintf(stderr, row);
>> +}
>> +
>> +static void dumpfs_print_chart_of_file(unsigned int *file_counts,
>> +		unsigned int len)
>> +{
>> +	char col1[30];
>> +	unsigned int col2;
>> +	double col3;
>> +	char col4[400];
>> +	unsigned int lowerbound = 0;
>> +	unsigned int upperbound = 1;
>> +
>> +	fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
>> +			"ratio", "distribution");
>> +	for (int i = 0; i < len; i++) {
>> +		memset(col1, 0, 30);
> 		memset(col1, 0, sizeof(col1));
>
>> +		memset(col4, 0, 400);
> 		memset(col4, 0, sizeof(col4));
>
> Thanks,
> Gao Xiang
>
>> +		if (i == len - 1)
>> +			strcpy(col1, " others");
>> +		else if (i <= 6)
>> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
>> +		else
>> +
>> +			sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
>> +		col2 = file_counts[i];
>> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
>> +		memset(col4, '#', col3 / 2);
>> +		dumpfs_print_chart_row(col1, col2, col3, col4);
>> +		lowerbound = upperbound;
>> +		upperbound <<= 1;
>> +	}
>> +}
>> +
>> +static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
>> +{
>> +	char col1[30];
>> +	unsigned int col2;
>> +	double col3;
>> +	char col4[401];
>> +
>> +	fprintf(stderr, header_format, "type", "count", "ratio",
>> +			"distribution");
>> +	for (int i = 0; i < len; i++) {
>> +		memset(col1, 0, 30);
>> +		memset(col4, 0, 401);
>> +		sprintf(col1, "%-17s", file_types[i]);
>> +		col2 = stats.file_type_stat[i];
>> +		col3 = (double)(100 * col2) / (double)stats.regular_files;
>> +		memset(col4, '#', col3 / 2);
>> +		dumpfs_print_chart_row(col1, col2, col3, col4);
>> +	}
>> +}
>> +
>> +static void dumpfs_print_statistic_of_compression(void)
>> +{
>> +	stats.compress_rate = (double)(100 * stats.files_total_size) /
>> +		(double)(stats.files_total_origin_size);
>> +	fprintf(stderr, "Filesystem compressed files:         %lu\n",
>> +			stats.compressed_files);
>> +	fprintf(stderr, "Filesystem uncompressed files:       %lu\n",
>> +			stats.uncompressed_files);
>> +	fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
>> +			stats.files_total_origin_size);
>> +	fprintf(stderr, "Filesystem total file size:          %lu Bytes\n",
>> +			stats.files_total_size);
>> +	fprintf(stderr, "Filesystem compress rate:            %.2f%%\n",
>> +			stats.compress_rate);
>> +}
>> +
>> +static void dumpfs_print_statistic(void)
>> +{
>> +	int err;
>> +
>> +	stats.blocks = sbi.blocks;
>> +	err = read_dir(sbi.root_nid, sbi.root_nid);
>> +	if (err) {
>> +		erofs_err("read dir failed");
>> +		return;
>> +	}
>> +
>> +	dumpfs_print_statistic_of_filetype();
>> +	dumpfs_print_statistic_of_compression();
>> +
>> +	fprintf(stderr, "\nOriginal file size distribution:\n");
>> +	dumpfs_print_chart_of_file(stats.file_org_size, 17);
>> +	fprintf(stderr, "\nOn-Disk file size distribution:\n");
>> +	dumpfs_print_chart_of_file(stats.file_comp_size, 17);
>> +	fprintf(stderr, "\nFile type distribution:\n");
>> +	dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
>> +}
>> +
>>   int main(int argc, char **argv)
>>   {
>>   	int err = 0;
>> @@ -138,5 +608,9 @@ int main(int argc, char **argv)
>>   	if (dumpcfg.print_superblock)
>>   		dumpfs_print_superblock();
>>   
>> +	if (dumpcfg.print_statistic)
>> +		dumpfs_print_statistic();
>> +
>> +
>>   	return 0;
>>   }
>> -- 
>> 2.25.4
>>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-13  4:30     ` Huang Jianan via Linux-erofs
@ 2021-09-13 12:46       ` Gao Xiang
  2021-09-14  2:31         ` Guo Xuenan
  0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-09-13 12:46 UTC (permalink / raw)
  To: Huang Jianan; +Cc: linux-erofs, mpiglet

On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
> 在 2021/9/12 0:13, Gao Xiang 写道:
> > (+Cc Jianan.)
> > 
> > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> > > From: mpiglet <mpiglet@outlook.com>
> > > 
> > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
> > > ---
> > >   dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >   1 file changed, 474 insertions(+)
> > > 
> > > diff --git a/dump/main.c b/dump/main.c
> > > index 25ac89f..b0acc0b 100644
> > > --- a/dump/main.c
> > > +++ b/dump/main.c
> > > @@ -19,10 +19,78 @@
> > >   struct dumpcfg {
> > >   	bool print_superblock;
> > > +	bool print_statistic;
> > >   	bool print_version;
> > >   };
> > >   static struct dumpcfg dumpcfg;
> > > +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> > > +static char *file_types[] = {
> > > +	".so",
> > > +	".png",
> > > +	".jpg",
> > > +	".xml",
> > > +	".html",
> > > +	".odex",
> > > +	".vdex",
> > > +	".apk",
> > > +	".ttf",
> > > +	".jar",
> > > +	".json",
> > > +	".ogg",
> > > +	".oat",
> > > +	".art",
> > > +	".rc",
> > > +	".otf",
> > > +	".txt",
> > > +	"others",
> > > +};
> > > +enum {
> > > +	SOFILETYPE = 0,
> > > +	PNGFILETYPE,
> > > +	JPEGFILETYPE,
> > > +	XMLFILETYPE,
> > > +	HTMLFILETYPE,
> > > +	ODEXFILETYPE,
> > > +	VDEXFILETYPE,
> > > +	APKFILETYPE,
> > > +	TTFFILETYPE,
> > > +	JARFILETYPE,
> > > +	JSONFILETYPE,
> > > +	OGGFILETYPE,
> > > +	OATFILETYPE,
> > > +	ARTFILETYPE,
> > > +	RCFILETYPE,
> > > +	OTFFILETYPE,
> > > +	TXTFILETYPE,
> > > +	OTHERFILETYPE,
> > > +};
> > Why we need enums here? Can these be resolved with some array index?
> > 
> > > +
> > > +#define	FILE_SIZE_BITS	30
> > > +struct statistics {
> > > +	unsigned long blocks;
> > > +	unsigned long files;
> > > +	unsigned long files_total_size;
> > > +	unsigned long files_total_origin_size;
> > > +	double compress_rate;
> > > +	unsigned long compressed_files;
> > > +	unsigned long uncompressed_files;
> > > +
> > > +	unsigned long regular_files;
> > > +	unsigned long dir_files;
> > > +	unsigned long chardev_files;
> > > +	unsigned long blkdev_files;
> > > +	unsigned long fifo_files;
> > > +	unsigned long sock_files;
> > > +	unsigned long symlink_files;
> > > +
> > > +	unsigned int file_type_stat[OTHERFILETYPE + 1];
> > > +	unsigned int file_org_size[FILE_SIZE_BITS];
> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
> > 
> > > +	unsigned int file_comp_size[FILE_SIZE_BITS];
> > > +};
> > > +static struct statistics stats;
> > > +
> > >   static struct option long_options[] = {
> > >   	{"help", no_argument, 0, 1},
> > >   	{0, 0, 0, 0},
> > > @@ -33,6 +101,7 @@ static void usage(void)
> > >   	fputs("usage: [options] erofs-image \n\n"
> > >   		"Dump erofs layout from erofs-image, and [options] are:\n"
> > >   		"-s          print information about superblock\n"
> > > +		"-S      print statistic information of the erofs-image\n"
> > >   		"-v/-V      print dump.erofs version info\n"
> > >   		"-h/--help  display this help and exit\n", stderr);
> > >   }
> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > >   		case 's':
> > >   			dumpcfg.print_superblock = true;
> > >   			break;
> > > +		case 'S':
> > > +			dumpcfg.print_statistic = true;
> > > +			break;
> > >   		case 'v':
> > >   		case 'V':
> > >   			dumpfs_print_version();
> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > >   	return 0;
> > >   }
> > > +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> > > +		erofs_off_t last_cluster_size,
> > > +		erofs_off_t *last_cluster_compressed_size)
> > Hmmm... do we really need the exact compressed bytes?
> > or just compressed blocks is enough?
> > 
> > "compressed blocks" can be gotten in erofs inode.
> > 
> > Btw, although I think it's useful for fsck (check if an erofs is correct).
> > 
> > > +{
> > > +	int ret;
> > > +	int decomp_len;
> > > +	int compressed_len = 0;
> > > +	char *decompress;
> > > +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> > > +
> > > +	ret = dev_read(raw, map->m_pa, map->m_plen);
> > > +	if (ret < 0)
> > > +		return -EIO;
> > > +
> > > +	if (erofs_sb_has_lz4_0padding()) {
> > > +		compressed_len = map->m_plen;
> > > +	} else {
> > > +		// lz4 maximum compression ratio is 255
> > > +		decompress = (char *)malloc(map->m_plen * 255);
> > > +		if (!decompress) {
> > > +			erofs_err("allocate memory for decompress space failed");
> > > +			return -1;
> > > +		}
> > > +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> > > +				map->m_plen, last_cluster_size,
> > > +				map->m_plen * 10);
> > > +		if (decomp_len < 0) {
> > > +			erofs_err("decompress last cluster to get decompressed size failed");
> > > +			free(decompress);
> > > +			return -1;
> > > +		}
> > > +		compressed_len = LZ4_compress_destSize(decompress, raw,
> > > +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> > > +		if (compressed_len < 0) {
> > > +			erofs_err("compress to get last extent size failed\n");
> > > +			free(decompress);
> > > +			return -1;
> > > +		}
> > > +		free(decompress);
> > > +		// dut to the use of lz4hc (can use different compress level),
> > > +		// our normal lz4 compress result may be bigger
> > > +		compressed_len = compressed_len < map->m_plen ?
> > > +			compressed_len : map->m_plen;
> > > +	}
> > > +
> > > +	*last_cluster_compressed_size = compressed_len;
> > > +	return 0;
> > > +}
> > > +
> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> > > +		erofs_off_t *size)
> > > +{
> > > +	int err;
> > > +	erofs_blk_t compressedlcs;
> > > +	erofs_off_t last_cluster_size;
> > > +	erofs_off_t last_cluster_compressed_size;
> > > +	struct erofs_map_blocks map = {
> > > +		.index = UINT_MAX,
> > > +		.m_la = inode->i_size - 1,
> > > +	};
> > > +
> > > +	err = z_erofs_map_blocks_iter(inode, &map);
> > (add Jianan here.)
> > 
> > Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> > some functionality to get the file distribution as well when the fs isn't
> > mounted?
> Hi Xiang,
> 
> I have sent the patch and verified it with a similar function. Better to use
> the new interface here.

Yeah, thanks for the patch:
https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/

Hopefully Xuenan could build on this work.

Thanks,
Gao Xiang

> 
> Thanks,
> Jianan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
  2021-09-13 12:46       ` Gao Xiang
@ 2021-09-14  2:31         ` Guo Xuenan
  0 siblings, 0 replies; 13+ messages in thread
From: Guo Xuenan @ 2021-09-14  2:31 UTC (permalink / raw)
  To: Gao Xiang, Huang Jianan; +Cc: linux-erofs, mpiglet

OK, I will send out the v2 patch today, and it will be based on Jianan's
work.

在 2021/9/13 20:46, Gao Xiang 写道:
> On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
>> 在 2021/9/12 0:13, Gao Xiang 写道:
>> > (+Cc Jianan.)
>> > > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> > > From: mpiglet <mpiglet@outlook.com>
>> > > > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
>> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
>> > > ---
>> > >   dump/main.c | 474 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> > >   1 file changed, 474 insertions(+)
>> > > > > diff --git a/dump/main.c b/dump/main.c
>> > > index 25ac89f..b0acc0b 100644
>> > > --- a/dump/main.c
>> > > +++ b/dump/main.c
>> > > @@ -19,10 +19,78 @@
>> > >   struct dumpcfg {
>> > >       bool print_superblock;
>> > > +    bool print_statistic;
>> > >       bool print_version;
>> > >   };
>> > >   static struct dumpcfg dumpcfg;
>> > > +static const char chart_format[] = "%-16s    %-11d %8.2f%% 
>> |%-50s|\n";
>> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> > > +static char *file_types[] = {
>> > > +    ".so",
>> > > +    ".png",
>> > > +    ".jpg",
>> > > +    ".xml",
>> > > +    ".html",
>> > > +    ".odex",
>> > > +    ".vdex",
>> > > +    ".apk",
>> > > +    ".ttf",
>> > > +    ".jar",
>> > > +    ".json",
>> > > +    ".ogg",
>> > > +    ".oat",
>> > > +    ".art",
>> > > +    ".rc",
>> > > +    ".otf",
>> > > +    ".txt",
>> > > +    "others",
>> > > +};
>> > > +enum {
>> > > +    SOFILETYPE = 0,
>> > > +    PNGFILETYPE,
>> > > +    JPEGFILETYPE,
>> > > +    XMLFILETYPE,
>> > > +    HTMLFILETYPE,
>> > > +    ODEXFILETYPE,
>> > > +    VDEXFILETYPE,
>> > > +    APKFILETYPE,
>> > > +    TTFFILETYPE,
>> > > +    JARFILETYPE,
>> > > +    JSONFILETYPE,
>> > > +    OGGFILETYPE,
>> > > +    OATFILETYPE,
>> > > +    ARTFILETYPE,
>> > > +    RCFILETYPE,
>> > > +    OTFFILETYPE,
>> > > +    TXTFILETYPE,
>> > > +    OTHERFILETYPE,
>> > > +};
>> > Why we need enums here? Can these be resolved with some array index?
>> > > > +
>> > > +#define    FILE_SIZE_BITS    30
>> > > +struct statistics {
>> > > +    unsigned long blocks;
>> > > +    unsigned long files;
>> > > +    unsigned long files_total_size;
>> > > +    unsigned long files_total_origin_size;
>> > > +    double compress_rate;
>> > > +    unsigned long compressed_files;
>> > > +    unsigned long uncompressed_files;
>> > > +
>> > > +    unsigned long regular_files;
>> > > +    unsigned long dir_files;
>> > > +    unsigned long chardev_files;
>> > > +    unsigned long blkdev_files;
>> > > +    unsigned long fifo_files;
>> > > +    unsigned long sock_files;
>> > > +    unsigned long symlink_files;
>> > > +
>> > > +    unsigned int file_type_stat[OTHERFILETYPE + 1];
>> > > +    unsigned int file_org_size[FILE_SIZE_BITS];
>> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
>> > > > +    unsigned int file_comp_size[FILE_SIZE_BITS];
>> > > +};
>> > > +static struct statistics stats;
>> > > +
>> > >   static struct option long_options[] = {
>> > >       {"help", no_argument, 0, 1},
>> > >       {0, 0, 0, 0},
>> > > @@ -33,6 +101,7 @@ static void usage(void)
>> > >       fputs("usage: [options] erofs-image \n\n"
>> > >           "Dump erofs layout from erofs-image, and [options] are:\n"
>> > >           "-s          print information about superblock\n"
>> > > +        "-S      print statistic information of the erofs-image\n"
>> > >           "-v/-V      print dump.erofs version info\n"
>> > >           "-h/--help  display this help and exit\n", stderr);
>> > >   }
>> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, 
>> char **argv)
>> > >           case 's':
>> > >               dumpcfg.print_superblock = true;
>> > >               break;
>> > > +        case 'S':
>> > > +            dumpcfg.print_statistic = true;
>> > > +            break;
>> > >           case 'v':
>> > >           case 'V':
>> > >               dumpfs_print_version();
>> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int 
>> argc, char **argv)
>> > >       return 0;
>> > >   }
>> > > +static int z_erofs_get_last_cluster_size_from_disk(struct 
>> erofs_map_blocks *map,
>> > > +        erofs_off_t last_cluster_size,
>> > > +        erofs_off_t *last_cluster_compressed_size)
>> > Hmmm... do we really need the exact compressed bytes?
>> > or just compressed blocks is enough?
>> > > "compressed blocks" can be gotten in erofs inode.
>> > > Btw, although I think it's useful for fsck (check if an erofs is 
>> correct).
>> > > > +{
>> > > +    int ret;
>> > > +    int decomp_len;
>> > > +    int compressed_len = 0;
>> > > +    char *decompress;
>> > > +    char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> > > +
>> > > +    ret = dev_read(raw, map->m_pa, map->m_plen);
>> > > +    if (ret < 0)
>> > > +        return -EIO;
>> > > +
>> > > +    if (erofs_sb_has_lz4_0padding()) {
>> > > +        compressed_len = map->m_plen;
>> > > +    } else {
>> > > +        // lz4 maximum compression ratio is 255
>> > > +        decompress = (char *)malloc(map->m_plen * 255);
>> > > +        if (!decompress) {
>> > > +            erofs_err("allocate memory for decompress space 
>> failed");
>> > > +            return -1;
>> > > +        }
>> > > +        decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> > > +                map->m_plen, last_cluster_size,
>> > > +                map->m_plen * 10);
>> > > +        if (decomp_len < 0) {
>> > > +            erofs_err("decompress last cluster to get 
>> decompressed size failed");
>> > > +            free(decompress);
>> > > +            return -1;
>> > > +        }
>> > > +        compressed_len = LZ4_compress_destSize(decompress, raw,
>> > > +                &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> > > +        if (compressed_len < 0) {
>> > > +            erofs_err("compress to get last extent size failed\n");
>> > > +            free(decompress);
>> > > +            return -1;
>> > > +        }
>> > > +        free(decompress);
>> > > +        // dut to the use of lz4hc (can use different compress 
>> level),
>> > > +        // our normal lz4 compress result may be bigger
>> > > +        compressed_len = compressed_len < map->m_plen ?
>> > > +            compressed_len : map->m_plen;
>> > > +    }
>> > > +
>> > > +    *last_cluster_compressed_size = compressed_len;
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> > > +        erofs_off_t *size)
>> > > +{
>> > > +    int err;
>> > > +    erofs_blk_t compressedlcs;
>> > > +    erofs_off_t last_cluster_size;
>> > > +    erofs_off_t last_cluster_compressed_size;
>> > > +    struct erofs_map_blocks map = {
>> > > +        .index = UINT_MAX,
>> > > +        .m_la = inode->i_size - 1,
>> > > +    };
>> > > +
>> > > +    err = z_erofs_map_blocks_iter(inode, &map);
>> > (add Jianan here.)
>> > > Can we port the latest erofs kernel fiemap code to erofs-utils, 
>> and add
>> > some functionality to get the file distribution as well when the fs 
>> isn't
>> > mounted?
>> Hi Xiang,
>>
>> I have sent the patch and verified it with a similar function. Better 
>> to use
>> the
>> new interface here.
>
> Yeah, thanks for the patch:
> https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/ 
>
>
> Hopefully Xuenan could base on this work.
>
> Thanks,
> Gao XIang
>
>>
>> Thanks,
>> Jianan

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-09-14  2:32 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
2021-09-11 15:58   ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
2021-09-11 16:13   ` Gao Xiang
2021-09-13  4:30     ` Huang Jianan via Linux-erofs
2021-09-13 12:46       ` Gao Xiang
2021-09-14  2:31         ` Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
2021-09-11 16:25   ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
2021-09-11 16:29   ` Gao Xiang
2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.