* [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
2021-09-11 15:58 ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
To: xiang, linux-erofs; +Cc: mpiglet
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
dump/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/dump/main.c b/dump/main.c
index 8fbc24a..25ac89f 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -17,6 +17,12 @@
#include "erofs/print.h"
#include "erofs/io.h"
+struct dumpcfg {
+ bool print_superblock;
+ bool print_version;
+};
+static struct dumpcfg dumpcfg;
+
static struct option long_options[] = {
{"help", no_argument, 0, 1},
{0, 0, 0, 0},
@@ -26,6 +32,7 @@ static void usage(void)
{
fputs("usage: [options] erofs-image \n\n"
"Dump erofs layout from erofs-image, and [options] are:\n"
+ "-s print information about superblock\n"
"-v/-V print dump.erofs version info\n"
"-h/--help display this help and exit\n", stderr);
}
@@ -41,6 +48,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
long_options, NULL)) != -1) {
switch (opt) {
+ case 's':
+ dumpcfg.print_superblock = true;
+ break;
case 'v':
case 'V':
dumpfs_print_version();
@@ -68,6 +78,39 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
return 0;
}
+static void dumpfs_print_superblock(void)
+{
+ time_t time = sbi.build_time;
+
+ fprintf(stderr, "Filesystem magic number: 0x%04X\n", EROFS_SUPER_MAGIC_V1);
+ fprintf(stderr, "Filesystem blocks: %lu\n", sbi.blocks);
+ fprintf(stderr, "Filesystem meta block: %u\n", sbi.meta_blkaddr);
+ fprintf(stderr, "Filesystem xattr block: %u\n", sbi.xattr_blkaddr);
+ fprintf(stderr, "Filesystem root nid: %ld\n", sbi.root_nid);
+ fprintf(stderr, "Filesystem valid inos: %lu\n", sbi.inos);
+ fprintf(stderr, "Filesystem created: %s", ctime(&time));
+ fprintf(stderr, "Filesystem uuid: ");
+ for (int i = 0; i < 16; i++)
+ fprintf(stderr, "%02x", sbi.uuid[i]);
+ fprintf(stderr, "\n");
+
+ if (erofs_sb_has_lz4_0padding())
+ fprintf(stderr, "Filesystem support lz4 0padding\n");
+ else
+ fprintf(stderr, "Filesystem not support lz4 0padding\n");
+
+ if (erofs_sb_has_big_pcluster())
+ fprintf(stderr, "Filesystem support big pcluster\n");
+ else
+ fprintf(stderr, "Filesystem not support big pcluster\n");
+
+ if (erofs_sb_has_sb_chksum())
+ fprintf(stderr, "Filesystem has super block checksum feature\n");
+ else
+ fprintf(stderr, "Filesystem has no superblock checksum feature\n");
+
+}
+
int main(int argc, char **argv)
{
int err = 0;
@@ -80,5 +123,20 @@ int main(int argc, char **argv)
return -1;
}
+ err = dev_open_ro(cfg.c_img_path);
+ if (err) {
+ erofs_err("open image file failed");
+ return -1;
+ }
+
+ err = erofs_read_superblock();
+ if (err) {
+ erofs_err("read superblock failed");
+ return -1;
+ }
+
+ if (dumpcfg.print_superblock)
+ dumpfs_print_superblock();
+
return 0;
}
--
2.25.4
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information
2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
@ 2021-09-11 15:58 ` Gao Xiang
0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 15:58 UTC (permalink / raw)
To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet
On Sat, Sep 11, 2021 at 09:46:32PM +0800, Guo Xuenan wrote:
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
Same here.
> ---
> dump/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 58 insertions(+)
>
> diff --git a/dump/main.c b/dump/main.c
> index 8fbc24a..25ac89f 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -17,6 +17,12 @@
> #include "erofs/print.h"
> #include "erofs/io.h"
>
> +struct dumpcfg {
> + bool print_superblock;
> + bool print_version;
> +};
> +static struct dumpcfg dumpcfg;
> +
> static struct option long_options[] = {
> {"help", no_argument, 0, 1},
> {0, 0, 0, 0},
> @@ -26,6 +32,7 @@ static void usage(void)
> {
> fputs("usage: [options] erofs-image \n\n"
> "Dump erofs layout from erofs-image, and [options] are:\n"
> + "-s print information about superblock\n"
> "-v/-V print dump.erofs version info\n"
> "-h/--help display this help and exit\n", stderr);
> }
> @@ -41,6 +48,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
> long_options, NULL)) != -1) {
> switch (opt) {
> + case 's':
> + dumpcfg.print_superblock = true;
> + break;
> case 'v':
> case 'V':
> dumpfs_print_version();
> @@ -68,6 +78,39 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> return 0;
> }
>
> +static void dumpfs_print_superblock(void)
> +{
> + time_t time = sbi.build_time;
> +
> + fprintf(stderr, "Filesystem magic number: 0x%04X\n", EROFS_SUPER_MAGIC_V1);
> + fprintf(stderr, "Filesystem blocks: %lu\n", sbi.blocks);
> + fprintf(stderr, "Filesystem meta block: %u\n", sbi.meta_blkaddr);
Filesystem inode metadata start block:
> + fprintf(stderr, "Filesystem xattr block: %u\n", sbi.xattr_blkaddr);
Filesystem shared xattr metadata start block:
> + fprintf(stderr, "Filesystem root nid: %ld\n", sbi.root_nid);
> + fprintf(stderr, "Filesystem valid inos: %lu\n", sbi.inos);
Inode count:
> + fprintf(stderr, "Filesystem created: %s", ctime(&time));
> + fprintf(stderr, "Filesystem uuid: ");
Filesystem UUID:
How about printing to stdout directly, following the output format of
dumpe2fs:
https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/misc/dumpe2fs.c#n219
Filesystem volume name: <none>
Last mounted on: /
Filesystem UUID: c46ea44a-e249-446f-af40-xxxxxxxxxxxx
Filesystem magic number: 0xEF53
Filesystem revision #: 1 (dynamic)
Filesystem features: has_journal ext_attr resize_inode dir_index filetype needs_recovery extent 64bit flex_bg sparse_super large_file huge_file dir_nlink extra_isize metadata_csum
Filesystem flags: signed_directory_hash
Default mount options: user_xattr acl
Filesystem state: clean
Errors behavior: Continue
Filesystem OS type: Linux
Inode count: 8003584
Block count: 32000000
Reserved block count: 1600000
Free blocks: 18661241
Free inodes: 7681550
First block: 0
Block size: 4096
Fragment size: 4096
> + for (int i = 0; i < 16; i++)
> + fprintf(stderr, "%02x", sbi.uuid[i]);
> + fprintf(stderr, "\n");
This doesn't seem to be the correct UUID format...
> +
> + if (erofs_sb_has_lz4_0padding())
> + fprintf(stderr, "Filesystem support lz4 0padding\n");
> + else
> + fprintf(stderr, "Filesystem not support lz4 0padding\n");
> +
> + if (erofs_sb_has_big_pcluster())
> + fprintf(stderr, "Filesystem support big pcluster\n");
> + else
> + fprintf(stderr, "Filesystem not support big pcluster\n");
> +
> + if (erofs_sb_has_sb_chksum())
> + fprintf(stderr, "Filesystem has super block checksum feature\n");
> + else
> + fprintf(stderr, "Filesystem has no superblock checksum feature\n");
How about showing the features in a list as above?
Thanks,
Gao Xiang
> +
> +}
> +
> int main(int argc, char **argv)
> {
> int err = 0;
> @@ -80,5 +123,20 @@ int main(int argc, char **argv)
> return -1;
> }
>
> + err = dev_open_ro(cfg.c_img_path);
> + if (err) {
> + erofs_err("open image file failed");
> + return -1;
> + }
> +
> + err = erofs_read_superblock();
> + if (err) {
> + erofs_err("read superblock failed");
> + return -1;
> + }
> +
> + if (dumpcfg.print_superblock)
> + dumpfs_print_superblock();
> +
> return 0;
> }
> --
> 2.25.4
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
2021-09-11 16:13 ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
` (2 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
To: xiang, linux-erofs; +Cc: mpiglet
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="y", Size: 13813 bytes --]
From: mpiglet <mpiglet@outlook.com>
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 474 insertions(+)
diff --git a/dump/main.c b/dump/main.c
index 25ac89f..b0acc0b 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -19,10 +19,78 @@
struct dumpcfg {
bool print_superblock;
+ bool print_statistic;
bool print_version;
};
static struct dumpcfg dumpcfg;
+static const char chart_format[] = "%-16s %-11d %8.2f%% |%-50s|\n";
+static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
+static char *file_types[] = {
+ ".so",
+ ".png",
+ ".jpg",
+ ".xml",
+ ".html",
+ ".odex",
+ ".vdex",
+ ".apk",
+ ".ttf",
+ ".jar",
+ ".json",
+ ".ogg",
+ ".oat",
+ ".art",
+ ".rc",
+ ".otf",
+ ".txt",
+ "others",
+};
+enum {
+ SOFILETYPE = 0,
+ PNGFILETYPE,
+ JPEGFILETYPE,
+ XMLFILETYPE,
+ HTMLFILETYPE,
+ ODEXFILETYPE,
+ VDEXFILETYPE,
+ APKFILETYPE,
+ TTFFILETYPE,
+ JARFILETYPE,
+ JSONFILETYPE,
+ OGGFILETYPE,
+ OATFILETYPE,
+ ARTFILETYPE,
+ RCFILETYPE,
+ OTFFILETYPE,
+ TXTFILETYPE,
+ OTHERFILETYPE,
+};
+
+#define FILE_SIZE_BITS 30
+struct statistics {
+ unsigned long blocks;
+ unsigned long files;
+ unsigned long files_total_size;
+ unsigned long files_total_origin_size;
+ double compress_rate;
+ unsigned long compressed_files;
+ unsigned long uncompressed_files;
+
+ unsigned long regular_files;
+ unsigned long dir_files;
+ unsigned long chardev_files;
+ unsigned long blkdev_files;
+ unsigned long fifo_files;
+ unsigned long sock_files;
+ unsigned long symlink_files;
+
+ unsigned int file_type_stat[OTHERFILETYPE + 1];
+ unsigned int file_org_size[FILE_SIZE_BITS];
+ unsigned int file_comp_size[FILE_SIZE_BITS];
+};
+static struct statistics stats;
+
static struct option long_options[] = {
{"help", no_argument, 0, 1},
{0, 0, 0, 0},
@@ -33,6 +101,7 @@ static void usage(void)
fputs("usage: [options] erofs-image \n\n"
"Dump erofs layout from erofs-image, and [options] are:\n"
"-s print information about superblock\n"
+ "-S print statistic information of the erofs-image\n"
"-v/-V print dump.erofs version info\n"
"-h/--help display this help and exit\n", stderr);
}
@@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
case 's':
dumpcfg.print_superblock = true;
break;
+ case 'S':
+ dumpcfg.print_statistic = true;
+ break;
case 'v':
case 'V':
dumpfs_print_version();
@@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
return 0;
}
+static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
+ erofs_off_t last_cluster_size,
+ erofs_off_t *last_cluster_compressed_size)
+{
+ int ret;
+ int decomp_len;
+ int compressed_len = 0;
+ char *decompress;
+ char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
+
+ ret = dev_read(raw, map->m_pa, map->m_plen);
+ if (ret < 0)
+ return -EIO;
+
+ if (erofs_sb_has_lz4_0padding()) {
+ compressed_len = map->m_plen;
+ } else {
+ // lz4 maximum compression ratio is 255
+ decompress = (char *)malloc(map->m_plen * 255);
+ if (!decompress) {
+ erofs_err("allocate memory for decompress space failed");
+ return -1;
+ }
+ decomp_len = LZ4_decompress_safe_partial(raw, decompress,
+ map->m_plen, last_cluster_size,
+ map->m_plen * 10);
+ if (decomp_len < 0) {
+ erofs_err("decompress last cluster to get decompressed size failed");
+ free(decompress);
+ return -1;
+ }
+ compressed_len = LZ4_compress_destSize(decompress, raw,
+ &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
+ if (compressed_len < 0) {
+ erofs_err("compress to get last extent size failed\n");
+ free(decompress);
+ return -1;
+ }
+ free(decompress);
+ // dut to the use of lz4hc (can use different compress level),
+ // our normal lz4 compress result may be bigger
+ compressed_len = compressed_len < map->m_plen ?
+ compressed_len : map->m_plen;
+ }
+
+ *last_cluster_compressed_size = compressed_len;
+ return 0;
+}
+
+static int z_erofs_get_compressed_size(struct erofs_inode *inode,
+ erofs_off_t *size)
+{
+ int err;
+ erofs_blk_t compressedlcs;
+ erofs_off_t last_cluster_size;
+ erofs_off_t last_cluster_compressed_size;
+ struct erofs_map_blocks map = {
+ .index = UINT_MAX,
+ .m_la = inode->i_size - 1,
+ };
+
+ err = z_erofs_map_blocks_iter(inode, &map);
+ if (err) {
+ erofs_err("read nid %ld's last block failed\n", inode->nid);
+ return err;
+ }
+ compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
+ *size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
+ last_cluster_size = inode->i_size - map.m_la;
+
+ if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
+ *size += last_cluster_size;
+ } else {
+ err = z_erofs_get_last_cluster_size_from_disk(&map,
+ last_cluster_size,
+ &last_cluster_compressed_size);
+ if (err) {
+ erofs_err("get nid %ld's last extent size failed",
+ inode->nid);
+ return err;
+ }
+ *size += last_cluster_compressed_size;
+ }
+ return 0;
+}
+
+static int get_file_compressed_size(struct erofs_inode *inode,
+ erofs_off_t *size)
+{
+ int err;
+
+ *size = 0;
+ switch (inode->datalayout) {
+ case EROFS_INODE_FLAT_INLINE:
+ case EROFS_INODE_FLAT_PLAIN:
+ stats.uncompressed_files++;
+ *size = inode->i_size;
+ break;
+ case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+ case EROFS_INODE_FLAT_COMPRESSION:
+ stats.compressed_files++;
+ err = z_erofs_get_compressed_size(inode, size);
+ if (err) {
+ erofs_err("get compressed file size failed\n");
+ return err;
+ }
+ }
+ return 0;
+}
+
static void dumpfs_print_superblock(void)
{
time_t time = sbi.build_time;
@@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
}
+static int get_file_type(const char *filename)
+{
+ char *postfix = strrchr(filename, '.');
+ int type = SOFILETYPE;
+
+ if (postfix == NULL)
+ return OTHERFILETYPE;
+ while (type < OTHERFILETYPE) {
+ if (strcmp(postfix, file_types[type]) == 0)
+ break;
+ type++;
+ }
+ return type;
+}
+
+// file count、file size、file type
+static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
+{
+ struct erofs_inode vi = { .nid = nid};
+ int err;
+ char buf[EROFS_BLKSIZ];
+ char filename[PATH_MAX + 1];
+ erofs_off_t offset;
+
+ err = erofs_read_inode_from_disk(&vi);
+ if (err)
+ return err;
+
+ offset = 0;
+ while (offset < vi.i_size) {
+ erofs_off_t maxsize = min_t(erofs_off_t,
+ vi.i_size - offset, EROFS_BLKSIZ);
+ struct erofs_dirent *de = (void *)buf;
+ struct erofs_dirent *end;
+ unsigned int nameoff;
+
+ err = erofs_pread(&vi, buf, maxsize, offset);
+ if (err)
+ return err;
+
+ nameoff = le16_to_cpu(de->nameoff);
+
+ if (nameoff < sizeof(struct erofs_dirent) ||
+ nameoff >= PAGE_SIZE) {
+ erofs_err("invalid de[0].nameoff %u @ nid %llu",
+ nameoff, nid | 0ULL);
+ return -EFSCORRUPTED;
+ }
+ end = (void *)buf + nameoff;
+ while (de < end) {
+ const char *dname;
+ unsigned int dname_len;
+ struct erofs_inode inode = { .nid = de->nid };
+ int actual_size_mark;
+ int original_size_mark;
+ erofs_off_t actual_size = 0;
+ erofs_off_t original_size;
+
+ nameoff = le16_to_cpu(de->nameoff);
+ dname = (char *)buf + nameoff;
+
+ if (de + 1 >= end)
+ dname_len = strnlen(dname, maxsize - nameoff);
+ else
+ dname_len =
+ le16_to_cpu(de[1].nameoff) - nameoff;
+
+ /* a corrupted entry is found */
+ if (nameoff + dname_len > maxsize ||
+ dname_len > EROFS_NAME_LEN) {
+ erofs_err("bogus dirent @ nid %llu",
+ le64_to_cpu(de->nid) | 0ULL);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ if (de->nid != nid && de->nid != parent_nid)
+ stats.files++;
+
+ memset(filename, 0, PATH_MAX + 1);
+ memcpy(filename, dname, dname_len);
+
+ switch (de->file_type) {
+ case EROFS_FT_UNKNOWN:
+ break;
+ case EROFS_FT_REG_FILE:
+ err = erofs_read_inode_from_disk(&inode);
+ if (err) {
+ erofs_err("read file inode from disk failed!");
+ return err;
+ }
+ original_size = inode.i_size;
+ stats.files_total_origin_size += original_size;
+ stats.regular_files++;
+
+ err = get_file_compressed_size(&inode,
+ &actual_size);
+ if (err) {
+ erofs_err("get file size failed\n");
+ return err;
+ }
+ stats.files_total_size += actual_size;
+ stats.file_type_stat[get_file_type(filename)]++;
+
+ original_size_mark = 0;
+ actual_size_mark = 0;
+ actual_size >>= 10;
+ original_size >>= 10;
+
+ while (actual_size || original_size) {
+ if (actual_size) {
+ actual_size >>= 1;
+ actual_size_mark++;
+ }
+ if (original_size) {
+ original_size >>= 1;
+ original_size_mark++;
+ }
+ }
+
+ if (original_size_mark >= FILE_SIZE_BITS - 1)
+ stats.file_org_size[FILE_SIZE_BITS - 1]++;
+ else
+ stats.file_org_size[original_size_mark]++;
+ if (actual_size_mark >= FILE_SIZE_BITS - 1)
+ stats.file_comp_size[FILE_SIZE_BITS - 1]++;
+ else
+ stats.file_comp_size[actual_size_mark]++;
+ break;
+
+ case EROFS_FT_DIR:
+ if (de->nid != nid && de->nid != parent_nid) {
+ stats.dir_files++;
+ stats.uncompressed_files++;
+ err = read_dir(de->nid, nid);
+ if (err) {
+ fprintf(stderr,
+ "parse dir nid %llu error occurred\n",
+ de->nid);
+ return err;
+ }
+ }
+ break;
+ case EROFS_FT_CHRDEV:
+ stats.chardev_files++;
+ stats.uncompressed_files++;
+ break;
+ case EROFS_FT_BLKDEV:
+ stats.blkdev_files++;
+ stats.uncompressed_files++;
+ break;
+ case EROFS_FT_FIFO:
+ stats.fifo_files++;
+ stats.uncompressed_files++;
+ break;
+ case EROFS_FT_SOCK:
+ stats.sock_files++;
+ stats.uncompressed_files++;
+ break;
+ case EROFS_FT_SYMLINK:
+ stats.symlink_files++;
+ stats.uncompressed_files++;
+ break;
+ }
+ ++de;
+ }
+ offset += maxsize;
+ }
+ return 0;
+}
+
+static void dumpfs_print_statistic_of_filetype(void)
+{
+ fprintf(stderr, "Filesystem total file count: %lu\n",
+ stats.files);
+ fprintf(stderr, "Filesystem regular file count: %lu\n",
+ stats.regular_files);
+ fprintf(stderr, "Filesystem directory count: %lu\n",
+ stats.dir_files);
+ fprintf(stderr, "Filesystem symlink file count: %lu\n",
+ stats.symlink_files);
+ fprintf(stderr, "Filesystem character device count: %lu\n",
+ stats.chardev_files);
+ fprintf(stderr, "Filesystem block device count: %lu\n",
+ stats.blkdev_files);
+ fprintf(stderr, "Filesystem FIFO file count: %lu\n",
+ stats.fifo_files);
+ fprintf(stderr, "Filesystem SOCK file count: %lu\n",
+ stats.sock_files);
+}
+
+static void dumpfs_print_chart_row(char *col1, unsigned int col2,
+ double col3, char *col4)
+{
+ char row[500] = {0};
+
+ sprintf(row, chart_format, col1, col2, col3, col4);
+ fprintf(stderr, row);
+}
+
+static void dumpfs_print_chart_of_file(unsigned int *file_counts,
+ unsigned int len)
+{
+ char col1[30];
+ unsigned int col2;
+ double col3;
+ char col4[400];
+ unsigned int lowerbound = 0;
+ unsigned int upperbound = 1;
+
+ fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
+ "ratio", "distribution");
+ for (int i = 0; i < len; i++) {
+ memset(col1, 0, 30);
+ memset(col4, 0, 400);
+ if (i == len - 1)
+ strcpy(col1, " others");
+ else if (i <= 6)
+ sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
+ else
+
+ sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
+ col2 = file_counts[i];
+ col3 = (double)(100 * col2) / (double)stats.regular_files;
+ memset(col4, '#', col3 / 2);
+ dumpfs_print_chart_row(col1, col2, col3, col4);
+ lowerbound = upperbound;
+ upperbound <<= 1;
+ }
+}
+
+static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
+{
+ char col1[30];
+ unsigned int col2;
+ double col3;
+ char col4[401];
+
+ fprintf(stderr, header_format, "type", "count", "ratio",
+ "distribution");
+ for (int i = 0; i < len; i++) {
+ memset(col1, 0, 30);
+ memset(col4, 0, 401);
+ sprintf(col1, "%-17s", file_types[i]);
+ col2 = stats.file_type_stat[i];
+ col3 = (double)(100 * col2) / (double)stats.regular_files;
+ memset(col4, '#', col3 / 2);
+ dumpfs_print_chart_row(col1, col2, col3, col4);
+ }
+}
+
+static void dumpfs_print_statistic_of_compression(void)
+{
+ stats.compress_rate = (double)(100 * stats.files_total_size) /
+ (double)(stats.files_total_origin_size);
+ fprintf(stderr, "Filesystem compressed files: %lu\n",
+ stats.compressed_files);
+ fprintf(stderr, "Filesystem uncompressed files: %lu\n",
+ stats.uncompressed_files);
+ fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
+ stats.files_total_origin_size);
+ fprintf(stderr, "Filesystem total file size: %lu Bytes\n",
+ stats.files_total_size);
+ fprintf(stderr, "Filesystem compress rate: %.2f%%\n",
+ stats.compress_rate);
+}
+
+static void dumpfs_print_statistic(void)
+{
+ int err;
+
+ stats.blocks = sbi.blocks;
+ err = read_dir(sbi.root_nid, sbi.root_nid);
+ if (err) {
+ erofs_err("read dir failed");
+ return;
+ }
+
+ dumpfs_print_statistic_of_filetype();
+ dumpfs_print_statistic_of_compression();
+
+ fprintf(stderr, "\nOriginal file size distribution:\n");
+ dumpfs_print_chart_of_file(stats.file_org_size, 17);
+ fprintf(stderr, "\nOn-Disk file size distribution:\n");
+ dumpfs_print_chart_of_file(stats.file_comp_size, 17);
+ fprintf(stderr, "\nFile type distribution:\n");
+ dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
+}
+
int main(int argc, char **argv)
{
int err = 0;
@@ -138,5 +608,9 @@ int main(int argc, char **argv)
if (dumpcfg.print_superblock)
dumpfs_print_superblock();
+ if (dumpcfg.print_statistic)
+ dumpfs_print_statistic();
+
+
return 0;
}
--
2.25.4
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
@ 2021-09-11 16:13 ` Gao Xiang
2021-09-13 4:30 ` Huang Jianan via Linux-erofs
0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:13 UTC (permalink / raw)
To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet
(+Cc Jianan.)
On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
>
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
> dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 474 insertions(+)
>
> diff --git a/dump/main.c b/dump/main.c
> index 25ac89f..b0acc0b 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -19,10 +19,78 @@
>
> struct dumpcfg {
> bool print_superblock;
> + bool print_statistic;
> bool print_version;
> };
> static struct dumpcfg dumpcfg;
>
> +static const char chart_format[] = "%-16s %-11d %8.2f%% |%-50s|\n";
> +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> +static char *file_types[] = {
> + ".so",
> + ".png",
> + ".jpg",
> + ".xml",
> + ".html",
> + ".odex",
> + ".vdex",
> + ".apk",
> + ".ttf",
> + ".jar",
> + ".json",
> + ".ogg",
> + ".oat",
> + ".art",
> + ".rc",
> + ".otf",
> + ".txt",
> + "others",
> +};
> +enum {
> + SOFILETYPE = 0,
> + PNGFILETYPE,
> + JPEGFILETYPE,
> + XMLFILETYPE,
> + HTMLFILETYPE,
> + ODEXFILETYPE,
> + VDEXFILETYPE,
> + APKFILETYPE,
> + TTFFILETYPE,
> + JARFILETYPE,
> + JSONFILETYPE,
> + OGGFILETYPE,
> + OATFILETYPE,
> + ARTFILETYPE,
> + RCFILETYPE,
> + OTFFILETYPE,
> + TXTFILETYPE,
> + OTHERFILETYPE,
> +};
Why do we need enums here? Could these be replaced by plain array indices?
> +
> +#define FILE_SIZE_BITS 30
> +struct statistics {
> + unsigned long blocks;
> + unsigned long files;
> + unsigned long files_total_size;
> + unsigned long files_total_origin_size;
> + double compress_rate;
> + unsigned long compressed_files;
> + unsigned long uncompressed_files;
> +
> + unsigned long regular_files;
> + unsigned long dir_files;
> + unsigned long chardev_files;
> + unsigned long blkdev_files;
> + unsigned long fifo_files;
> + unsigned long sock_files;
> + unsigned long symlink_files;
> +
> + unsigned int file_type_stat[OTHERFILETYPE + 1];
> + unsigned int file_org_size[FILE_SIZE_BITS];
What do "FILE_SIZE_BITS" and "file_org_size" mean?
> + unsigned int file_comp_size[FILE_SIZE_BITS];
> +};
> +static struct statistics stats;
> +
> static struct option long_options[] = {
> {"help", no_argument, 0, 1},
> {0, 0, 0, 0},
> @@ -33,6 +101,7 @@ static void usage(void)
> fputs("usage: [options] erofs-image \n\n"
> "Dump erofs layout from erofs-image, and [options] are:\n"
> "-s print information about superblock\n"
> + "-S print statistic information of the erofs-image\n"
> "-v/-V print dump.erofs version info\n"
> "-h/--help display this help and exit\n", stderr);
> }
> @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> case 's':
> dumpcfg.print_superblock = true;
> break;
> + case 'S':
> + dumpcfg.print_statistic = true;
> + break;
> case 'v':
> case 'V':
> dumpfs_print_version();
> @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> return 0;
> }
>
> +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> + erofs_off_t last_cluster_size,
> + erofs_off_t *last_cluster_compressed_size)
Hmmm... do we really need the exact number of compressed bytes,
or is the number of compressed blocks enough?
The "compressed blocks" count can be obtained from the erofs inode.
Btw, I do think it would be useful for fsck (to check whether an erofs image is correct).
> +{
> + int ret;
> + int decomp_len;
> + int compressed_len = 0;
> + char *decompress;
> + char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> +
> + ret = dev_read(raw, map->m_pa, map->m_plen);
> + if (ret < 0)
> + return -EIO;
> +
> + if (erofs_sb_has_lz4_0padding()) {
> + compressed_len = map->m_plen;
> + } else {
> + // lz4 maximum compression ratio is 255
> + decompress = (char *)malloc(map->m_plen * 255);
> + if (!decompress) {
> + erofs_err("allocate memory for decompress space failed");
> + return -1;
> + }
> + decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> + map->m_plen, last_cluster_size,
> + map->m_plen * 10);
> + if (decomp_len < 0) {
> + erofs_err("decompress last cluster to get decompressed size failed");
> + free(decompress);
> + return -1;
> + }
> + compressed_len = LZ4_compress_destSize(decompress, raw,
> + &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> + if (compressed_len < 0) {
> + erofs_err("compress to get last extent size failed\n");
> + free(decompress);
> + return -1;
> + }
> + free(decompress);
> + // dut to the use of lz4hc (can use different compress level),
> + // our normal lz4 compress result may be bigger
> + compressed_len = compressed_len < map->m_plen ?
> + compressed_len : map->m_plen;
> + }
> +
> + *last_cluster_compressed_size = compressed_len;
> + return 0;
> +}
> +
> +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> + erofs_off_t *size)
> +{
> + int err;
> + erofs_blk_t compressedlcs;
> + erofs_off_t last_cluster_size;
> + erofs_off_t last_cluster_compressed_size;
> + struct erofs_map_blocks map = {
> + .index = UINT_MAX,
> + .m_la = inode->i_size - 1,
> + };
> +
> + err = z_erofs_map_blocks_iter(inode, &map);
(add Jianan here.)
Can we port the latest erofs kernel fiemap code to erofs-utils, and add
some functionality to get the file distribution as well when the fs isn't
mounted?
> + if (err) {
> + erofs_err("read nid %ld's last block failed\n", inode->nid);
> + return err;
> + }
> + compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
> + *size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
> + last_cluster_size = inode->i_size - map.m_la;
> +
> + if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
> + *size += last_cluster_size;
> + } else {
> + err = z_erofs_get_last_cluster_size_from_disk(&map,
> + last_cluster_size,
> + &last_cluster_compressed_size);
> + if (err) {
> + erofs_err("get nid %ld's last extent size failed",
> + inode->nid);
> + return err;
> + }
> + *size += last_cluster_compressed_size;
> + }
> + return 0;
> +}
> +
> +static int get_file_compressed_size(struct erofs_inode *inode,
> + erofs_off_t *size)
erofs_dump_get_file_occupied_blocks?
> +{
> + int err;
> +
> + *size = 0;
> + switch (inode->datalayout) {
> + case EROFS_INODE_FLAT_INLINE:
> + case EROFS_INODE_FLAT_PLAIN:
> + stats.uncompressed_files++;
> + *size = inode->i_size;
> + break;
> + case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> + case EROFS_INODE_FLAT_COMPRESSION:
> + stats.compressed_files++;
> + err = z_erofs_get_compressed_size(inode, size);
> + if (err) {
> + erofs_err("get compressed file size failed\n");
> + return err;
> + }
> + }
> + return 0;
> +}
> +
> static void dumpfs_print_superblock(void)
> {
> time_t time = sbi.build_time;
> @@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
>
> }
>
> +static int get_file_type(const char *filename)
> +{
> + char *postfix = strrchr(filename, '.');
> + int type = SOFILETYPE;
> +
> + if (postfix == NULL)
> + return OTHERFILETYPE;
> + while (type < OTHERFILETYPE) {
> + if (strcmp(postfix, file_types[type]) == 0)
> + break;
> + type++;
> + }
> + return type;
> +}
> +
> +// file count、file size、file type
It'd be better to avoid C++-style (//) comments...
> +static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
> +{
> + struct erofs_inode vi = { .nid = nid};
> + int err;
> + char buf[EROFS_BLKSIZ];
> + char filename[PATH_MAX + 1];
> + erofs_off_t offset;
> +
> + err = erofs_read_inode_from_disk(&vi);
> + if (err)
> + return err;
> +
> + offset = 0;
> + while (offset < vi.i_size) {
> + erofs_off_t maxsize = min_t(erofs_off_t,
> + vi.i_size - offset, EROFS_BLKSIZ);
> + struct erofs_dirent *de = (void *)buf;
> + struct erofs_dirent *end;
> + unsigned int nameoff;
> +
> + err = erofs_pread(&vi, buf, maxsize, offset);
> + if (err)
> + return err;
> +
> + nameoff = le16_to_cpu(de->nameoff);
> +
> + if (nameoff < sizeof(struct erofs_dirent) ||
> + nameoff >= PAGE_SIZE) {
> + erofs_err("invalid de[0].nameoff %u @ nid %llu",
> + nameoff, nid | 0ULL);
> + return -EFSCORRUPTED;
> + }
> + end = (void *)buf + nameoff;
> + while (de < end) {
> + const char *dname;
> + unsigned int dname_len;
> + struct erofs_inode inode = { .nid = de->nid };
> + int actual_size_mark;
> + int original_size_mark;
> + erofs_off_t actual_size = 0;
> + erofs_off_t original_size;
> +
> + nameoff = le16_to_cpu(de->nameoff);
> + dname = (char *)buf + nameoff;
> +
> + if (de + 1 >= end)
> + dname_len = strnlen(dname, maxsize - nameoff);
> + else
> + dname_len =
> + le16_to_cpu(de[1].nameoff) - nameoff;
> +
> + /* a corrupted entry is found */
> + if (nameoff + dname_len > maxsize ||
> + dname_len > EROFS_NAME_LEN) {
> + erofs_err("bogus dirent @ nid %llu",
> + le64_to_cpu(de->nid) | 0ULL);
> + DBG_BUGON(1);
> + return -EFSCORRUPTED;
> + }
> + if (de->nid != nid && de->nid != parent_nid)
> + stats.files++;
> +
> + memset(filename, 0, PATH_MAX + 1);
> + memcpy(filename, dname, dname_len);
> +
> + switch (de->file_type) {
> + case EROFS_FT_UNKNOWN:
> + break;
> + case EROFS_FT_REG_FILE:
> + err = erofs_read_inode_from_disk(&inode);
> + if (err) {
> + erofs_err("read file inode from disk failed!");
> + return err;
> + }
> + original_size = inode.i_size;
> + stats.files_total_origin_size += original_size;
> + stats.regular_files++;
> +
> + err = get_file_compressed_size(&inode,
> + &actual_size);
> + if (err) {
> + erofs_err("get file size failed\n");
> + return err;
> + }
> + stats.files_total_size += actual_size;
> + stats.file_type_stat[get_file_type(filename)]++;
> +
> + original_size_mark = 0;
> + actual_size_mark = 0;
> + actual_size >>= 10;
> + original_size >>= 10;
> +
> + while (actual_size || original_size) {
> + if (actual_size) {
> + actual_size >>= 1;
> + actual_size_mark++;
> + }
> + if (original_size) {
> + original_size >>= 1;
> + original_size_mark++;
> + }
> + }
> +
> + if (original_size_mark >= FILE_SIZE_BITS - 1)
> + stats.file_org_size[FILE_SIZE_BITS - 1]++;
> + else
> + stats.file_org_size[original_size_mark]++;
> + if (actual_size_mark >= FILE_SIZE_BITS - 1)
> + stats.file_comp_size[FILE_SIZE_BITS - 1]++;
> + else
> + stats.file_comp_size[actual_size_mark]++;
> + break;
> +
> + case EROFS_FT_DIR:
> + if (de->nid != nid && de->nid != parent_nid) {
> + stats.dir_files++;
> + stats.uncompressed_files++;
> + err = read_dir(de->nid, nid);
> + if (err) {
> + fprintf(stderr,
> + "parse dir nid %llu error occurred\n",
> + de->nid);
> + return err;
> + }
> + }
> + break;
> + case EROFS_FT_CHRDEV:
> + stats.chardev_files++;
> + stats.uncompressed_files++;
How about using an array instead?
> + break;
> + case EROFS_FT_BLKDEV:
> + stats.blkdev_files++;
> + stats.uncompressed_files++;
> + break;
> + case EROFS_FT_FIFO:
> + stats.fifo_files++;
> + stats.uncompressed_files++;
> + break;
> + case EROFS_FT_SOCK:
> + stats.sock_files++;
> + stats.uncompressed_files++;
> + break;
> + case EROFS_FT_SYMLINK:
> + stats.symlink_files++;
> + stats.uncompressed_files++;
> + break;
> + }
> + ++de;
> + }
> + offset += maxsize;
> + }
> + return 0;
> +}
> +
> +static void dumpfs_print_statistic_of_filetype(void)
> +{
> + fprintf(stderr, "Filesystem total file count: %lu\n",
> + stats.files);
> + fprintf(stderr, "Filesystem regular file count: %lu\n",
> + stats.regular_files);
> + fprintf(stderr, "Filesystem directory count: %lu\n",
> + stats.dir_files);
> + fprintf(stderr, "Filesystem symlink file count: %lu\n",
> + stats.symlink_files);
> + fprintf(stderr, "Filesystem character device count: %lu\n",
> + stats.chardev_files);
> + fprintf(stderr, "Filesystem block device count: %lu\n",
> + stats.blkdev_files);
> + fprintf(stderr, "Filesystem FIFO file count: %lu\n",
> + stats.fifo_files);
> + fprintf(stderr, "Filesystem SOCK file count: %lu\n",
> + stats.sock_files);
Also a loop can be used here.
> +}
> +
> +static void dumpfs_print_chart_row(char *col1, unsigned int col2,
> + double col3, char *col4)
> +{
> + char row[500] = {0};
> +
> + sprintf(row, chart_format, col1, col2, col3, col4);
> + fprintf(stderr, row);
> +}
> +
> +static void dumpfs_print_chart_of_file(unsigned int *file_counts,
> + unsigned int len)
> +{
> + char col1[30];
> + unsigned int col2;
> + double col3;
> + char col4[400];
> + unsigned int lowerbound = 0;
> + unsigned int upperbound = 1;
> +
> + fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
> + "ratio", "distribution");
> + for (int i = 0; i < len; i++) {
> + memset(col1, 0, 30);
memset(col1, 0, sizeof(col1));
> + memset(col4, 0, 400);
memset(col4, 0, sizeof(col4));
Thanks,
Gao Xiang
> + if (i == len - 1)
> + strcpy(col1, " others");
> + else if (i <= 6)
> + sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
> + else
> +
> + sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
> + col2 = file_counts[i];
> + col3 = (double)(100 * col2) / (double)stats.regular_files;
> + memset(col4, '#', col3 / 2);
> + dumpfs_print_chart_row(col1, col2, col3, col4);
> + lowerbound = upperbound;
> + upperbound <<= 1;
> + }
> +}
> +
> +static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
> +{
> + char col1[30];
> + unsigned int col2;
> + double col3;
> + char col4[401];
> +
> + fprintf(stderr, header_format, "type", "count", "ratio",
> + "distribution");
> + for (int i = 0; i < len; i++) {
> + memset(col1, 0, 30);
> + memset(col4, 0, 401);
> + sprintf(col1, "%-17s", file_types[i]);
> + col2 = stats.file_type_stat[i];
> + col3 = (double)(100 * col2) / (double)stats.regular_files;
> + memset(col4, '#', col3 / 2);
> + dumpfs_print_chart_row(col1, col2, col3, col4);
> + }
> +}
> +
> +static void dumpfs_print_statistic_of_compression(void)
> +{
> + stats.compress_rate = (double)(100 * stats.files_total_size) /
> + (double)(stats.files_total_origin_size);
> + fprintf(stderr, "Filesystem compressed files: %lu\n",
> + stats.compressed_files);
> + fprintf(stderr, "Filesystem uncompressed files: %lu\n",
> + stats.uncompressed_files);
> + fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
> + stats.files_total_origin_size);
> + fprintf(stderr, "Filesystem total file size: %lu Bytes\n",
> + stats.files_total_size);
> + fprintf(stderr, "Filesystem compress rate: %.2f%%\n",
> + stats.compress_rate);
> +}
> +
> +static void dumpfs_print_statistic(void)
> +{
> + int err;
> +
> + stats.blocks = sbi.blocks;
> + err = read_dir(sbi.root_nid, sbi.root_nid);
> + if (err) {
> + erofs_err("read dir failed");
> + return;
> + }
> +
> + dumpfs_print_statistic_of_filetype();
> + dumpfs_print_statistic_of_compression();
> +
> + fprintf(stderr, "\nOriginal file size distribution:\n");
> + dumpfs_print_chart_of_file(stats.file_org_size, 17);
> + fprintf(stderr, "\nOn-Disk file size distribution:\n");
> + dumpfs_print_chart_of_file(stats.file_comp_size, 17);
> + fprintf(stderr, "\nFile type distribution:\n");
> + dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
> +}
> +
> int main(int argc, char **argv)
> {
> int err = 0;
> @@ -138,5 +608,9 @@ int main(int argc, char **argv)
> if (dumpcfg.print_superblock)
> dumpfs_print_superblock();
>
> + if (dumpcfg.print_statistic)
> + dumpfs_print_statistic();
> +
> +
> return 0;
> }
> --
> 2.25.4
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
2021-09-11 16:13 ` Gao Xiang
@ 2021-09-13 4:30 ` Huang Jianan via Linux-erofs
2021-09-13 12:46 ` Gao Xiang
0 siblings, 1 reply; 13+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-09-13 4:30 UTC (permalink / raw)
To: xiang; +Cc: linux-erofs, mpiglet
在 2021/9/12 0:13, Gao Xiang 写道:
> (+Cc Jianan.)
>
> On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> From: mpiglet <mpiglet@outlook.com>
>>
>> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
>> Signed-off-by: mpiglet <mpiglet@outlook.com>
>> ---
>> dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 474 insertions(+)
>>
>> diff --git a/dump/main.c b/dump/main.c
>> index 25ac89f..b0acc0b 100644
>> --- a/dump/main.c
>> +++ b/dump/main.c
>> @@ -19,10 +19,78 @@
>>
>> struct dumpcfg {
>> bool print_superblock;
>> + bool print_statistic;
>> bool print_version;
>> };
>> static struct dumpcfg dumpcfg;
>>
>> +static const char chart_format[] = "%-16s %-11d %8.2f%% |%-50s|\n";
>> +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> +static char *file_types[] = {
>> + ".so",
>> + ".png",
>> + ".jpg",
>> + ".xml",
>> + ".html",
>> + ".odex",
>> + ".vdex",
>> + ".apk",
>> + ".ttf",
>> + ".jar",
>> + ".json",
>> + ".ogg",
>> + ".oat",
>> + ".art",
>> + ".rc",
>> + ".otf",
>> + ".txt",
>> + "others",
>> +};
>> +enum {
>> + SOFILETYPE = 0,
>> + PNGFILETYPE,
>> + JPEGFILETYPE,
>> + XMLFILETYPE,
>> + HTMLFILETYPE,
>> + ODEXFILETYPE,
>> + VDEXFILETYPE,
>> + APKFILETYPE,
>> + TTFFILETYPE,
>> + JARFILETYPE,
>> + JSONFILETYPE,
>> + OGGFILETYPE,
>> + OATFILETYPE,
>> + ARTFILETYPE,
>> + RCFILETYPE,
>> + OTFFILETYPE,
>> + TXTFILETYPE,
>> + OTHERFILETYPE,
>> +};
> Why we need enums here? Can these be resolved with some array index?
>
>> +
>> +#define FILE_SIZE_BITS 30
>> +struct statistics {
>> + unsigned long blocks;
>> + unsigned long files;
>> + unsigned long files_total_size;
>> + unsigned long files_total_origin_size;
>> + double compress_rate;
>> + unsigned long compressed_files;
>> + unsigned long uncompressed_files;
>> +
>> + unsigned long regular_files;
>> + unsigned long dir_files;
>> + unsigned long chardev_files;
>> + unsigned long blkdev_files;
>> + unsigned long fifo_files;
>> + unsigned long sock_files;
>> + unsigned long symlink_files;
>> +
>> + unsigned int file_type_stat[OTHERFILETYPE + 1];
>> + unsigned int file_org_size[FILE_SIZE_BITS];
> What do "FILE_SIZE_BITS" and "file_org_size" mean?
>
>> + unsigned int file_comp_size[FILE_SIZE_BITS];
>> +};
>> +static struct statistics stats;
>> +
>> static struct option long_options[] = {
>> {"help", no_argument, 0, 1},
>> {0, 0, 0, 0},
>> @@ -33,6 +101,7 @@ static void usage(void)
>> fputs("usage: [options] erofs-image \n\n"
>> "Dump erofs layout from erofs-image, and [options] are:\n"
>> "-s print information about superblock\n"
>> + "-S print statistic information of the erofs-image\n"
>> "-v/-V print dump.erofs version info\n"
>> "-h/--help display this help and exit\n", stderr);
>> }
>> @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>> case 's':
>> dumpcfg.print_superblock = true;
>> break;
>> + case 'S':
>> + dumpcfg.print_statistic = true;
>> + break;
>> case 'v':
>> case 'V':
>> dumpfs_print_version();
>> @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
>> return 0;
>> }
>>
>> +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
>> + erofs_off_t last_cluster_size,
>> + erofs_off_t *last_cluster_compressed_size)
> Hmmm... do we really need the exact compressed bytes?
> or just compressed blocks is enough?
>
> "compressed blocks" can be gotten in erofs inode.
>
> Btw, although I think it's useful for fsck (check if an erofs is correct).
>
>> +{
>> + int ret;
>> + int decomp_len;
>> + int compressed_len = 0;
>> + char *decompress;
>> + char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> +
>> + ret = dev_read(raw, map->m_pa, map->m_plen);
>> + if (ret < 0)
>> + return -EIO;
>> +
>> + if (erofs_sb_has_lz4_0padding()) {
>> + compressed_len = map->m_plen;
>> + } else {
>> + // lz4 maximum compression ratio is 255
>> + decompress = (char *)malloc(map->m_plen * 255);
>> + if (!decompress) {
>> + erofs_err("allocate memory for decompress space failed");
>> + return -1;
>> + }
>> + decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> + map->m_plen, last_cluster_size,
>> + map->m_plen * 10);
>> + if (decomp_len < 0) {
>> + erofs_err("decompress last cluster to get decompressed size failed");
>> + free(decompress);
>> + return -1;
>> + }
>> + compressed_len = LZ4_compress_destSize(decompress, raw,
>> + &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> + if (compressed_len < 0) {
>> + erofs_err("compress to get last extent size failed\n");
>> + free(decompress);
>> + return -1;
>> + }
>> + free(decompress);
>> + // dut to the use of lz4hc (can use different compress level),
>> + // our normal lz4 compress result may be bigger
>> + compressed_len = compressed_len < map->m_plen ?
>> + compressed_len : map->m_plen;
>> + }
>> +
>> + *last_cluster_compressed_size = compressed_len;
>> + return 0;
>> +}
>> +
>> +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> + erofs_off_t *size)
>> +{
>> + int err;
>> + erofs_blk_t compressedlcs;
>> + erofs_off_t last_cluster_size;
>> + erofs_off_t last_cluster_compressed_size;
>> + struct erofs_map_blocks map = {
>> + .index = UINT_MAX,
>> + .m_la = inode->i_size - 1,
>> + };
>> +
>> + err = z_erofs_map_blocks_iter(inode, &map);
> (add Jianan here.)
>
> Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> some functionality to get the file distribution as well when the fs isn't
> mounted?
Hi Xiang,
I have sent the patch and verified it with a similar function. It would be
better to use the new interface here.
Thanks,
Jianan
>
>> + if (err) {
>> + erofs_err("read nid %ld's last block failed\n", inode->nid);
>> + return err;
>> + }
>> + compressedlcs = map.m_plen >> inode->z_logical_clusterbits;
>> + *size = (inode->u.i_blocks - compressedlcs) * EROFS_BLKSIZ;
>> + last_cluster_size = inode->i_size - map.m_la;
>> +
>> + if (!(map.m_flags & EROFS_MAP_ZIPPED)) {
>> + *size += last_cluster_size;
>> + } else {
>> + err = z_erofs_get_last_cluster_size_from_disk(&map,
>> + last_cluster_size,
>> + &last_cluster_compressed_size);
>> + if (err) {
>> + erofs_err("get nid %ld's last extent size failed",
>> + inode->nid);
>> + return err;
>> + }
>> + *size += last_cluster_compressed_size;
>> + }
>> + return 0;
>> +}
>> +
>> +static int get_file_compressed_size(struct erofs_inode *inode,
>> + erofs_off_t *size)
> erofs_dump_get_file_occupied_blocks?
>
>> +{
>> + int err;
>> +
>> + *size = 0;
>> + switch (inode->datalayout) {
>> + case EROFS_INODE_FLAT_INLINE:
>> + case EROFS_INODE_FLAT_PLAIN:
>> + stats.uncompressed_files++;
>> + *size = inode->i_size;
>> + break;
>> + case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
>> + case EROFS_INODE_FLAT_COMPRESSION:
>> + stats.compressed_files++;
>> + err = z_erofs_get_compressed_size(inode, size);
>> + if (err) {
>> + erofs_err("get compressed file size failed\n");
>> + return err;
>> + }
>> + }
>> + return 0;
>> +}
>> +
>> static void dumpfs_print_superblock(void)
>> {
>> time_t time = sbi.build_time;
>> @@ -111,6 +293,294 @@ static void dumpfs_print_superblock(void)
>>
>> }
>>
>> +static int get_file_type(const char *filename)
>> +{
>> + char *postfix = strrchr(filename, '.');
>> + int type = SOFILETYPE;
>> +
>> + if (postfix == NULL)
>> + return OTHERFILETYPE;
>> + while (type < OTHERFILETYPE) {
>> + if (strcmp(postfix, file_types[type]) == 0)
>> + break;
>> + type++;
>> + }
>> + return type;
>> +}
>> +
>> +// file count、file size、file type
> It'd be better to avoid C++ comments...
>
>> +static int read_dir(erofs_nid_t nid, erofs_nid_t parent_nid)
>> +{
>> + struct erofs_inode vi = { .nid = nid};
>> + int err;
>> + char buf[EROFS_BLKSIZ];
>> + char filename[PATH_MAX + 1];
>> + erofs_off_t offset;
>> +
>> + err = erofs_read_inode_from_disk(&vi);
>> + if (err)
>> + return err;
>> +
>> + offset = 0;
>> + while (offset < vi.i_size) {
>> + erofs_off_t maxsize = min_t(erofs_off_t,
>> + vi.i_size - offset, EROFS_BLKSIZ);
>> + struct erofs_dirent *de = (void *)buf;
>> + struct erofs_dirent *end;
>> + unsigned int nameoff;
>> +
>> + err = erofs_pread(&vi, buf, maxsize, offset);
>> + if (err)
>> + return err;
>> +
>> + nameoff = le16_to_cpu(de->nameoff);
>> +
>> + if (nameoff < sizeof(struct erofs_dirent) ||
>> + nameoff >= PAGE_SIZE) {
>> + erofs_err("invalid de[0].nameoff %u @ nid %llu",
>> + nameoff, nid | 0ULL);
>> + return -EFSCORRUPTED;
>> + }
>> + end = (void *)buf + nameoff;
>> + while (de < end) {
>> + const char *dname;
>> + unsigned int dname_len;
>> + struct erofs_inode inode = { .nid = de->nid };
>> + int actual_size_mark;
>> + int original_size_mark;
>> + erofs_off_t actual_size = 0;
>> + erofs_off_t original_size;
>> +
>> + nameoff = le16_to_cpu(de->nameoff);
>> + dname = (char *)buf + nameoff;
>> +
>> + if (de + 1 >= end)
>> + dname_len = strnlen(dname, maxsize - nameoff);
>> + else
>> + dname_len =
>> + le16_to_cpu(de[1].nameoff) - nameoff;
>> +
>> + /* a corrupted entry is found */
>> + if (nameoff + dname_len > maxsize ||
>> + dname_len > EROFS_NAME_LEN) {
>> + erofs_err("bogus dirent @ nid %llu",
>> + le64_to_cpu(de->nid) | 0ULL);
>> + DBG_BUGON(1);
>> + return -EFSCORRUPTED;
>> + }
>> + if (de->nid != nid && de->nid != parent_nid)
>> + stats.files++;
>> +
>> + memset(filename, 0, PATH_MAX + 1);
>> + memcpy(filename, dname, dname_len);
>> +
>> + switch (de->file_type) {
>> + case EROFS_FT_UNKNOWN:
>> + break;
>> + case EROFS_FT_REG_FILE:
>> + err = erofs_read_inode_from_disk(&inode);
>> + if (err) {
>> + erofs_err("read file inode from disk failed!");
>> + return err;
>> + }
>> + original_size = inode.i_size;
>> + stats.files_total_origin_size += original_size;
>> + stats.regular_files++;
>> +
>> + err = get_file_compressed_size(&inode,
>> + &actual_size);
>> + if (err) {
>> + erofs_err("get file size failed\n");
>> + return err;
>> + }
>> + stats.files_total_size += actual_size;
>> + stats.file_type_stat[get_file_type(filename)]++;
>> +
>> + original_size_mark = 0;
>> + actual_size_mark = 0;
>> + actual_size >>= 10;
>> + original_size >>= 10;
>> +
>> + while (actual_size || original_size) {
>> + if (actual_size) {
>> + actual_size >>= 1;
>> + actual_size_mark++;
>> + }
>> + if (original_size) {
>> + original_size >>= 1;
>> + original_size_mark++;
>> + }
>> + }
>> +
>> + if (original_size_mark >= FILE_SIZE_BITS - 1)
>> + stats.file_org_size[FILE_SIZE_BITS - 1]++;
>> + else
>> + stats.file_org_size[original_size_mark]++;
>> + if (actual_size_mark >= FILE_SIZE_BITS - 1)
>> + stats.file_comp_size[FILE_SIZE_BITS - 1]++;
>> + else
>> + stats.file_comp_size[actual_size_mark]++;
>> + break;
>> +
>> + case EROFS_FT_DIR:
>> + if (de->nid != nid && de->nid != parent_nid) {
>
>
>> + stats.dir_files++;
>> + stats.uncompressed_files++;
>> + err = read_dir(de->nid, nid);
>> + if (err) {
>> + fprintf(stderr,
>> + "parse dir nid %llu error occurred\n",
>> + de->nid);
>> + return err;
>> + }
>> + }
>> + break;
>> + case EROFS_FT_CHRDEV:
>> + stats.chardev_files++;
>> + stats.uncompressed_files++;
> How about using an array instead?
>
>> + break;
>> + case EROFS_FT_BLKDEV:
>> + stats.blkdev_files++;
>> + stats.uncompressed_files++;
>> + break;
>> + case EROFS_FT_FIFO:
>> + stats.fifo_files++;
>> + stats.uncompressed_files++;
>> + break;
>> + case EROFS_FT_SOCK:
>> + stats.sock_files++;
>> + stats.uncompressed_files++;
>> + break;
>> + case EROFS_FT_SYMLINK:
>> + stats.symlink_files++;
>> + stats.uncompressed_files++;
>> + break;
>> + }
>> + ++de;
>> + }
>> + offset += maxsize;
>> + }
>> + return 0;
>> +}
>> +
>> +static void dumpfs_print_statistic_of_filetype(void)
>> +{
>> + fprintf(stderr, "Filesystem total file count: %lu\n",
>> + stats.files);
>> + fprintf(stderr, "Filesystem regular file count: %lu\n",
>> + stats.regular_files);
>> + fprintf(stderr, "Filesystem directory count: %lu\n",
>> + stats.dir_files);
>> + fprintf(stderr, "Filesystem symlink file count: %lu\n",
>> + stats.symlink_files);
>> + fprintf(stderr, "Filesystem character device count: %lu\n",
>> + stats.chardev_files);
>> + fprintf(stderr, "Filesystem block device count: %lu\n",
>> + stats.blkdev_files);
>> + fprintf(stderr, "Filesystem FIFO file count: %lu\n",
>> + stats.fifo_files);
>> + fprintf(stderr, "Filesystem SOCK file count: %lu\n",
>> + stats.sock_files);
> Also a loop can be used here.
>
>> +}
>> +
>> +static void dumpfs_print_chart_row(char *col1, unsigned int col2,
>> + double col3, char *col4)
>> +{
>> + char row[500] = {0};
>> +
>> + sprintf(row, chart_format, col1, col2, col3, col4);
>> + fprintf(stderr, row);
>> +}
>> +
>> +static void dumpfs_print_chart_of_file(unsigned int *file_counts,
>> + unsigned int len)
>> +{
>> + char col1[30];
>> + unsigned int col2;
>> + double col3;
>> + char col4[400];
>> + unsigned int lowerbound = 0;
>> + unsigned int upperbound = 1;
>> +
>> + fprintf(stderr, header_format, ">=(KB) .. <(KB) ", "count",
>> + "ratio", "distribution");
>> + for (int i = 0; i < len; i++) {
>> + memset(col1, 0, 30);
> memset(col1, 0, sizeof(col1));
>
>> + memset(col4, 0, 400);
> memset(col4, 0, sizeof(col4));
>
> Thanks,
> Gao Xiang
>
>> + if (i == len - 1)
>> + strcpy(col1, " others");
>> + else if (i <= 6)
>> + sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
>> + else
>> +
>> + sprintf(col1, "%6d .. %-6d", lowerbound, upperbound);
>> + col2 = file_counts[i];
>> + col3 = (double)(100 * col2) / (double)stats.regular_files;
>> + memset(col4, '#', col3 / 2);
>> + dumpfs_print_chart_row(col1, col2, col3, col4);
>> + lowerbound = upperbound;
>> + upperbound <<= 1;
>> + }
>> +}
>> +
>> +static void dumpfs_print_chart_of_file_type(char **file_types, unsigned int len)
>> +{
>> + char col1[30];
>> + unsigned int col2;
>> + double col3;
>> + char col4[401];
>> +
>> + fprintf(stderr, header_format, "type", "count", "ratio",
>> + "distribution");
>> + for (int i = 0; i < len; i++) {
>> + memset(col1, 0, 30);
>> + memset(col4, 0, 401);
>> + sprintf(col1, "%-17s", file_types[i]);
>> + col2 = stats.file_type_stat[i];
>> + col3 = (double)(100 * col2) / (double)stats.regular_files;
>> + memset(col4, '#', col3 / 2);
>> + dumpfs_print_chart_row(col1, col2, col3, col4);
>> + }
>> +}
>> +
>> +static void dumpfs_print_statistic_of_compression(void)
>> +{
>> + stats.compress_rate = (double)(100 * stats.files_total_size) /
>> + (double)(stats.files_total_origin_size);
>> + fprintf(stderr, "Filesystem compressed files: %lu\n",
>> + stats.compressed_files);
>> + fprintf(stderr, "Filesystem uncompressed files: %lu\n",
>> + stats.uncompressed_files);
>> + fprintf(stderr, "Filesystem total original file size: %lu Bytes\n",
>> + stats.files_total_origin_size);
>> + fprintf(stderr, "Filesystem total file size: %lu Bytes\n",
>> + stats.files_total_size);
>> + fprintf(stderr, "Filesystem compress rate: %.2f%%\n",
>> + stats.compress_rate);
>> +}
>> +
>> +static void dumpfs_print_statistic(void)
>> +{
>> + int err;
>> +
>> + stats.blocks = sbi.blocks;
>> + err = read_dir(sbi.root_nid, sbi.root_nid);
>> + if (err) {
>> + erofs_err("read dir failed");
>> + return;
>> + }
>> +
>> + dumpfs_print_statistic_of_filetype();
>> + dumpfs_print_statistic_of_compression();
>> +
>> + fprintf(stderr, "\nOriginal file size distribution:\n");
>> + dumpfs_print_chart_of_file(stats.file_org_size, 17);
>> + fprintf(stderr, "\nOn-Disk file size distribution:\n");
>> + dumpfs_print_chart_of_file(stats.file_comp_size, 17);
>> + fprintf(stderr, "\nFile type distribution:\n");
>> + dumpfs_print_chart_of_file_type(file_types, OTHERFILETYPE + 1);
>> +}
>> +
>> int main(int argc, char **argv)
>> {
>> int err = 0;
>> @@ -138,5 +608,9 @@ int main(int argc, char **argv)
>> if (dumpcfg.print_superblock)
>> dumpfs_print_superblock();
>>
>> + if (dumpcfg.print_statistic)
>> + dumpfs_print_statistic();
>> +
>> +
>> return 0;
>> }
>> --
>> 2.25.4
>>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
2021-09-13 4:30 ` Huang Jianan via Linux-erofs
@ 2021-09-13 12:46 ` Gao Xiang
2021-09-14 2:31 ` Guo Xuenan
0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-09-13 12:46 UTC (permalink / raw)
To: Huang Jianan; +Cc: linux-erofs, mpiglet
On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
> 在 2021/9/12 0:13, Gao Xiang 写道:
> > (+Cc Jianan.)
> >
> > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> > > From: mpiglet <mpiglet@outlook.com>
> > >
> > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
> > > ---
> > > dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > 1 file changed, 474 insertions(+)
> > >
> > > diff --git a/dump/main.c b/dump/main.c
> > > index 25ac89f..b0acc0b 100644
> > > --- a/dump/main.c
> > > +++ b/dump/main.c
> > > @@ -19,10 +19,78 @@
> > > struct dumpcfg {
> > > bool print_superblock;
> > > + bool print_statistic;
> > > bool print_version;
> > > };
> > > static struct dumpcfg dumpcfg;
> > > +static const char chart_format[] = "%-16s %-11d %8.2f%% |%-50s|\n";
> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> > > +static char *file_types[] = {
> > > + ".so",
> > > + ".png",
> > > + ".jpg",
> > > + ".xml",
> > > + ".html",
> > > + ".odex",
> > > + ".vdex",
> > > + ".apk",
> > > + ".ttf",
> > > + ".jar",
> > > + ".json",
> > > + ".ogg",
> > > + ".oat",
> > > + ".art",
> > > + ".rc",
> > > + ".otf",
> > > + ".txt",
> > > + "others",
> > > +};
> > > +enum {
> > > + SOFILETYPE = 0,
> > > + PNGFILETYPE,
> > > + JPEGFILETYPE,
> > > + XMLFILETYPE,
> > > + HTMLFILETYPE,
> > > + ODEXFILETYPE,
> > > + VDEXFILETYPE,
> > > + APKFILETYPE,
> > > + TTFFILETYPE,
> > > + JARFILETYPE,
> > > + JSONFILETYPE,
> > > + OGGFILETYPE,
> > > + OATFILETYPE,
> > > + ARTFILETYPE,
> > > + RCFILETYPE,
> > > + OTFFILETYPE,
> > > + TXTFILETYPE,
> > > + OTHERFILETYPE,
> > > +};
> > Why we need enums here? Can these be resolved with some array index?
> >
> > > +
> > > +#define FILE_SIZE_BITS 30
> > > +struct statistics {
> > > + unsigned long blocks;
> > > + unsigned long files;
> > > + unsigned long files_total_size;
> > > + unsigned long files_total_origin_size;
> > > + double compress_rate;
> > > + unsigned long compressed_files;
> > > + unsigned long uncompressed_files;
> > > +
> > > + unsigned long regular_files;
> > > + unsigned long dir_files;
> > > + unsigned long chardev_files;
> > > + unsigned long blkdev_files;
> > > + unsigned long fifo_files;
> > > + unsigned long sock_files;
> > > + unsigned long symlink_files;
> > > +
> > > + unsigned int file_type_stat[OTHERFILETYPE + 1];
> > > + unsigned int file_org_size[FILE_SIZE_BITS];
> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
> >
> > > + unsigned int file_comp_size[FILE_SIZE_BITS];
> > > +};
> > > +static struct statistics stats;
> > > +
> > > static struct option long_options[] = {
> > > {"help", no_argument, 0, 1},
> > > {0, 0, 0, 0},
> > > @@ -33,6 +101,7 @@ static void usage(void)
> > > fputs("usage: [options] erofs-image \n\n"
> > > "Dump erofs layout from erofs-image, and [options] are:\n"
> > > "-s print information about superblock\n"
> > > + "-S print statistic information of the erofs-image\n"
> > > "-v/-V print dump.erofs version info\n"
> > > "-h/--help display this help and exit\n", stderr);
> > > }
> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > > case 's':
> > > dumpcfg.print_superblock = true;
> > > break;
> > > + case 'S':
> > > + dumpcfg.print_statistic = true;
> > > + break;
> > > case 'v':
> > > case 'V':
> > > dumpfs_print_version();
> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > > return 0;
> > > }
> > > +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> > > + erofs_off_t last_cluster_size,
> > > + erofs_off_t *last_cluster_compressed_size)
> > Hmmm... do we really need the exact compressed bytes?
> > or just compressed blocks is enough?
> >
> > "compressed blocks" can be gotten in erofs inode.
> >
> > Btw, although I think it's useful for fsck (check if an erofs is correct).
> >
> > > +{
> > > + int ret;
> > > + int decomp_len;
> > > + int compressed_len = 0;
> > > + char *decompress;
> > > + char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> > > +
> > > + ret = dev_read(raw, map->m_pa, map->m_plen);
> > > + if (ret < 0)
> > > + return -EIO;
> > > +
> > > + if (erofs_sb_has_lz4_0padding()) {
> > > + compressed_len = map->m_plen;
> > > + } else {
> > > + // lz4 maximum compression ratio is 255
> > > + decompress = (char *)malloc(map->m_plen * 255);
> > > + if (!decompress) {
> > > + erofs_err("allocate memory for decompress space failed");
> > > + return -1;
> > > + }
> > > + decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> > > + map->m_plen, last_cluster_size,
> > > + map->m_plen * 10);
> > > + if (decomp_len < 0) {
> > > + erofs_err("decompress last cluster to get decompressed size failed");
> > > + free(decompress);
> > > + return -1;
> > > + }
> > > + compressed_len = LZ4_compress_destSize(decompress, raw,
> > > + &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> > > + if (compressed_len < 0) {
> > > + erofs_err("compress to get last extent size failed\n");
> > > + free(decompress);
> > > + return -1;
> > > + }
> > > + free(decompress);
> > > + // dut to the use of lz4hc (can use different compress level),
> > > + // our normal lz4 compress result may be bigger
> > > + compressed_len = compressed_len < map->m_plen ?
> > > + compressed_len : map->m_plen;
> > > + }
> > > +
> > > + *last_cluster_compressed_size = compressed_len;
> > > + return 0;
> > > +}
> > > +
> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> > > + erofs_off_t *size)
> > > +{
> > > + int err;
> > > + erofs_blk_t compressedlcs;
> > > + erofs_off_t last_cluster_size;
> > > + erofs_off_t last_cluster_compressed_size;
> > > + struct erofs_map_blocks map = {
> > > + .index = UINT_MAX,
> > > + .m_la = inode->i_size - 1,
> > > + };
> > > +
> > > + err = z_erofs_map_blocks_iter(inode, &map);
> > (add Jianan here.)
> >
> > Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> > some functionality to get the file distribution as well when the fs isn't
> > mounted?
> Hi Xiang,
>
> I have sent the patch and verified it with a similar function. Better to use
> the
> new interface here.
Yeah, thanks for the patch:
https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/
Hopefully Xuenan can base the next version on this work.
Thanks,
Gao Xiang
>
> Thanks,
> Jianan
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
2021-09-13 12:46 ` Gao Xiang
@ 2021-09-14 2:31 ` Guo Xuenan
0 siblings, 0 replies; 13+ messages in thread
From: Guo Xuenan @ 2021-09-14 2:31 UTC (permalink / raw)
To: Gao Xiang, Huang Jianan; +Cc: linux-erofs, mpiglet
OK, I will send out patch v2 today, and it will be based on Jianan's
work.
在 2021/9/13 20:46, Gao Xiang 写道:
> On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
>> 在 2021/9/12 0:13, Gao Xiang 写道:
>> > (+Cc Jianan.)
>> > > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> > > From: mpiglet <mpiglet@outlook.com>
>> > > > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
>> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
>> > > ---
>> > > dump/main.c | 474
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> > > 1 file changed, 474 insertions(+)
>> > > > > diff --git a/dump/main.c b/dump/main.c
>> > > index 25ac89f..b0acc0b 100644
>> > > --- a/dump/main.c
>> > > +++ b/dump/main.c
>> > > @@ -19,10 +19,78 @@
>> > > struct dumpcfg {
>> > > bool print_superblock;
>> > > + bool print_statistic;
>> > > bool print_version;
>> > > };
>> > > static struct dumpcfg dumpcfg;
>> > > +static const char chart_format[] = "%-16s %-11d %8.2f%%
>> |%-50s|\n";
>> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> > > +static char *file_types[] = {
>> > > + ".so",
>> > > + ".png",
>> > > + ".jpg",
>> > > + ".xml",
>> > > + ".html",
>> > > + ".odex",
>> > > + ".vdex",
>> > > + ".apk",
>> > > + ".ttf",
>> > > + ".jar",
>> > > + ".json",
>> > > + ".ogg",
>> > > + ".oat",
>> > > + ".art",
>> > > + ".rc",
>> > > + ".otf",
>> > > + ".txt",
>> > > + "others",
>> > > +};
>> > > +enum {
>> > > + SOFILETYPE = 0,
>> > > + PNGFILETYPE,
>> > > + JPEGFILETYPE,
>> > > + XMLFILETYPE,
>> > > + HTMLFILETYPE,
>> > > + ODEXFILETYPE,
>> > > + VDEXFILETYPE,
>> > > + APKFILETYPE,
>> > > + TTFFILETYPE,
>> > > + JARFILETYPE,
>> > > + JSONFILETYPE,
>> > > + OGGFILETYPE,
>> > > + OATFILETYPE,
>> > > + ARTFILETYPE,
>> > > + RCFILETYPE,
>> > > + OTFFILETYPE,
>> > > + TXTFILETYPE,
>> > > + OTHERFILETYPE,
>> > > +};
>> > Why we need enums here? Can these be resolved with some array index?
>> > > > +
>> > > +#define FILE_SIZE_BITS 30
>> > > +struct statistics {
>> > > + unsigned long blocks;
>> > > + unsigned long files;
>> > > + unsigned long files_total_size;
>> > > + unsigned long files_total_origin_size;
>> > > + double compress_rate;
>> > > + unsigned long compressed_files;
>> > > + unsigned long uncompressed_files;
>> > > +
>> > > + unsigned long regular_files;
>> > > + unsigned long dir_files;
>> > > + unsigned long chardev_files;
>> > > + unsigned long blkdev_files;
>> > > + unsigned long fifo_files;
>> > > + unsigned long sock_files;
>> > > + unsigned long symlink_files;
>> > > +
>> > > + unsigned int file_type_stat[OTHERFILETYPE + 1];
>> > > + unsigned int file_org_size[FILE_SIZE_BITS];
>> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
>> > > > + unsigned int file_comp_size[FILE_SIZE_BITS];
>> > > +};
>> > > +static struct statistics stats;
>> > > +
>> > > static struct option long_options[] = {
>> > > {"help", no_argument, 0, 1},
>> > > {0, 0, 0, 0},
>> > > @@ -33,6 +101,7 @@ static void usage(void)
>> > > fputs("usage: [options] erofs-image \n\n"
>> > > "Dump erofs layout from erofs-image, and [options] are:\n"
>> > > "-s print information about superblock\n"
>> > > + "-S print statistic information of the erofs-image\n"
>> > > "-v/-V print dump.erofs version info\n"
>> > > "-h/--help display this help and exit\n", stderr);
>> > > }
>> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc,
>> char **argv)
>> > > case 's':
>> > > dumpcfg.print_superblock = true;
>> > > break;
>> > > + case 'S':
>> > > + dumpcfg.print_statistic = true;
>> > > + break;
>> > > case 'v':
>> > > case 'V':
>> > > dumpfs_print_version();
>> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int
>> argc, char **argv)
>> > > return 0;
>> > > }
>> > > +static int z_erofs_get_last_cluster_size_from_disk(struct
>> erofs_map_blocks *map,
>> > > + erofs_off_t last_cluster_size,
>> > > + erofs_off_t *last_cluster_compressed_size)
>> > Hmmm... do we really need the exact compressed bytes?
>> > or just compressed blocks is enough?
>> > > "compressed blocks" can be gotten in erofs inode.
>> > > Btw, although I think it's useful for fsck (check if an erofs is
>> correct).
>> > > > +{
>> > > + int ret;
>> > > + int decomp_len;
>> > > + int compressed_len = 0;
>> > > + char *decompress;
>> > > + char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> > > +
>> > > + ret = dev_read(raw, map->m_pa, map->m_plen);
>> > > + if (ret < 0)
>> > > + return -EIO;
>> > > +
>> > > + if (erofs_sb_has_lz4_0padding()) {
>> > > + compressed_len = map->m_plen;
>> > > + } else {
>> > > + // lz4 maximum compression ratio is 255
>> > > + decompress = (char *)malloc(map->m_plen * 255);
>> > > + if (!decompress) {
>> > > + erofs_err("allocate memory for decompress space
>> failed");
>> > > + return -1;
>> > > + }
>> > > + decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> > > + map->m_plen, last_cluster_size,
>> > > + map->m_plen * 10);
>> > > + if (decomp_len < 0) {
>> > > + erofs_err("decompress last cluster to get
>> decompressed size failed");
>> > > + free(decompress);
>> > > + return -1;
>> > > + }
>> > > + compressed_len = LZ4_compress_destSize(decompress, raw,
>> > > + &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> > > + if (compressed_len < 0) {
>> > > + erofs_err("compress to get last extent size failed\n");
>> > > + free(decompress);
>> > > + return -1;
>> > > + }
>> > > + free(decompress);
>> > > + // dut to the use of lz4hc (can use different compress
>> level),
>> > > + // our normal lz4 compress result may be bigger
>> > > + compressed_len = compressed_len < map->m_plen ?
>> > > + compressed_len : map->m_plen;
>> > > + }
>> > > +
>> > > + *last_cluster_compressed_size = compressed_len;
>> > > + return 0;
>> > > +}
>> > > +
>> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> > > + erofs_off_t *size)
>> > > +{
>> > > + int err;
>> > > + erofs_blk_t compressedlcs;
>> > > + erofs_off_t last_cluster_size;
>> > > + erofs_off_t last_cluster_compressed_size;
>> > > + struct erofs_map_blocks map = {
>> > > + .index = UINT_MAX,
>> > > + .m_la = inode->i_size - 1,
>> > > + };
>> > > +
>> > > + err = z_erofs_map_blocks_iter(inode, &map);
>> > (add Jianan here.)
>> > > Can we port the latest erofs kernel fiemap code to erofs-utils,
>> and add
>> > some functionality to get the file distribution as well when the fs
>> isn't
>> > mounted?
>> Hi Xiang,
>>
>> I have sent the patch and verified it with a similar function. Better
>> to use
>> the
>> new interface here.
>
> Yeah, thanks for the patch:
> https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/
>
>
> Hopefully Xuenan could base on this work.
>
> Thanks,
> Gao XIang
>
>>
>> Thanks,
>> Jianan
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
2021-09-11 16:25 ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang
4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
To: xiang, linux-erofs; +Cc: mpiglet
From: mpiglet <mpiglet@outlook.com>
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
dump/main.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 200 insertions(+), 2 deletions(-)
diff --git a/dump/main.c b/dump/main.c
index b0acc0b..2389cef 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -19,8 +19,10 @@
struct dumpcfg {
bool print_superblock;
+ bool print_inode;
bool print_statistic;
bool print_version;
+ u64 ino;
};
static struct dumpcfg dumpcfg;
@@ -100,8 +102,9 @@ static void usage(void)
{
fputs("usage: [options] erofs-image \n\n"
"Dump erofs layout from erofs-image, and [options] are:\n"
- "-s print information about superblock\n"
- "-S print statistic information of the erofs-image\n"
+ "-s print information about superblock\n"
+ "-S print statistic information of the erofs-image\n"
+ "-i # print target # inode info\n"
"-v/-V print dump.erofs version info\n"
"-h/--help display this help and exit\n", stderr);
}
@@ -113,6 +116,7 @@ static void dumpfs_print_version(void)
static int dumpfs_parse_options_cfg(int argc, char **argv)
{
int opt;
+ u64 i;
while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
long_options, NULL)) != -1) {
@@ -127,6 +131,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
case 'V':
dumpfs_print_version();
exit(0);
+ case 'i':
+ i = atoll(optarg);
+ dumpcfg.print_inode = true;
+ dumpcfg.ino = i;
+ break;
case 'h':
case 1:
usage();
@@ -293,6 +302,193 @@ static void dumpfs_print_superblock(void)
}
+static int get_path_by_nid(erofs_nid_t nid, erofs_nid_t parent_nid,
+ erofs_nid_t target, char *path, unsigned int pos)
+{
+ int err;
+ struct erofs_inode inode = {.nid = nid};
+ erofs_off_t offset;
+ char buf[EROFS_BLKSIZ];
+
+ path[pos++] = '/';
+ if (target == sbi.root_nid)
+ return 0;
+
+ err = erofs_read_inode_from_disk(&inode);
+ if (err) {
+ erofs_err("read inode %lu failed", nid);
+ return err;
+ }
+
+ offset = 0;
+ while (offset < inode.i_size) {
+ erofs_off_t maxsize = min_t(erofs_off_t,
+ inode.i_size - offset, EROFS_BLKSIZ);
+ struct erofs_dirent *de = (void *)buf;
+ struct erofs_dirent *end;
+ unsigned int nameoff;
+
+ err = erofs_pread(&inode, buf, maxsize, offset);
+ if (err)
+ return err;
+
+ nameoff = le16_to_cpu(de->nameoff);
+ if (nameoff < sizeof(struct erofs_dirent) ||
+ nameoff >= PAGE_SIZE) {
+ erofs_err("invalid de[0].nameoff %u @ nid %llu",
+ nameoff, nid | 0ULL);
+ return -EFSCORRUPTED;
+ }
+
+ end = (void *)buf + nameoff;
+ while (de < end) {
+ const char *dname;
+ unsigned int dname_len;
+
+ nameoff = le16_to_cpu(de->nameoff);
+ dname = (char *)buf + nameoff;
+ if (de + 1 >= end)
+ dname_len = strnlen(dname, maxsize - nameoff);
+ else
+ dname_len = le16_to_cpu(de[1].nameoff)
+ - nameoff;
+
+ /* a corrupted entry is found */
+ if (nameoff + dname_len > maxsize ||
+ dname_len > EROFS_NAME_LEN) {
+ erofs_err("bogus dirent @ nid %llu",
+ le64_to_cpu(de->nid) | 0ULL);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+
+ if (de->nid == target) {
+ memcpy(path + pos, dname, dname_len);
+ return 0;
+ }
+
+ if (de->file_type == EROFS_FT_DIR &&
+ de->nid != parent_nid &&
+ de->nid != nid) {
+ memcpy(path + pos, dname, dname_len);
+ err = get_path_by_nid(de->nid, nid,
+ target, path, pos + dname_len);
+ if (!err)
+ return 0;
+ memset(path + pos, 0, dname_len);
+ }
+ ++de;
+ }
+ offset += maxsize;
+ }
+ return -1;
+}
+
+static void dumpfs_print_inode(void)
+{
+ int err;
+ erofs_off_t size;
+ erofs_nid_t nid = dumpcfg.ino;
+ struct erofs_inode inode = {.nid = nid};
+ char path[PATH_MAX + 1] = {0};
+ time_t t = inode.i_ctime;
+
+ err = erofs_read_inode_from_disk(&inode);
+ if (err) {
+ erofs_err("read inode %lu from disk failed", nid);
+ return;
+ }
+
+ fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
+ switch (inode.inode_isize) {
+ case 32:
+ fprintf(stderr, " File inode is compacted layout\n");
+ break;
+ case 64:
+ fprintf(stderr, " File inode is extended layout\n");
+ break;
+ default:
+ erofs_err("unsupported inode layout\n");
+ }
+ fprintf(stderr, " File size: %lu\n",
+ inode.i_size);
+ fprintf(stderr, " File nid: %lu\n",
+ inode.nid);
+ fprintf(stderr, " File extent size: %u\n",
+ inode.extent_isize);
+ fprintf(stderr, " File xattr size: %u\n",
+ inode.xattr_isize);
+ fprintf(stderr, " File inode size: %u\n",
+ inode.inode_isize);
+ fprintf(stderr, " File type: ");
+ switch (inode.i_mode & S_IFMT) {
+ case S_IFREG:
+ fprintf(stderr, "regular\n");
+ break;
+ case S_IFDIR:
+ fprintf(stderr, "directory\n");
+ break;
+ case S_IFLNK:
+ fprintf(stderr, "link\n");
+ break;
+ case S_IFCHR:
+ fprintf(stderr, "character device\n");
+ break;
+ case S_IFBLK:
+ fprintf(stderr, "block device\n");
+ break;
+ case S_IFIFO:
+ fprintf(stderr, "fifo\n");
+ break;
+ case S_IFSOCK:
+ fprintf(stderr, "sock\n");
+ break;
+ default:
+ break;
+ }
+
+ err = get_file_compressed_size(&inode, &size);
+ if (err) {
+ erofs_err("get file size failed\n");
+ return;
+ }
+
+ fprintf(stderr, " File original size: %lu\n"
+ " File on-disk size: %lu\n",
+ inode.i_size, size);
+ fprintf(stderr, " File compress rate: %.2f%%\n",
+ (double)(100 * size) / (double)(inode.i_size));
+
+ fprintf(stderr, " File datalayout: ");
+ switch (inode.datalayout) {
+ case EROFS_INODE_FLAT_PLAIN:
+ fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
+ break;
+ case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+ fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION_LEGACY\n");
+ break;
+ case EROFS_INODE_FLAT_INLINE:
+ fprintf(stderr, "EROFS_INODE_FLAT_INLINE\n");
+ break;
+ case EROFS_INODE_FLAT_COMPRESSION:
+ fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION\n");
+ break;
+ default:
+ break;
+ }
+
+ fprintf(stderr, " File create time: %s", ctime(&t));
+ fprintf(stderr, " File uid: %u\n", inode.i_uid);
+ fprintf(stderr, " File gid: %u\n", inode.i_gid);
+ fprintf(stderr, " File hard-link count: %u\n", inode.i_nlink);
+
+ err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
+ if (!err)
+ fprintf(stderr, " File path: %s\n", path);
+ else
+ fprintf(stderr, "Path not found\n");
+}
+
static int get_file_type(const char *filename)
{
char *postfix = strrchr(filename, '.');
@@ -611,6 +807,8 @@ int main(int argc, char **argv)
if (dumpcfg.print_statistic)
dumpfs_print_statistic();
+ if (dumpcfg.print_inode)
+ dumpfs_print_inode();
return 0;
}
--
2.25.4
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number
2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
@ 2021-09-11 16:25 ` Gao Xiang
0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:25 UTC (permalink / raw)
To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet
On Sat, Sep 11, 2021 at 09:46:34PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
>
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
> dump/main.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 200 insertions(+), 2 deletions(-)
>
> diff --git a/dump/main.c b/dump/main.c
> index b0acc0b..2389cef 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -19,8 +19,10 @@
>
> struct dumpcfg {
> bool print_superblock;
> + bool print_inode;
> bool print_statistic;
> bool print_version;
> + u64 ino;
> };
> static struct dumpcfg dumpcfg;
>
> @@ -100,8 +102,9 @@ static void usage(void)
> {
> fputs("usage: [options] erofs-image \n\n"
> "Dump erofs layout from erofs-image, and [options] are:\n"
> - "-s print information about superblock\n"
> - "-S print statistic information of the erofs-image\n"
> + "-s print information about superblock\n"
> + "-S print statistic information of the erofs-image\n"
> + "-i # print target # inode info\n"
> "-v/-V print dump.erofs version info\n"
> "-h/--help display this help and exit\n", stderr);
> }
> @@ -113,6 +116,7 @@ static void dumpfs_print_version(void)
> static int dumpfs_parse_options_cfg(int argc, char **argv)
> {
> int opt;
> + u64 i;
>
> while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
> long_options, NULL)) != -1) {
> @@ -127,6 +131,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> case 'V':
> dumpfs_print_version();
> exit(0);
> + case 'i':
> + i = atoll(optarg);
> + dumpcfg.print_inode = true;
> + dumpcfg.ino = i;
> + break;
> case 'h':
> case 1:
> usage();
> @@ -293,6 +302,193 @@ static void dumpfs_print_superblock(void)
>
> }
>
> +static int get_path_by_nid(erofs_nid_t nid, erofs_nid_t parent_nid,
> + erofs_nid_t target, char *path, unsigned int pos)
Can we refactor it as a traversal function (together with a function
in the previous patch)? Also, how should hard links be resolved?
> +{
> + int err;
> + struct erofs_inode inode = {.nid = nid};
> + erofs_off_t offset;
> + char buf[EROFS_BLKSIZ];
> +
> + path[pos++] = '/';
> + if (target == sbi.root_nid)
> + return 0;
> +
> + err = erofs_read_inode_from_disk(&inode);
> + if (err) {
> + erofs_err("read inode %lu failed", nid);
> + return err;
> + }
> +
> + offset = 0;
> + while (offset < inode.i_size) {
> + erofs_off_t maxsize = min_t(erofs_off_t,
> + inode.i_size - offset, EROFS_BLKSIZ);
> + struct erofs_dirent *de = (void *)buf;
> + struct erofs_dirent *end;
> + unsigned int nameoff;
> +
> + err = erofs_pread(&inode, buf, maxsize, offset);
> + if (err)
> + return err;
> +
> + nameoff = le16_to_cpu(de->nameoff);
> + if (nameoff < sizeof(struct erofs_dirent) ||
> + nameoff >= PAGE_SIZE) {
> + erofs_err("invalid de[0].nameoff %u @ nid %llu",
> + nameoff, nid | 0ULL);
> + return -EFSCORRUPTED;
> + }
> +
> + end = (void *)buf + nameoff;
> + while (de < end) {
> + const char *dname;
> + unsigned int dname_len;
> +
> + nameoff = le16_to_cpu(de->nameoff);
> + dname = (char *)buf + nameoff;
> + if (de + 1 >= end)
> + dname_len = strnlen(dname, maxsize - nameoff);
> + else
> + dname_len = le16_to_cpu(de[1].nameoff)
> + - nameoff;
> +
> + /* a corrupted entry is found */
> + if (nameoff + dname_len > maxsize ||
> + dname_len > EROFS_NAME_LEN) {
> + erofs_err("bogus dirent @ nid %llu",
> + le64_to_cpu(de->nid) | 0ULL);
> + DBG_BUGON(1);
> + return -EFSCORRUPTED;
> + }
> +
> + if (de->nid == target) {
> + memcpy(path + pos, dname, dname_len);
> + return 0;
> + }
> +
> + if (de->file_type == EROFS_FT_DIR &&
> + de->nid != parent_nid &&
> + de->nid != nid) {
> + memcpy(path + pos, dname, dname_len);
> + err = get_path_by_nid(de->nid, nid,
> + target, path, pos + dname_len);
> + if (!err)
> + return 0;
> + memset(path + pos, 0, dname_len);
> + }
> + ++de;
> + }
> + offset += maxsize;
> + }
> + return -1;
> +}
> +
> +static void dumpfs_print_inode(void)
> +{
> + int err;
> + erofs_off_t size;
> + erofs_nid_t nid = dumpcfg.ino;
> + struct erofs_inode inode = {.nid = nid};
> + char path[PATH_MAX + 1] = {0};
> + time_t t = inode.i_ctime;
> +
> + err = erofs_read_inode_from_disk(&inode);
> + if (err) {
> + erofs_err("read inode %lu from disk failed", nid);
> + return;
> + }
> +
> + fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
> + switch (inode.inode_isize) {
> + case 32:
> + fprintf(stderr, " File inode is compacted layout\n");
It's enough to print "Inode core size: 32/64."
> + break;
> + case 64:
> + fprintf(stderr, " File inode is extended layout\n");
> + break;
> + default:
> + erofs_err("unsupported inode layout\n");
> + }
> + fprintf(stderr, " File size: %lu\n",
> + inode.i_size);
> + fprintf(stderr, " File nid: %lu\n",
> + inode.nid);
> + fprintf(stderr, " File extent size: %u\n",
> + inode.extent_isize);
> + fprintf(stderr, " File xattr size: %u\n",
> + inode.xattr_isize);
> + fprintf(stderr, " File inode size: %u\n",
> + inode.inode_isize);
> + fprintf(stderr, " File type: ");
> + switch (inode.i_mode & S_IFMT) {
> + case S_IFREG:
> + fprintf(stderr, "regular\n");
> + break;
> + case S_IFDIR:
> + fprintf(stderr, "directory\n");
> + break;
> + case S_IFLNK:
> + fprintf(stderr, "link\n");
> + break;
> + case S_IFCHR:
> + fprintf(stderr, "character device\n");
> + break;
> + case S_IFBLK:
> + fprintf(stderr, "block device\n");
> + break;
> + case S_IFIFO:
> + fprintf(stderr, "fifo\n");
> + break;
> + case S_IFSOCK:
> + fprintf(stderr, "sock\n");
> + break;
> + default:
> + break;
> + }
> +
> + err = get_file_compressed_size(&inode, &size);
> + if (err) {
> + erofs_err("get file size failed\n");
> + return;
> + }
> +
> + fprintf(stderr, " File original size: %lu\n"
> + " File on-disk size: %lu\n",
> + inode.i_size, size);
> + fprintf(stderr, " File compress rate: %.2f%%\n",
> + (double)(100 * size) / (double)(inode.i_size));
I think we could use "compressed blocks" instead...
> +
> + fprintf(stderr, " File datalayout: ");
> + switch (inode.datalayout) {
> + case EROFS_INODE_FLAT_PLAIN:
> + fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
> + break;
> + case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> + fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION_LEGACY\n");
> + break;
> + case EROFS_INODE_FLAT_INLINE:
> + fprintf(stderr, "EROFS_INODE_FLAT_INLINE\n");
> + break;
> + case EROFS_INODE_FLAT_COMPRESSION:
> + fprintf(stderr, "EROFS_INODE_FLAT_COMPRESSION\n");
> + break;
Just using a number is fine, since there could be some new types in the
future (also I'd like to rename EROFS_INODE_FLAT_COMPRESSION_LEGACY later.)
> + default:
> + break;
> + }
> +
> + fprintf(stderr, " File create time: %s", ctime(&t));
> + fprintf(stderr, " File uid: %u\n", inode.i_uid);
> + fprintf(stderr, " File gid: %u\n", inode.i_gid);
The access mode is missing here.
> + fprintf(stderr, " File hard-link count: %u\n", inode.i_nlink);
Anyway... how about just using a "stat"-like style and adding more fields?
File: erofs.rst
Size: 14035 Blocks: 32 IO Block: 4096 regular file
Device: 10303h/66307d Inode: 7120988 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 1000/hsiangkao) Gid: ( 1000/hsiangkao)
Access: 2021-09-11 00:42:02.748083341 +0800
Modify: 2021-09-03 02:54:32.188031546 +0800
Change: 2021-09-03 02:54:32.188031546 +0800
Birth: -
Thanks,
Gao Xiang
> +
> + err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> + if (!err)
> + fprintf(stderr, " File path: %s\n", path);
> + else
> + fprintf(stderr, "Path not found\n");
> +}
> +
> static int get_file_type(const char *filename)
> {
> char *postfix = strrchr(filename, '.');
> @@ -611,6 +807,8 @@ int main(int argc, char **argv)
> if (dumpcfg.print_statistic)
> dumpfs_print_statistic();
>
> + if (dumpcfg.print_inode)
> + dumpfs_print_inode();
>
> return 0;
> }
> --
> 2.25.4
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
` (2 preceding siblings ...)
2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
@ 2021-09-11 13:46 ` Guo Xuenan
2021-09-11 16:29 ` Gao Xiang
2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang
4 siblings, 1 reply; 13+ messages in thread
From: Guo Xuenan @ 2021-09-11 13:46 UTC (permalink / raw)
To: xiang, linux-erofs; +Cc: mpiglet
From: mpiglet <mpiglet@outlook.com>
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Signed-off-by: mpiglet <mpiglet@outlook.com>
---
dump/main.c | 108 +++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 91 insertions(+), 17 deletions(-)
diff --git a/dump/main.c b/dump/main.c
index 2389cef..efce309 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -21,8 +21,10 @@ struct dumpcfg {
bool print_superblock;
bool print_inode;
bool print_statistic;
+ bool print_inode_phy;
bool print_version;
u64 ino;
+ u64 ino_phy;
};
static struct dumpcfg dumpcfg;
@@ -105,6 +107,7 @@ static void usage(void)
"-s print information about superblock\n"
"-S print statistic information of the erofs-image\n"
"-i # print target # inode info\n"
+ "-I # print target # inode on-disk info\n"
"-v/-V print dump.erofs version info\n"
"-h/--help display this help and exit\n", stderr);
}
@@ -136,6 +139,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
dumpcfg.print_inode = true;
dumpcfg.ino = i;
break;
+ case 'I':
+ i = atoll(optarg);
+ dumpcfg.print_inode_phy = true;
+ dumpcfg.ino_phy = i;
+ break;
case 'h':
case 1:
usage();
@@ -402,25 +410,25 @@ static void dumpfs_print_inode(void)
fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
switch (inode.inode_isize) {
case 32:
- fprintf(stderr, " File inode is compacted layout\n");
+ fprintf(stderr, "File inode is compacted layout\n");
break;
case 64:
- fprintf(stderr, " File inode is extended layout\n");
+ fprintf(stderr, "File inode is extended layout\n");
break;
default:
erofs_err("unsupported inode layout\n");
}
- fprintf(stderr, " File size: %lu\n",
+ fprintf(stderr, "File size: %lu\n",
inode.i_size);
- fprintf(stderr, " File nid: %lu\n",
+ fprintf(stderr, "File nid: %lu\n",
inode.nid);
- fprintf(stderr, " File extent size: %u\n",
+ fprintf(stderr, "File extent size: %u\n",
inode.extent_isize);
- fprintf(stderr, " File xattr size: %u\n",
+ fprintf(stderr, "File xattr size: %u\n",
inode.xattr_isize);
- fprintf(stderr, " File inode size: %u\n",
+ fprintf(stderr, "File inode size: %u\n",
inode.inode_isize);
- fprintf(stderr, " File type: ");
+ fprintf(stderr, "File type: ");
switch (inode.i_mode & S_IFMT) {
case S_IFREG:
fprintf(stderr, "regular\n");
@@ -453,13 +461,13 @@ static void dumpfs_print_inode(void)
return;
}
- fprintf(stderr, " File original size: %lu\n"
- " File on-disk size: %lu\n",
+ fprintf(stderr, "File original size: %lu\n"
+ "File on-disk size: %lu\n",
inode.i_size, size);
- fprintf(stderr, " File compress rate: %.2f%%\n",
+ fprintf(stderr, "File compress rate: %.2f%%\n",
(double)(100 * size) / (double)(inode.i_size));
- fprintf(stderr, " File datalayout: ");
+ fprintf(stderr, "File datalayout: ");
switch (inode.datalayout) {
case EROFS_INODE_FLAT_PLAIN:
fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
@@ -477,18 +485,82 @@ static void dumpfs_print_inode(void)
break;
}
- fprintf(stderr, " File create time: %s", ctime(&t));
- fprintf(stderr, " File uid: %u\n", inode.i_uid);
- fprintf(stderr, " File gid: %u\n", inode.i_gid);
- fprintf(stderr, " File hard-link count: %u\n", inode.i_nlink);
+ fprintf(stderr, "File create time: %s", ctime(&t));
+ fprintf(stderr, "File uid: %u\n", inode.i_uid);
+ fprintf(stderr, "File gid: %u\n", inode.i_gid);
+ fprintf(stderr, "File hard-link count: %u\n", inode.i_nlink);
err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
if (!err)
- fprintf(stderr, " File path: %s\n", path);
+ fprintf(stderr, "File path: %s\n", path);
else
fprintf(stderr, "Path not found\n");
}
+static void dumpfs_print_inode_phy(void)
+{
+ int err;
+ erofs_nid_t nid = dumpcfg.ino_phy;
+ struct erofs_inode inode = {.nid = nid};
+ char path[PATH_MAX + 1] = {0};
+
+ err = erofs_read_inode_from_disk(&inode);
+ if (err < 0) {
+ erofs_err("read inode %lu from disk failed", nid);
+ return;
+ }
+
+ const erofs_off_t ibase = iloc(inode.nid);
+ const erofs_off_t pos = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(
+ ibase + inode.inode_isize + inode.xattr_isize);
+ erofs_blk_t blocks = inode.u.i_blocks;
+ erofs_blk_t start = 0;
+ erofs_blk_t end = 0;
+ struct erofs_map_blocks map = {
+ .index = UINT_MAX,
+ .m_la = 0,
+ };
+
+ fprintf(stderr, "Inode %lu on-disk info:\n", nid);
+ switch (inode.datalayout) {
+ case EROFS_INODE_FLAT_INLINE:
+ case EROFS_INODE_FLAT_PLAIN:
+ if (inode.u.i_blkaddr == NULL_ADDR)
+ start = end = erofs_blknr(pos);
+ else {
+ start = inode.u.i_blkaddr;
+ end = start + BLK_ROUND_UP(inode.i_size) - 1;
+ }
+ fprintf(stderr, "File size: %lu\n",
+ inode.i_size);
+ fprintf(stderr,
+ " Plain Block Address: %u - %u\n",
+ start, end);
+ break;
+
+ case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+ case EROFS_INODE_FLAT_COMPRESSION:
+ err = z_erofs_map_blocks_iter(&inode, &map);
+ if (err)
+ erofs_err("get file blocks range failed");
+
+ start = erofs_blknr(map.m_pa);
+ end = start - 1 + blocks;
+ fprintf(stderr,
+ " Compressed Block Address: %u - %u\n",
+ start, end);
+ break;
+ }
+
+ err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
+ if (!err)
+ fprintf(stderr, "File Path: %s\n",
+ path);
+ else
+ erofs_err("path not found");
+}
+
+
static int get_file_type(const char *filename)
{
char *postfix = strrchr(filename, '.');
@@ -810,5 +882,7 @@ int main(int argc, char **argv)
if (dumpcfg.print_inode)
dumpfs_print_inode();
+ if (dumpcfg.print_inode_phy)
+ dumpfs_print_inode_phy();
return 0;
}
--
2.25.4
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk
2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
@ 2021-09-11 16:29 ` Gao Xiang
0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 16:29 UTC (permalink / raw)
To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet
On Sat, Sep 11, 2021 at 09:46:35PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
>
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
> ---
> dump/main.c | 108 +++++++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 91 insertions(+), 17 deletions(-)
>
> diff --git a/dump/main.c b/dump/main.c
> index 2389cef..efce309 100644
> --- a/dump/main.c
> +++ b/dump/main.c
> @@ -21,8 +21,10 @@ struct dumpcfg {
> bool print_superblock;
> bool print_inode;
> bool print_statistic;
> + bool print_inode_phy;
> bool print_version;
> u64 ino;
> + u64 ino_phy;
> };
> static struct dumpcfg dumpcfg;
>
> @@ -105,6 +107,7 @@ static void usage(void)
> "-s print information about superblock\n"
> "-S print statistic information of the erofs-image\n"
> "-i # print target # inode info\n"
> + "-I # print target # inode on-disk info\n"
> "-v/-V print dump.erofs version info\n"
> "-h/--help display this help and exit\n", stderr);
> }
> @@ -136,6 +139,11 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> dumpcfg.print_inode = true;
> dumpcfg.ino = i;
> break;
> + case 'I':
> + i = atoll(optarg);
> + dumpcfg.print_inode_phy = true;
> + dumpcfg.ino_phy = i;
> + break;
> case 'h':
> case 1:
> usage();
> @@ -402,25 +410,25 @@ static void dumpfs_print_inode(void)
> fprintf(stderr, "Inode %lu info:\n", dumpcfg.ino);
> switch (inode.inode_isize) {
> case 32:
> - fprintf(stderr, " File inode is compacted layout\n");
> + fprintf(stderr, "File inode is compacted layout\n");
> break;
> case 64:
> - fprintf(stderr, " File inode is extended layout\n");
> + fprintf(stderr, "File inode is extended layout\n");
> break;
> default:
> erofs_err("unsupported inode layout\n");
> }
> - fprintf(stderr, " File size: %lu\n",
> + fprintf(stderr, "File size: %lu\n",
How about folding these into the previous patch?
> inode.i_size);
> - fprintf(stderr, " File nid: %lu\n",
> + fprintf(stderr, "File nid: %lu\n",
> inode.nid);
> - fprintf(stderr, " File extent size: %u\n",
> + fprintf(stderr, "File extent size: %u\n",
> inode.extent_isize);
> - fprintf(stderr, " File xattr size: %u\n",
> + fprintf(stderr, "File xattr size: %u\n",
> inode.xattr_isize);
> - fprintf(stderr, " File inode size: %u\n",
> + fprintf(stderr, "File inode size: %u\n",
> inode.inode_isize);
> - fprintf(stderr, " File type: ");
> + fprintf(stderr, "File type: ");
> switch (inode.i_mode & S_IFMT) {
> case S_IFREG:
> fprintf(stderr, "regular\n");
> @@ -453,13 +461,13 @@ static void dumpfs_print_inode(void)
> return;
> }
>
> - fprintf(stderr, " File original size: %lu\n"
> - " File on-disk size: %lu\n",
> + fprintf(stderr, "File original size: %lu\n"
> + "File on-disk size: %lu\n",
> inode.i_size, size);
> - fprintf(stderr, " File compress rate: %.2f%%\n",
> + fprintf(stderr, "File compress rate: %.2f%%\n",
> (double)(100 * size) / (double)(inode.i_size));
>
> - fprintf(stderr, " File datalayout: ");
> + fprintf(stderr, "File datalayout: ");
> switch (inode.datalayout) {
> case EROFS_INODE_FLAT_PLAIN:
> fprintf(stderr, "EROFS_INODE_FLAT_PLAIN\n");
> @@ -477,18 +485,82 @@ static void dumpfs_print_inode(void)
> break;
> }
>
> - fprintf(stderr, " File create time: %s", ctime(&t));
> - fprintf(stderr, " File uid: %u\n", inode.i_uid);
> - fprintf(stderr, " File gid: %u\n", inode.i_gid);
> - fprintf(stderr, " File hard-link count: %u\n", inode.i_nlink);
> + fprintf(stderr, "File create time: %s", ctime(&t));
> + fprintf(stderr, "File uid: %u\n", inode.i_uid);
> + fprintf(stderr, "File gid: %u\n", inode.i_gid);
> + fprintf(stderr, "File hard-link count: %u\n", inode.i_nlink);
>
> err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> if (!err)
> - fprintf(stderr, " File path: %s\n", path);
> + fprintf(stderr, "File path: %s\n", path);
> else
> fprintf(stderr, "Path not found\n");
> }
>
> +static void dumpfs_print_inode_phy(void)
> +{
> + int err;
> + erofs_nid_t nid = dumpcfg.ino_phy;
> + struct erofs_inode inode = {.nid = nid};
> + char path[PATH_MAX + 1] = {0};
> +
> + err = erofs_read_inode_from_disk(&inode);
> + if (err < 0) {
> + erofs_err("read inode %lu from disk failed", nid);
> + return;
> + }
> +
> + const erofs_off_t ibase = iloc(inode.nid);
> + const erofs_off_t pos = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(
> + ibase + inode.inode_isize + inode.xattr_isize);
> + erofs_blk_t blocks = inode.u.i_blocks;
> + erofs_blk_t start = 0;
> + erofs_blk_t end = 0;
> + struct erofs_map_blocks map = {
> + .index = UINT_MAX,
> + .m_la = 0,
> + };
> +
> + fprintf(stderr, "Inode %lu on-disk info:\n", nid);
> + switch (inode.datalayout) {
> + case EROFS_INODE_FLAT_INLINE:
> + case EROFS_INODE_FLAT_PLAIN:
> + if (inode.u.i_blkaddr == NULL_ADDR)
> + start = end = erofs_blknr(pos);
> + else {
> + start = inode.u.i_blkaddr;
> + end = start + BLK_ROUND_UP(inode.i_size) - 1;
> + }
> + fprintf(stderr, "File size: %lu\n",
> + inode.i_size);
> + fprintf(stderr,
> + " Plain Block Address: %u - %u\n",
> + start, end);
> + break;
> +
> + case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
> + case EROFS_INODE_FLAT_COMPRESSION:
> + err = z_erofs_map_blocks_iter(&inode, &map);
> + if (err)
> + erofs_err("get file blocks range failed");
> +
> + start = erofs_blknr(map.m_pa);
> + end = start - 1 + blocks;
> + fprintf(stderr,
> + " Compressed Block Address: %u - %u\n",
> + start, end);
How about porting/using fiemap code directly instead?
Thanks,
Gao Xiang
> + break;
> + }
> +
> + err = get_path_by_nid(sbi.root_nid, sbi.root_nid, nid, path, 0);
> + if (!err)
> + fprintf(stderr, "File Path: %s\n",
> + path);
> + else
> + erofs_err("path not found");
> +}
> +
> +
> static int get_file_type(const char *filename)
> {
> char *postfix = strrchr(filename, '.');
> @@ -810,5 +882,7 @@ int main(int argc, char **argv)
> if (dumpcfg.print_inode)
> dumpfs_print_inode();
>
> + if (dumpcfg.print_inode_phy)
> + dumpfs_print_inode_phy();
> return 0;
> }
> --
> 2.25.4
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
` (3 preceding siblings ...)
2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
@ 2021-09-11 15:45 ` Gao Xiang
4 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-09-11 15:45 UTC (permalink / raw)
To: Guo Xuenan; +Cc: xiang, linux-erofs, mpiglet
Hi Xuenan,
Thanks for working on dump.erofs! Such functionality was recently
requested by some other folks, so it would be quite helpful to have it
resolved upstream.
Some comments in-line:
On Sat, Sep 11, 2021 at 09:46:31PM +0800, Guo Xuenan wrote:
> From: mpiglet <mpiglet@outlook.com>
mpiglet => "Wang Qi" (according to the name in the source header)
It'd be better to use the real name if possible. ;)
>
> Add dump-tool for erofs to facilitate users directly
> analyzing the erofs image file.
>
> Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> Signed-off-by: mpiglet <mpiglet@outlook.com>
Same here.
> ---
> Makefile.am | 2 +-
> configure.ac | 2 ++
> dump/Makefile.am | 10 ++++++
> dump/main.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
> include/erofs/io.h | 3 ++
> lib/namei.c | 4 +--
> 6 files changed, 102 insertions(+), 3 deletions(-)
> create mode 100644 dump/Makefile.am
> create mode 100644 dump/main.c
>
> diff --git a/Makefile.am b/Makefile.am
> index b804aa9..fedf7b5 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -3,7 +3,7 @@
>
> ACLOCAL_AMFLAGS = -I m4
>
> -SUBDIRS = man lib mkfs
> +SUBDIRS = man lib mkfs dump
> if ENABLE_FUSE
> SUBDIRS += fuse
> endif
> diff --git a/configure.ac b/configure.ac
> index f626064..f4fe548 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -280,6 +280,8 @@ AC_CONFIG_FILES([Makefile
> man/Makefile
> lib/Makefile
> mkfs/Makefile
> + dump/Makefile
> fuse/Makefile])
> +
> AC_OUTPUT
>
> diff --git a/dump/Makefile.am b/dump/Makefile.am
> new file mode 100644
> index 0000000..e664799
> --- /dev/null
> +++ b/dump/Makefile.am
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0+
> +# Makefile.am
> +
> +AUTOMAKE_OPTIONS = foreign
> +bin_PROGRAMS = dump.erofs
> +AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}
Do we really need uuid and selinux libraries for dump.erofs?
> +dump_erofs_SOURCES = main.c
> +dump_erofs_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
> +dump_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} ${liblz4_LIBS}
Same here.
> +
> diff --git a/dump/main.c b/dump/main.c
> new file mode 100644
> index 0000000..8fbc24a
> --- /dev/null
> +++ b/dump/main.c
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * dump/main.c
It could cause some u-boot checkpatch problems...
It'd be better to get rid of the path.
> + *
> + * Copyright (C) 2021-2022 HUAWEI, Inc.
> + * http://www.huawei.com/
> + * Created by Wang Qi <mpiglet@outlook.com>
> + * Guo Xuenan <guoxuenan@huawei.com>
> + */
> +
> +#include <stdlib.h>
> +#include <getopt.h>
> +#include <sys/sysmacros.h>
> +#include <time.h>
> +#include <lz4.h>
> +
> +#include "erofs/print.h"
> +#include "erofs/io.h"
> +
> +static struct option long_options[] = {
> + {"help", no_argument, 0, 1},
> + {0, 0, 0, 0},
> +};
> +
> +static void usage(void)
> +{
> + fputs("usage: [options] erofs-image \n\n"
> + "Dump erofs layout from erofs-image, and [options] are:\n"
> + "-v/-V print dump.erofs version info\n"
How about leaving only one argument here?
It'd be better to keep in sync with dumpe2fs, so:
https://www.man7.org/linux/man-pages/man8/dumpe2fs.8.html
-V print the version number of dump.erofs and exit.
> + "-h/--help display this help and exit\n", stderr);
-h is already used by dumpe2fs, so how about keeping only --help here?
> +}
> +static void dumpfs_print_version(void)
> +{
> + fprintf(stderr, "dump.erofs %s\n", cfg.c_version);
> +}
> +
> +static int dumpfs_parse_options_cfg(int argc, char **argv)
> +{
> + int opt;
> +
> + while ((opt = getopt_long(argc, argv, "sSvVi:I:h",
It seems that not all options are used in this patch.
Also, it would be better to sort them all in alphabetical order.
> + long_options, NULL)) != -1) {
> + switch (opt) {
> + case 'v':
> + case 'V':
> + dumpfs_print_version();
> + exit(0);
> + case 'h':
> + case 1:
> + usage();
> + exit(0);
> + default: /* '?' */
> + return -EINVAL;
> + }
> + }
> +
> + if (optind >= argc)
> + return -EINVAL;
> +
> + cfg.c_img_path = strdup(argv[optind++]);
> + if (!cfg.c_img_path)
> + return -ENOMEM;
> +
> + if (optind < argc) {
> + erofs_err("unexpected argument: %s\n", argv[optind]);
minor nit: memory leak of c_img_path?
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +int main(int argc, char **argv)
> +{
> + int err = 0;
> +
> + erofs_init_configure();
> + err = dumpfs_parse_options_cfg(argc, argv);
> + if (err) {
> + if (err == -EINVAL)
> + usage();
> + return -1;
> + }
> +
> + return 0;
> +}
> diff --git a/include/erofs/io.h b/include/erofs/io.h
> index 5574245..00e5de8 100644
> --- a/include/erofs/io.h
> +++ b/include/erofs/io.h
> @@ -10,6 +10,7 @@
> #define __EROFS_IO_H
>
> #include <unistd.h>
> +#include <sys/types.h>
How about removing "#include <sys/types.h>" in lib/namei.c?
Thanks,
Gao Xiang
> #include "internal.h"
>
> #ifndef O_BINARY
> @@ -25,6 +26,8 @@ int dev_fillzero(u64 offset, size_t len, bool padding);
> int dev_fsync(void);
> int dev_resize(erofs_blk_t nblocks);
> u64 dev_length(void);
> +dev_t erofs_new_decode_dev(u32 dev);
> +int erofs_read_inode_from_disk(struct erofs_inode *vi);
>
> static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
> u32 nblocks)
> diff --git a/lib/namei.c b/lib/namei.c
> index 4e06ba4..21631f1 100644
> --- a/lib/namei.c
> +++ b/lib/namei.c
> @@ -15,7 +15,7 @@
> #include "erofs/print.h"
> #include "erofs/io.h"
>
> -static dev_t erofs_new_decode_dev(u32 dev)
> +dev_t erofs_new_decode_dev(u32 dev)
> {
> const unsigned int major = (dev & 0xfff00) >> 8;
> const unsigned int minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
> @@ -23,7 +23,7 @@ static dev_t erofs_new_decode_dev(u32 dev)
> return makedev(major, minor);
> }
>
> -static int erofs_read_inode_from_disk(struct erofs_inode *vi)
> +int erofs_read_inode_from_disk(struct erofs_inode *vi)
> {
> int ret, ifmt;
> char buf[sizeof(struct erofs_inode_extended)];
> --
> 2.25.4
>
^ permalink raw reply [flat|nested] 13+ messages in thread