linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Gao Xiang <hsiangkao@linux.alibaba.com>
To: Huang Jianan <huangjianan@oppo.com>
Cc: linux-erofs@lists.ozlabs.org, mpiglet@outlook.com
Subject: Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
Date: Mon, 13 Sep 2021 20:46:51 +0800	[thread overview]
Message-ID: <YT9IO9mZpVRkjPnd@B-P7TQMD6M-0146.local> (raw)
In-Reply-To: <7790736c-0aeb-ca52-af44-cc72e168ed0f@oppo.com>

On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
> 在 2021/9/12 0:13, Gao Xiang 写道:
> > (+Cc Jianan.)
> > 
> > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> > > From: mpiglet <mpiglet@outlook.com>
> > > 
> > > Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
> > > Signed-off-by: mpiglet <mpiglet@outlook.com>
> > > ---
> > >   dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >   1 file changed, 474 insertions(+)
> > > 
> > > diff --git a/dump/main.c b/dump/main.c
> > > index 25ac89f..b0acc0b 100644
> > > --- a/dump/main.c
> > > +++ b/dump/main.c
> > > @@ -19,10 +19,78 @@
> > >   struct dumpcfg {
> > >   	bool print_superblock;
> > > +	bool print_statistic;
> > >   	bool print_version;
> > >   };
> > >   static struct dumpcfg dumpcfg;
> > > +static const char chart_format[] = "%-16s	%-11d %8.2f%% |%-50s|\n";
> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> > > +static char *file_types[] = {
> > > +	".so",
> > > +	".png",
> > > +	".jpg",
> > > +	".xml",
> > > +	".html",
> > > +	".odex",
> > > +	".vdex",
> > > +	".apk",
> > > +	".ttf",
> > > +	".jar",
> > > +	".json",
> > > +	".ogg",
> > > +	".oat",
> > > +	".art",
> > > +	".rc",
> > > +	".otf",
> > > +	".txt",
> > > +	"others",
> > > +};
> > > +enum {
> > > +	SOFILETYPE = 0,
> > > +	PNGFILETYPE,
> > > +	JPEGFILETYPE,
> > > +	XMLFILETYPE,
> > > +	HTMLFILETYPE,
> > > +	ODEXFILETYPE,
> > > +	VDEXFILETYPE,
> > > +	APKFILETYPE,
> > > +	TTFFILETYPE,
> > > +	JARFILETYPE,
> > > +	JSONFILETYPE,
> > > +	OGGFILETYPE,
> > > +	OATFILETYPE,
> > > +	ARTFILETYPE,
> > > +	RCFILETYPE,
> > > +	OTFFILETYPE,
> > > +	TXTFILETYPE,
> > > +	OTHERFILETYPE,
> > > +};
> > Why we need enums here? Can these be resolved with some array index?
> > 
> > > +
> > > +#define	FILE_SIZE_BITS	30
> > > +struct statistics {
> > > +	unsigned long blocks;
> > > +	unsigned long files;
> > > +	unsigned long files_total_size;
> > > +	unsigned long files_total_origin_size;
> > > +	double compress_rate;
> > > +	unsigned long compressed_files;
> > > +	unsigned long uncompressed_files;
> > > +
> > > +	unsigned long regular_files;
> > > +	unsigned long dir_files;
> > > +	unsigned long chardev_files;
> > > +	unsigned long blkdev_files;
> > > +	unsigned long fifo_files;
> > > +	unsigned long sock_files;
> > > +	unsigned long symlink_files;
> > > +
> > > +	unsigned int file_type_stat[OTHERFILETYPE + 1];
> > > +	unsigned int file_org_size[FILE_SIZE_BITS];
> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
> > 
> > > +	unsigned int file_comp_size[FILE_SIZE_BITS];
> > > +};
> > > +static struct statistics stats;
> > > +
> > >   static struct option long_options[] = {
> > >   	{"help", no_argument, 0, 1},
> > >   	{0, 0, 0, 0},
> > > @@ -33,6 +101,7 @@ static void usage(void)
> > >   	fputs("usage: [options] erofs-image \n\n"
> > >   		"Dump erofs layout from erofs-image, and [options] are:\n"
> > >   		"-s          print information about superblock\n"
> > > +		"-S      print statistic information of the erofs-image\n"
> > >   		"-v/-V      print dump.erofs version info\n"
> > >   		"-h/--help  display this help and exit\n", stderr);
> > >   }
> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > >   		case 's':
> > >   			dumpcfg.print_superblock = true;
> > >   			break;
> > > +		case 'S':
> > > +			dumpcfg.print_statistic = true;
> > > +			break;
> > >   		case 'v':
> > >   		case 'V':
> > >   			dumpfs_print_version();
> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > >   	return 0;
> > >   }
> > > +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> > > +		erofs_off_t last_cluster_size,
> > > +		erofs_off_t *last_cluster_compressed_size)
> > Hmmm... do we really need the exact compressed bytes?
> > or just compressed blocks is enough?
> > 
> > "compressed blocks" can be gotten in erofs inode.
> > 
> > Btw, although I think it's useful for fsck (check if an erofs is correct).
> > 
> > > +{
> > > +	int ret;
> > > +	int decomp_len;
> > > +	int compressed_len = 0;
> > > +	char *decompress;
> > > +	char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> > > +
> > > +	ret = dev_read(raw, map->m_pa, map->m_plen);
> > > +	if (ret < 0)
> > > +		return -EIO;
> > > +
> > > +	if (erofs_sb_has_lz4_0padding()) {
> > > +		compressed_len = map->m_plen;
> > > +	} else {
> > > +		// lz4 maximum compression ratio is 255
> > > +		decompress = (char *)malloc(map->m_plen * 255);
> > > +		if (!decompress) {
> > > +			erofs_err("allocate memory for decompress space failed");
> > > +			return -1;
> > > +		}
> > > +		decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> > > +				map->m_plen, last_cluster_size,
> > > +				map->m_plen * 10);
> > > +		if (decomp_len < 0) {
> > > +			erofs_err("decompress last cluster to get decompressed size failed");
> > > +			free(decompress);
> > > +			return -1;
> > > +		}
> > > +		compressed_len = LZ4_compress_destSize(decompress, raw,
> > > +				&decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> > > +		if (compressed_len < 0) {
> > > +			erofs_err("compress to get last extent size failed\n");
> > > +			free(decompress);
> > > +			return -1;
> > > +		}
> > > +		free(decompress);
> > > +		// dut to the use of lz4hc (can use different compress level),
> > > +		// our normal lz4 compress result may be bigger
> > > +		compressed_len = compressed_len < map->m_plen ?
> > > +			compressed_len : map->m_plen;
> > > +	}
> > > +
> > > +	*last_cluster_compressed_size = compressed_len;
> > > +	return 0;
> > > +}
> > > +
> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> > > +		erofs_off_t *size)
> > > +{
> > > +	int err;
> > > +	erofs_blk_t compressedlcs;
> > > +	erofs_off_t last_cluster_size;
> > > +	erofs_off_t last_cluster_compressed_size;
> > > +	struct erofs_map_blocks map = {
> > > +		.index = UINT_MAX,
> > > +		.m_la = inode->i_size - 1,
> > > +	};
> > > +
> > > +	err = z_erofs_map_blocks_iter(inode, &map);
> > (add Jianan here.)
> > 
> > Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> > some functionality to get the file distribution as well when the fs isn't
> > mounted?
> Hi Xiang,
> 
> I have sent the patch and verified it with a similar function. Better to use
> the new interface here.

Yeah, thanks for the patch:
https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/

Hopefully Xuenan could base his work on this.

Thanks,
Gao Xiang

> 
> Thanks,
> Jianan

  reply	other threads:[~2021-09-13 12:47 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-11 13:46 [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 2/5] dump.erofs: add "-s" option to dump superblock information Guo Xuenan
2021-09-11 15:58   ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem Guo Xuenan
2021-09-11 16:13   ` Gao Xiang
2021-09-13  4:30     ` Huang Jianan via Linux-erofs
2021-09-13 12:46       ` Gao Xiang [this message]
2021-09-14  2:31         ` Guo Xuenan
2021-09-11 13:46 ` [PATCH v1 4/5] dump.erofs: add -i options to dump file information of specific inode number Guo Xuenan
2021-09-11 16:25   ` Gao Xiang
2021-09-11 13:46 ` [PATCH v1 5/5] dump.erofs: add -I options to dump the layout of a particular inode on disk Guo Xuenan
2021-09-11 16:29   ` Gao Xiang
2021-09-11 15:45 ` [PATCH v1 1/5] erofs-utils: introduce dump.erofs for utils Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YT9IO9mZpVRkjPnd@B-P7TQMD6M-0146.local \
    --to=hsiangkao@linux.alibaba.com \
    --cc=huangjianan@oppo.com \
    --cc=linux-erofs@lists.ozlabs.org \
    --cc=mpiglet@outlook.com \
    --subject='Re: [PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).