linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] erofs-utils: support per-inode compress pcluster
@ 2021-08-16  9:40 Huang Jianan via Linux-erofs
  2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-08-16  9:40 UTC (permalink / raw)
  To: linux-erofs; +Cc: yh, kevin.liw, guoweichao, guanyuwei

Add a --compress-rule option to mkfs.erofs for configuring a per-inode
compression strategy. Each line of the rule file should be in the
following form:

<Regular-expression> <pcluster-in-bytes>

<pcluster-in-bytes> must be a multiple of the block size. A value of 0
means that matching files shouldn't be compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
 include/erofs/compress_rule.h |  25 ++++++++
 include/erofs/config.h        |   1 +
 include/erofs/internal.h      |   2 +
 lib/Makefile.am               |   5 +-
 lib/compress.c                |   4 ++
 lib/compress_rule.c           | 106 ++++++++++++++++++++++++++++++++++
 lib/inode.c                   |   7 +++
 man/mkfs.erofs.1              |   2 +
 mkfs/main.c                   |  31 +++++++---
 9 files changed, 172 insertions(+), 11 deletions(-)
 create mode 100644 include/erofs/compress_rule.h
 create mode 100644 lib/compress_rule.c

diff --git a/include/erofs/compress_rule.h b/include/erofs/compress_rule.h
new file mode 100644
index 0000000..2271ab6
--- /dev/null
+++ b/include/erofs/compress_rule.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs-utils/include/erofs/compress_rule.h
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_STRATEGY_H
+#define __EROFS_COMPRESS_STRATEGY_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_rule {
+	struct list_head list;
+
+	regex_t reg;
+	unsigned int c_pclusterblks;
+};
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode);
+int erofs_load_compress_rule();
+#endif
+
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 8124f3b..50812c9 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -62,6 +62,7 @@ struct erofs_configure {
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	char *compress_rule_file;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 5583861..b9432f4 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -152,6 +152,8 @@ struct erofs_inode {
 	unsigned int xattr_isize;
 	unsigned int extent_isize;
 
+	unsigned int c_pclusterblks;
+
 	erofs_nid_t nid;
 	struct erofs_buffer_head *bh;
 	struct erofs_buffer_head *bh_inline, *bh_data;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b12e2c1..cab912d 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -17,11 +17,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_rule.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_rule.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index 40723a1..01f36d8 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -19,6 +19,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -158,6 +159,9 @@ static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_physical_clusterblks;
 #endif
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return cfg.c_physical_clusterblks;
 }
 
diff --git a/lib/compress_rule.c b/lib/compress_rule.c
new file mode 100644
index 0000000..4ff6205
--- /dev/null
+++ b/lib/compress_rule.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/compress_rule.c
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_rule.h"
+
+static LIST_HEAD(compress_rule_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)\n", s, str);
+}
+
+static int erofs_insert_compress_rule(const char *s, unsigned int blks)
+{
+	struct erofs_compress_rule *r;
+	int ret = 0;
+
+	r = malloc(sizeof(struct erofs_compress_rule));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		goto err;
+	}
+	r->c_pclusterblks = blks;
+
+	list_add_tail(&r->list, &compress_rule_head);
+	erofs_info("insert compress rule %s: %u", s, blks);
+	return ret;
+
+err:
+	free(r);
+	return ret;
+}
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_rule *r;
+
+	if (inode->c_pclusterblks)
+		return inode->c_pclusterblks;
+
+	s = erofs_fspath(inode->i_srcpath);
+
+	list_for_each_entry(r, &compress_rule_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			inode->c_pclusterblks = r->c_pclusterblks;
+			return r->c_pclusterblks;
+		}
+		if (ret > REG_NOMATCH)
+			dump_regerror(ret, s, &r->reg);
+	}
+
+	inode->c_pclusterblks = cfg.c_physical_clusterblks;
+	return cfg.c_physical_clusterblks;
+}
+
+int erofs_load_compress_rule()
+{
+	char buf[PATH_MAX + 100];
+	FILE* f;
+	int ret = 0;
+
+	if (!cfg.compress_rule_file)
+		return 0;
+
+	f = fopen(cfg.compress_rule_file, "r");
+	if (f == NULL)
+		return -errno;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		char* line = buf;
+		char* s;
+		unsigned int blks;
+
+		s = strtok(line, " ");
+		blks = atoi(strtok(NULL, " "));
+		if (blks % EROFS_BLKSIZ) {
+			erofs_err("invalid physical clustersize %u", blks);
+			ret = -EINVAL;
+			goto out;
+		}
+		erofs_insert_compress_rule(s, blks / EROFS_BLKSIZ);
+	}
+
+out:
+	fclose(f);
+	return ret;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 6871d2b..174fb8a 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -329,6 +330,10 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	/* pclusterblks is 0 means this file shouldn't be compressed */
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return true;
 }
 
@@ -854,6 +859,8 @@ static struct erofs_inode *erofs_new_inode(void)
 	inode->xattr_isize = 0;
 	inode->extent_isize = 0;
 
+	inode->c_pclusterblks = 0;
+
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
 	return inode;
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d164fa5..42fb663 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,8 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.BI "\-\-compress-rule" file
+Specify a file to configure per-file compression strategy.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 10fe14d..467e875 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,11 +45,12 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-rule", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
-	{"mount-point", required_argument, NULL, 10},
-	{"product-out", required_argument, NULL, 11},
-	{"fs-config-file", required_argument, NULL, 12},
-	{"block-list-file", required_argument, NULL, 13},
+	{"mount-point", required_argument, NULL, 20},
+	{"product-out", required_argument, NULL, 21},
+	{"fs-config-file", required_argument, NULL, 22},
+	{"block-list-file", required_argument, NULL, 23},
 #endif
 	{0, 0, 0, 0},
 };
@@ -89,6 +91,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-rule=X     specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -97,7 +100,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -288,21 +291,24 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
-#ifdef WITH_ANDROID
 		case 10:
+			cfg.compress_rule_file = optarg;
+			break;
+#ifdef WITH_ANDROID
+		case 20:
 			cfg.mount_point = optarg;
 			/* all trailing '/' should be deleted */
 			opt = strlen(cfg.mount_point);
 			if (opt && optarg[opt - 1] == '/')
 				optarg[opt - 1] = '\0';
 			break;
-		case 11:
+		case 21:
 			cfg.target_out_path = optarg;
 			break;
-		case 12:
+		case 22:
 			cfg.fs_config_file = optarg;
 			break;
-		case 13:
+		case 23:
 			cfg.block_list_file = optarg;
 			break;
 #endif
@@ -587,6 +593,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_rule();
+	if (err) {
+		erofs_err("Failed to load compress rule %s",
+			  cfg.compress_rule_file);
+		goto exit;
+	}
+
 #ifdef HAVE_LIBUUID
 	uuid_unparse_lower(sbi.uuid, uuid_str);
 #endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2] erofs-utils: support per-inode compress pcluster
  2021-08-16  9:40 [PATCH] erofs-utils: support per-inode compress pcluster Huang Jianan via Linux-erofs
@ 2021-08-18  4:27 ` Huang Jianan via Linux-erofs
  2021-08-25  1:17   ` Gao Xiang
  2021-08-25  3:35   ` [PATCH v3] " Huang Jianan via Linux-erofs
  0 siblings, 2 replies; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-08-18  4:27 UTC (permalink / raw)
  To: linux-erofs; +Cc: yh, kevin.liw, guoweichao, guanyuwei

Add a --compress-rule option to mkfs.erofs for configuring a per-inode
compression strategy. Each line of the rule file should be in the
following form:

<Regular-expression> <pcluster-in-bytes>

<pcluster-in-bytes> must be a multiple of the block size. A value of 0
means that matching files shouldn't be compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
changes since v1:
 - rename c_pclusterblks to c_physical_clusterblks and place it in union
 - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster()
   since it's per-inode compression strategy

 include/erofs/compress_rule.h |  25 ++++++++
 include/erofs/config.h        |   1 +
 include/erofs/internal.h      |   1 +
 lib/Makefile.am               |   5 +-
 lib/compress.c                |  10 ++--
 lib/compress_rule.c           | 106 ++++++++++++++++++++++++++++++++++
 lib/compressor.h              |   1 -
 lib/inode.c                   |   6 ++
 man/mkfs.erofs.1              |   2 +
 mkfs/main.c                   |  31 +++++++---
 10 files changed, 172 insertions(+), 16 deletions(-)
 create mode 100644 include/erofs/compress_rule.h
 create mode 100644 lib/compress_rule.c

diff --git a/include/erofs/compress_rule.h b/include/erofs/compress_rule.h
new file mode 100644
index 0000000..8ad578b
--- /dev/null
+++ b/include/erofs/compress_rule.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs-utils/include/erofs/compress_rule.h
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_STRATEGY_H
+#define __EROFS_COMPRESS_STRATEGY_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_rule {
+	struct list_head list;
+
+	regex_t reg;
+	unsigned int c_physical_clusterblks;
+};
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode);
+int erofs_load_compress_rule();
+#endif
+
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 8124f3b..50812c9 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -62,6 +62,7 @@ struct erofs_configure {
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	char *compress_rule_file;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 5583861..4da30b3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,6 +164,7 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
+			uint8_t  c_physical_clusterblks;
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b12e2c1..cab912d 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -17,11 +17,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_rule.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_rule.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index a8ebbc1..ea4a756 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -19,6 +19,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -91,8 +92,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 	}
 
 	do {
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
@@ -151,13 +151,15 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
-/* TODO: apply per-(sub)file strategies here */
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_physical_clusterblks;
 #endif
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return cfg.c_physical_clusterblks;
 }
 
@@ -496,7 +498,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	}
 
-	if (cfg.c_physical_clusterblks > 1) {
+	if (erofs_sb_has_big_pcluster()) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
diff --git a/lib/compress_rule.c b/lib/compress_rule.c
new file mode 100644
index 0000000..497d662
--- /dev/null
+++ b/lib/compress_rule.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/compress_rule.c
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_rule.h"
+
+static LIST_HEAD(compress_rule_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)\n", s, str);
+}
+
+static int erofs_insert_compress_rule(const char *s, unsigned int blks)
+{
+	struct erofs_compress_rule *r;
+	int ret = 0;
+
+	r = malloc(sizeof(struct erofs_compress_rule));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		goto err;
+	}
+	r->c_physical_clusterblks = blks;
+
+	list_add_tail(&r->list, &compress_rule_head);
+	erofs_info("insert compress rule %s: %u", s, blks);
+	return ret;
+
+err:
+	free(r);
+	return ret;
+}
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_rule *r;
+
+	if (inode->c_physical_clusterblks)
+		return inode->c_physical_clusterblks;
+
+	s = erofs_fspath(inode->i_srcpath);
+
+	list_for_each_entry(r, &compress_rule_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			inode->c_physical_clusterblks = r->c_physical_clusterblks;
+			return r->c_physical_clusterblks;
+		}
+		if (ret > REG_NOMATCH)
+			dump_regerror(ret, s, &r->reg);
+	}
+
+	inode->c_physical_clusterblks = cfg.c_physical_clusterblks;
+	return cfg.c_physical_clusterblks;
+}
+
+int erofs_load_compress_rule()
+{
+	char buf[PATH_MAX + 100];
+	FILE* f;
+	int ret = 0;
+
+	if (!cfg.compress_rule_file)
+		return 0;
+
+	f = fopen(cfg.compress_rule_file, "r");
+	if (f == NULL)
+		return -errno;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		char* line = buf;
+		char* s;
+		unsigned int blks;
+
+		s = strtok(line, " ");
+		blks = atoi(strtok(NULL, " "));
+		if (blks % EROFS_BLKSIZ) {
+			erofs_err("invalid physical clustersize %u", blks);
+			ret = -EINVAL;
+			goto out;
+		}
+		erofs_insert_compress_rule(s, blks / EROFS_BLKSIZ);
+	}
+
+out:
+	fclose(f);
+	return ret;
+}
diff --git a/lib/compressor.h b/lib/compressor.h
index b2471c4..4b7b8c8 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -53,4 +53,3 @@ int erofs_compressor_init(struct erofs_compress *c, char *alg_name);
 int erofs_compressor_exit(struct erofs_compress *c);
 
 #endif
-
diff --git a/lib/inode.c b/lib/inode.c
index 6871d2b..ab23ee5 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -329,6 +330,10 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	/* pclusterblks is 0 means this file shouldn't be compressed */
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return true;
 }
 
@@ -856,6 +861,7 @@ static struct erofs_inode *erofs_new_inode(void)
 
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
+	inode->c_physical_clusterblks = 0;
 	return inode;
 }
 
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d164fa5..42fb663 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,8 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.BI "\-\-compress-rule" file
+Specify a file to configure per-file compression strategy.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 10fe14d..467e875 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,11 +45,12 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-rule", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
-	{"mount-point", required_argument, NULL, 10},
-	{"product-out", required_argument, NULL, 11},
-	{"fs-config-file", required_argument, NULL, 12},
-	{"block-list-file", required_argument, NULL, 13},
+	{"mount-point", required_argument, NULL, 20},
+	{"product-out", required_argument, NULL, 21},
+	{"fs-config-file", required_argument, NULL, 22},
+	{"block-list-file", required_argument, NULL, 23},
 #endif
 	{0, 0, 0, 0},
 };
@@ -89,6 +91,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-rule=X     specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -97,7 +100,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -288,21 +291,24 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
-#ifdef WITH_ANDROID
 		case 10:
+			cfg.compress_rule_file = optarg;
+			break;
+#ifdef WITH_ANDROID
+		case 20:
 			cfg.mount_point = optarg;
 			/* all trailing '/' should be deleted */
 			opt = strlen(cfg.mount_point);
 			if (opt && optarg[opt - 1] == '/')
 				optarg[opt - 1] = '\0';
 			break;
-		case 11:
+		case 21:
 			cfg.target_out_path = optarg;
 			break;
-		case 12:
+		case 22:
 			cfg.fs_config_file = optarg;
 			break;
-		case 13:
+		case 23:
 			cfg.block_list_file = optarg;
 			break;
 #endif
@@ -587,6 +593,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_rule();
+	if (err) {
+		erofs_err("Failed to load compress rule %s",
+			  cfg.compress_rule_file);
+		goto exit;
+	}
+
 #ifdef HAVE_LIBUUID
 	uuid_unparse_lower(sbi.uuid, uuid_str);
 #endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] erofs-utils: support per-inode compress pcluster
  2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
@ 2021-08-25  1:17   ` Gao Xiang
  2021-08-25  1:27     ` Gao Xiang
  2021-08-25  3:35   ` [PATCH v3] " Huang Jianan via Linux-erofs
  1 sibling, 1 reply; 11+ messages in thread
From: Gao Xiang @ 2021-08-25  1:17 UTC (permalink / raw)
  To: Huang Jianan; +Cc: yh, kevin.liw, guoweichao, linux-erofs, guanyuwei

On Wed, Aug 18, 2021 at 12:27:15PM +0800, Huang Jianan via Linux-erofs wrote:
> Add an option to configure per-inode compression strategy. Each line
> of the file should be in the following form:
> 
> <Regular-expression> <pcluster-in-bytes>
> 
> When pcluster is 0, it means that the file shouldn't be compressed.
> 
> Signed-off-by: Huang Jianan <huangjianan@oppo.com>

Sorry for the delay. I have been busy with work, so I will look into the
details this weekend. Some comments in advance.

> ---
> changes since v1:
>  - rename c_pclusterblks to c_physical_clusterblks and place it in union
>  - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster()
>    since it's per-inode compression strategy
> 
>  include/erofs/compress_rule.h |  25 ++++++++

How about calling it "compress_hints"? Does that sound better?

>  include/erofs/config.h        |   1 +

...

> index 0000000..497d662
> --- /dev/null
> +++ b/lib/compress_rule.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * erofs-utils/lib/compress_rule.c
> + *
> + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
> + * Created by Huang Jianan <huangjianan@oppo.com>
> + */
> +#include <string.h>
> +#include <stdlib.h>
> +#include "erofs/err.h"
> +#include "erofs/list.h"
> +#include "erofs/print.h"
> +#include "erofs/compress_rule.h"
> +
> +static LIST_HEAD(compress_rule_head);
> +
> +static void dump_regerror(int errcode, const char *s, const regex_t *preg)
> +{
> +	char str[512];
> +
> +	regerror(errcode, preg, str, sizeof(str));
> +	erofs_err("invalid regex %s (%s)\n", s, str);
> +}
> +
> +static int erofs_insert_compress_rule(const char *s, unsigned int blks)
> +{
> +	struct erofs_compress_rule *r;
> +	int ret = 0;
> +
> +	r = malloc(sizeof(struct erofs_compress_rule));
> +	if (!r)
> +		return -ENOMEM;
> +
> +	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
> +	if (ret) {
> +		dump_regerror(ret, s, &r->reg);
> +		goto err;
> +	}
> +	r->c_physical_clusterblks = blks;
> +
> +	list_add_tail(&r->list, &compress_rule_head);
> +	erofs_info("insert compress rule %s: %u", s, blks);
> +	return ret;
> +
> +err:
> +	free(r);
> +	return ret;
> +}
> +
> +unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
> +{
> +	const char *s;
> +	struct erofs_compress_rule *r;
> +
> +	if (inode->c_physical_clusterblks)
> +		return inode->c_physical_clusterblks;
> +
> +	s = erofs_fspath(inode->i_srcpath);
> +
> +	list_for_each_entry(r, &compress_rule_head, list) {
> +		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
> +
> +		if (!ret) {
> +			inode->c_physical_clusterblks = r->c_physical_clusterblks;
> +			return r->c_physical_clusterblks;
> +		}
> +		if (ret > REG_NOMATCH)
> +			dump_regerror(ret, s, &r->reg);
> +	}
> +
> +	inode->c_physical_clusterblks = cfg.c_physical_clusterblks;
> +	return cfg.c_physical_clusterblks;
> +}
> +
> +int erofs_load_compress_rule()
> +{
> +	char buf[PATH_MAX + 100];
> +	FILE* f;
> +	int ret = 0;
> +
> +	if (!cfg.compress_rule_file)
> +		return 0;
> +
> +	f = fopen(cfg.compress_rule_file, "r");
> +	if (f == NULL)
> +		return -errno;
> +
> +	while (fgets(buf, sizeof(buf), f)) {
> +		char* line = buf;
> +		char* s;
> +		unsigned int blks;
> +
> +		s = strtok(line, " ");
> +		blks = atoi(strtok(NULL, " "));
> +		if (blks % EROFS_BLKSIZ) {

We might need to guarantee that these are powers of 2.
Also, how about just printing a warning message and using the default
"-C" value instead?

> +			erofs_err("invalid physical clustersize %u", blks);
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		erofs_insert_compress_rule(s, blks / EROFS_BLKSIZ);
> +	}
> +
> +out:
> +	fclose(f);
> +	return ret;
> +}

...

> diff --git a/mkfs/main.c b/mkfs/main.c
> index 10fe14d..467e875 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -23,6 +23,7 @@
>  #include "erofs/xattr.h"
>  #include "erofs/exclude.h"
>  #include "erofs/block_list.h"
> +#include "erofs/compress_rule.h"
>  
>  #ifdef HAVE_LIBUUID
>  #include <uuid.h>
> @@ -44,11 +45,12 @@ static struct option long_options[] = {
>  	{"random-pclusterblks", no_argument, NULL, 8},
>  #endif
>  	{"max-extent-bytes", required_argument, NULL, 9},
> +	{"compress-rule", required_argument, NULL, 10},
>  #ifdef WITH_ANDROID
> -	{"mount-point", required_argument, NULL, 10},
> -	{"product-out", required_argument, NULL, 11},
> -	{"fs-config-file", required_argument, NULL, 12},
> -	{"block-list-file", required_argument, NULL, 13},
> +	{"mount-point", required_argument, NULL, 20},
> +	{"product-out", required_argument, NULL, 21},
> +	{"fs-config-file", required_argument, NULL, 22},
> +	{"block-list-file", required_argument, NULL, 23},

I think we might clean these up first in a separate patch.
Use values >= 256 for all of them instead.

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] erofs-utils: support per-inode compress pcluster
  2021-08-25  1:17   ` Gao Xiang
@ 2021-08-25  1:27     ` Gao Xiang
  2021-08-25  2:38       ` Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 11+ messages in thread
From: Gao Xiang @ 2021-08-25  1:27 UTC (permalink / raw)
  To: Huang Jianan; +Cc: yh, kevin.liw, guoweichao, linux-erofs, guanyuwei

On Wed, Aug 25, 2021 at 09:17:38AM +0800, Gao Xiang wrote:
> On Wed, Aug 18, 2021 at 12:27:15PM +0800, Huang Jianan via Linux-erofs wrote:
> > Add an option to configure per-inode compression strategy. Each line
> > of the file should be in the following form:
> > 
> > <Regular-expression> <pcluster-in-bytes>
> > 
> > When pcluster is 0, it means that the file shouldn't be compressed.
> > 
> > Signed-off-by: Huang Jianan <huangjianan@oppo.com>
> 
> Sorry for the delay. Due to busy work, I will look into the details
> this weekend. Some comments in advance.
> 
> > ---
> > changes since v1:
> >  - rename c_pclusterblks to c_physical_clusterblks and place it in union
> >  - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster()
> >    since it's per-inode compression strategy
> > 
> >  include/erofs/compress_rule.h |  25 ++++++++
> 
> How about calling "compress_hints"? Does it sound better?
> 
> >  include/erofs/config.h        |   1 +
> 
> ...
> 
> > index 0000000..497d662
> > --- /dev/null
> > +++ b/lib/compress_rule.c
> > @@ -0,0 +1,106 @@
> > +// SPDX-License-Identifier: GPL-2.0+
> > +/*
> > + * erofs-utils/lib/compress_rule.c
> > + *
> > + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
> > + * Created by Huang Jianan <huangjianan@oppo.com>
> > + */
> > +#include <string.h>
> > +#include <stdlib.h>
> > +#include "erofs/err.h"
> > +#include "erofs/list.h"
> > +#include "erofs/print.h"
> > +#include "erofs/compress_rule.h"
> > +
> > +static LIST_HEAD(compress_rule_head);
> > +
> > +static void dump_regerror(int errcode, const char *s, const regex_t *preg)
> > +{
> > +	char str[512];
> > +
> > +	regerror(errcode, preg, str, sizeof(str));
> > +	erofs_err("invalid regex %s (%s)\n", s, str);
> > +}
> > +
> > +static int erofs_insert_compress_rule(const char *s, unsigned int blks)
> > +{
> > +	struct erofs_compress_rule *r;
> > +	int ret = 0;
> > +
> > +	r = malloc(sizeof(struct erofs_compress_rule));
> > +	if (!r)
> > +		return -ENOMEM;
> > +
> > +	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
> > +	if (ret) {
> > +		dump_regerror(ret, s, &r->reg);
> > +		goto err;
> > +	}
> > +	r->c_physical_clusterblks = blks;
> > +
> > +	list_add_tail(&r->list, &compress_rule_head);
> > +	erofs_info("insert compress rule %s: %u", s, blks);
> > +	return ret;
> > +
> > +err:
> > +	free(r);
> > +	return ret;
> > +}
> > +
> > +unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
> > +{
> > +	const char *s;
> > +	struct erofs_compress_rule *r;
> > +
> > +	if (inode->c_physical_clusterblks)
> > +		return inode->c_physical_clusterblks;
> > +
> > +	s = erofs_fspath(inode->i_srcpath);
> > +
> > +	list_for_each_entry(r, &compress_rule_head, list) {
> > +		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
> > +
> > +		if (!ret) {
> > +			inode->c_physical_clusterblks = r->c_physical_clusterblks;
> > +			return r->c_physical_clusterblks;
> > +		}
> > +		if (ret > REG_NOMATCH)
> > +			dump_regerror(ret, s, &r->reg);
> > +	}
> > +
> > +	inode->c_physical_clusterblks = cfg.c_physical_clusterblks;
> > +	return cfg.c_physical_clusterblks;
> > +}
> > +
> > +int erofs_load_compress_rule()
> > +{
> > +	char buf[PATH_MAX + 100];
> > +	FILE* f;
> > +	int ret = 0;
> > +
> > +	if (!cfg.compress_rule_file)
> > +		return 0;
> > +
> > +	f = fopen(cfg.compress_rule_file, "r");
> > +	if (f == NULL)
> > +		return -errno;
> > +
> > +	while (fgets(buf, sizeof(buf), f)) {
> > +		char* line = buf;
> > +		char* s;
> > +		unsigned int blks;
> > +
> > +		s = strtok(line, " ");
> > +		blks = atoi(strtok(NULL, " "));
> > +		if (blks % EROFS_BLKSIZ) {
> 
> We might need to guarantee these are power of 2.

Oh, never mind. It's not necessary to keep the pcluster size a power of 2.
(I need some wake-up coffee...)

> Also, how about just printing out warning message but using default "-C"
> value instead?
> 
> > +			erofs_err("invalid physical clustersize %u", blks);
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +		erofs_insert_compress_rule(s, blks / EROFS_BLKSIZ);
> > +	}
> > +
> > +out:
> > +	fclose(f);
> > +	return ret;
> > +}
> 
> ...
> 
> > diff --git a/mkfs/main.c b/mkfs/main.c
> > index 10fe14d..467e875 100644
> > --- a/mkfs/main.c
> > +++ b/mkfs/main.c
> > @@ -23,6 +23,7 @@
> >  #include "erofs/xattr.h"
> >  #include "erofs/exclude.h"
> >  #include "erofs/block_list.h"
> > +#include "erofs/compress_rule.h"
> >  
> >  #ifdef HAVE_LIBUUID
> >  #include <uuid.h>
> > @@ -44,11 +45,12 @@ static struct option long_options[] = {
> >  	{"random-pclusterblks", no_argument, NULL, 8},
> >  #endif
> >  	{"max-extent-bytes", required_argument, NULL, 9},
> > +	{"compress-rule", required_argument, NULL, 10},
> >  #ifdef WITH_ANDROID
> > -	{"mount-point", required_argument, NULL, 10},
> > -	{"product-out", required_argument, NULL, 11},
> > -	{"fs-config-file", required_argument, NULL, 12},
> > -	{"block-list-file", required_argument, NULL, 13},
> > +	{"mount-point", required_argument, NULL, 20},
> > +	{"product-out", required_argument, NULL, 21},
> > +	{"fs-config-file", required_argument, NULL, 22},
> > +	{"block-list-file", required_argument, NULL, 23},
> 
> I think we might clean up these first with a separated patch.
> Use >= 256 for all of them instead.
> 
> Thanks,
> Gao Xiang

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] erofs-utils: support per-inode compress pcluster
  2021-08-25  1:27     ` Gao Xiang
@ 2021-08-25  2:38       ` Huang Jianan via Linux-erofs
  0 siblings, 0 replies; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-08-25  2:38 UTC (permalink / raw)
  To: Gao Xiang; +Cc: yh, kevin.liw, guoweichao, linux-erofs, guanyuwei



在 2021/8/25 9:27, Gao Xiang 写道:
> On Wed, Aug 25, 2021 at 09:17:38AM +0800, Gao Xiang wrote:
>> On Wed, Aug 18, 2021 at 12:27:15PM +0800, Huang Jianan via Linux-erofs wrote:
>>> Add an option to configure per-inode compression strategy. Each line
>>> of the file should be in the following form:
>>>
>>> <Regular-expression> <pcluster-in-bytes>
>>>
>>> When pcluster is 0, it means that the file shouldn't be compressed.
>>>
>>> Signed-off-by: Huang Jianan <huangjianan@oppo.com>
>> Sorry for the delay. Due to busy work, I will look into the details
>> this weekend. Some comments in advance.
>>
>>> ---
>>> changes since v1:
>>>   - rename c_pclusterblks to c_physical_clusterblks and place it in union
>>>   - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster()
>>>     since it's per-inode compression strategy
>>>
>>>   include/erofs/compress_rule.h |  25 ++++++++
>> How about calling "compress_hints"? Does it sound better?
Sounds good, naming things is quite hard. 🙁
>>
>>>   include/erofs/config.h        |   1 +
>> ...
>>
>>> index 0000000..497d662
>>> --- /dev/null
>>> +++ b/lib/compress_rule.c
>>> @@ -0,0 +1,106 @@
>>> +// SPDX-License-Identifier: GPL-2.0+
>>> +/*
>>> + * erofs-utils/lib/compress_rule.c
>>> + *
>>> + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
>>> + * Created by Huang Jianan <huangjianan@oppo.com>
>>> + */
>>> +#include <string.h>
>>> +#include <stdlib.h>
>>> +#include "erofs/err.h"
>>> +#include "erofs/list.h"
>>> +#include "erofs/print.h"
>>> +#include "erofs/compress_rule.h"
>>> +
>>> +static LIST_HEAD(compress_rule_head);
>>> +
>>> +static void dump_regerror(int errcode, const char *s, const regex_t *preg)
>>> +{
>>> +	char str[512];
>>> +
>>> +	regerror(errcode, preg, str, sizeof(str));
>>> +	erofs_err("invalid regex %s (%s)\n", s, str);
>>> +}
>>> +
>>> +static int erofs_insert_compress_rule(const char *s, unsigned int blks)
>>> +{
>>> +	struct erofs_compress_rule *r;
>>> +	int ret = 0;
>>> +
>>> +	r = malloc(sizeof(struct erofs_compress_rule));
>>> +	if (!r)
>>> +		return -ENOMEM;
>>> +
>>> +	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
>>> +	if (ret) {
>>> +		dump_regerror(ret, s, &r->reg);
>>> +		goto err;
>>> +	}
>>> +	r->c_physical_clusterblks = blks;
>>> +
>>> +	list_add_tail(&r->list, &compress_rule_head);
>>> +	erofs_info("insert compress rule %s: %u", s, blks);
>>> +	return ret;
>>> +
>>> +err:
>>> +	free(r);
>>> +	return ret;
>>> +}
>>> +
>>> +unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
>>> +{
>>> +	const char *s;
>>> +	struct erofs_compress_rule *r;
>>> +
>>> +	if (inode->c_physical_clusterblks)
>>> +		return inode->c_physical_clusterblks;
>>> +
>>> +	s = erofs_fspath(inode->i_srcpath);
>>> +
>>> +	list_for_each_entry(r, &compress_rule_head, list) {
>>> +		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
>>> +
>>> +		if (!ret) {
>>> +			inode->c_physical_clusterblks = r->c_physical_clusterblks;
>>> +			return r->c_physical_clusterblks;
>>> +		}
>>> +		if (ret > REG_NOMATCH)
>>> +			dump_regerror(ret, s, &r->reg);
>>> +	}
>>> +
>>> +	inode->c_physical_clusterblks = cfg.c_physical_clusterblks;
>>> +	return cfg.c_physical_clusterblks;
>>> +}
>>> +
>>> +int erofs_load_compress_rule()
>>> +{
>>> +	char buf[PATH_MAX + 100];
>>> +	FILE* f;
>>> +	int ret = 0;
>>> +
>>> +	if (!cfg.compress_rule_file)
>>> +		return 0;
>>> +
>>> +	f = fopen(cfg.compress_rule_file, "r");
>>> +	if (f == NULL)
>>> +		return -errno;
>>> +
>>> +	while (fgets(buf, sizeof(buf), f)) {
>>> +		char* line = buf;
>>> +		char* s;
>>> +		unsigned int blks;
>>> +
>>> +		s = strtok(line, " ");
>>> +		blks = atoi(strtok(NULL, " "));
>>> +		if (blks % EROFS_BLKSIZ) {
>> We might need to guarantee these are power of 2.
> Oh, never mind. It's not necessary to leave pcluster power of 2.
> (I need some wake-up coffee...)
>
>> Also, how about just printing out warning message but using default "-C"
>> value instead?
ok
>>> +			erofs_err("invalid physical clustersize %u", blks);
>>> +			ret = -EINVAL;
>>> +			goto out;
>>> +		}
>>> +		erofs_insert_compress_rule(s, blks / EROFS_BLKSIZ);
>>> +	}
>>> +
>>> +out:
>>> +	fclose(f);
>>> +	return ret;
>>> +}
>> ...
>>
>>> diff --git a/mkfs/main.c b/mkfs/main.c
>>> index 10fe14d..467e875 100644
>>> --- a/mkfs/main.c
>>> +++ b/mkfs/main.c
>>> @@ -23,6 +23,7 @@
>>>   #include "erofs/xattr.h"
>>>   #include "erofs/exclude.h"
>>>   #include "erofs/block_list.h"
>>> +#include "erofs/compress_rule.h"
>>>   
>>>   #ifdef HAVE_LIBUUID
>>>   #include <uuid.h>
>>> @@ -44,11 +45,12 @@ static struct option long_options[] = {
>>>   	{"random-pclusterblks", no_argument, NULL, 8},
>>>   #endif
>>>   	{"max-extent-bytes", required_argument, NULL, 9},
>>> +	{"compress-rule", required_argument, NULL, 10},
>>>   #ifdef WITH_ANDROID
>>> -	{"mount-point", required_argument, NULL, 10},
>>> -	{"product-out", required_argument, NULL, 11},
>>> -	{"fs-config-file", required_argument, NULL, 12},
>>> -	{"block-list-file", required_argument, NULL, 13},
>>> +	{"mount-point", required_argument, NULL, 20},
>>> +	{"product-out", required_argument, NULL, 21},
>>> +	{"fs-config-file", required_argument, NULL, 22},
>>> +	{"block-list-file", required_argument, NULL, 23},
>> I think we might clean up these first with a separated patch.
>> Use >= 256 for all of them instead.
OK, I will send a separate patch first.

Thanks,
Jianan
>> Thanks,
>> Gao Xiang


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v3] erofs-utils: support per-inode compress pcluster
  2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
  2021-08-25  1:17   ` Gao Xiang
@ 2021-08-25  3:35   ` Huang Jianan via Linux-erofs
  2021-09-05 17:59     ` Gao Xiang
  1 sibling, 1 reply; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-08-25  3:35 UTC (permalink / raw)
  To: linux-erofs; +Cc: yh, kevin.liw, guoweichao, guanyuwei

Add an option to configure a per-inode compression strategy from a
hints file. Each line of the file should be in the following form:

<Regular-expression> <pcluster-in-bytes>

When pcluster is 0, files matching the expression are not compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
changes since v2:
 - change compress_rule to compress_hints for better understanding. (Gao Xiang)
 - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
 - move the WITH_ANDROID option value changes into a separate patch. (Gao Xiang)

changes since v1:
 - rename c_pclusterblks to c_physical_clusterblks and place it in union.
 - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
   it's per-inode compression strategy.

 include/erofs/compress_hints.h |  25 ++++++++
 include/erofs/config.h         |   1 +
 include/erofs/internal.h       |   1 +
 lib/Makefile.am                |   5 +-
 lib/compress.c                 |  10 ++--
 lib/compress_hints.c           | 105 +++++++++++++++++++++++++++++++++
 lib/inode.c                    |   6 ++
 man/mkfs.erofs.1               |   2 +
 mkfs/main.c                    |  15 ++++-
 9 files changed, 163 insertions(+), 7 deletions(-)
 create mode 100644 include/erofs/compress_hints.h
 create mode 100644 lib/compress_hints.c

diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
new file mode 100644
index 0000000..2937b39
--- /dev/null
+++ b/include/erofs/compress_hints.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs-utils/include/erofs/compress_hints.h
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_HINTS_H
+#define __EROFS_COMPRESS_HINTS_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_hints {
+	struct list_head list;
+
+	regex_t reg;
+	unsigned int c_physical_clusterblks;
+};
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode);
+int erofs_load_compress_hints();
+#endif
+
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 8124f3b..399da41 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -62,6 +62,7 @@ struct erofs_configure {
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	char *compress_hints_file;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 5583861..4da30b3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,6 +164,7 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
+			uint8_t  c_physical_clusterblks;
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b12e2c1..e1b677b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -17,11 +17,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_hints.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index a8ebbc1..2d93a10 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -19,6 +19,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -91,8 +92,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 	}
 
 	do {
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
@@ -151,13 +151,15 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
-/* TODO: apply per-(sub)file strategies here */
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_physical_clusterblks;
 #endif
+	if (cfg.compress_hints_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return cfg.c_physical_clusterblks;
 }
 
@@ -496,7 +498,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	}
 
-	if (cfg.c_physical_clusterblks > 1) {
+	if (erofs_sb_has_big_pcluster()) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
new file mode 100644
index 0000000..bc29ebd
--- /dev/null
+++ b/lib/compress_hints.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/compress_hints.c
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_hints.h"
+
+static LIST_HEAD(compress_hints_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)\n", s, str);
+}
+
+static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+{
+	struct erofs_compress_hints *r;
+	int ret = 0;
+
+	r = malloc(sizeof(struct erofs_compress_hints));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		goto err;
+	}
+	r->c_physical_clusterblks = blks;
+
+	list_add_tail(&r->list, &compress_hints_head);
+	erofs_info("insert compress rule %s: %u", s, blks);
+	return ret;
+
+err:
+	free(r);
+	return ret;
+}
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_hints *r;
+
+	if (inode->c_physical_clusterblks)
+		return inode->c_physical_clusterblks;
+
+	s = erofs_fspath(inode->i_srcpath);
+
+	list_for_each_entry(r, &compress_hints_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			inode->c_physical_clusterblks = r->c_physical_clusterblks;
+			return r->c_physical_clusterblks;
+		}
+		if (ret > REG_NOMATCH)
+			dump_regerror(ret, s, &r->reg);
+	}
+
+	inode->c_physical_clusterblks = cfg.c_physical_clusterblks;
+	return cfg.c_physical_clusterblks;
+}
+
+int erofs_load_compress_hints()
+{
+	char buf[PATH_MAX + 100];
+	FILE* f;
+
+	if (!cfg.compress_hints_file)
+		return 0;
+
+	f = fopen(cfg.compress_hints_file, "r");
+	if (f == NULL)
+		return -errno;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		char* line = buf;
+		char* s;
+		unsigned int pclustersize;
+
+		s = strtok(line, " ");
+		pclustersize = atoi(strtok(NULL, " "));
+		if (pclustersize % EROFS_BLKSIZ) {
+			erofs_warn("invalid physical clustersize %u, "
+				   "use default c_physical_clusterblks %u",
+				   pclustersize, cfg.c_physical_clusterblks);
+			continue;
+		}
+		erofs_insert_compress_hints(s, pclustersize / EROFS_BLKSIZ);
+	}
+
+	fclose(f);
+	return 0;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 6871d2b..f2ac30a 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -329,6 +330,10 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	/* pclusterblks is 0 means this file shouldn't be compressed */
+	if (cfg.compress_hints_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return true;
 }
 
@@ -856,6 +861,7 @@ static struct erofs_inode *erofs_new_inode(void)
 
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
+	inode->c_physical_clusterblks = 0;
 	return inode;
 }
 
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d164fa5..7f36146 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,8 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.BI "\-\-compress-hints" file
+Specify a file to configure per-file compression strategy.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 9369b72..dbe49ec 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,6 +45,7 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-hints", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
 	{"mount-point", required_argument, NULL, 256},
 	{"product-out", required_argument, NULL, 257},
@@ -89,6 +91,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-hints=X    specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -97,7 +100,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -288,6 +291,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 10:
+			cfg.compress_hints_file = optarg;
+			break;
 #ifdef WITH_ANDROID
 		case 256:
 			cfg.mount_point = optarg;
@@ -587,6 +593,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_hints();
+	if (err) {
+		erofs_err("Failed to load compress hints %s",
+			  cfg.compress_hints_file);
+		goto exit;
+	}
+
 #ifdef HAVE_LIBUUID
 	uuid_unparse_lower(sbi.uuid, uuid_str);
 #endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] erofs-utils: support per-inode compress pcluster
  2021-08-25  3:35   ` [PATCH v3] " Huang Jianan via Linux-erofs
@ 2021-09-05 17:59     ` Gao Xiang
  2021-09-06  9:38       ` Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 11+ messages in thread
From: Gao Xiang @ 2021-09-05 17:59 UTC (permalink / raw)
  To: Huang Jianan; +Cc: yh, kevin.liw, guoweichao, linux-erofs, guanyuwei

On Wed, Aug 25, 2021 at 11:35:23AM +0800, Huang Jianan via Linux-erofs wrote:
> Add an option to configure per-inode compression strategy. Each line
> of the file should be in the following form:
> 
> <Regular-expression> <pcluster-in-bytes>
> 
> When pcluster is 0, it means that the file shouldn't be compressed.
> 
> Signed-off-by: Huang Jianan <huangjianan@oppo.com>
> ---
> changes since v2:
>  - change compress_rule to compress_hints for better understanding. (Gao Xiang)
>  - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
>  - change the val of WITH_ANDROID option to a separated patch. (Gao Xiang)
> 
> changes since v1:
>  - rename c_pclusterblks to c_physical_clusterblks and place it in union.
>  - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
>    it's per-inode compression strategy.
> 

Hi Jianan,

I sorted out a version this weekend (e.g. bumping up the max pclustersize
if needed and updating the man page). Would you mind confirming it on
your side as well?

Also, it'd be better to add some functional test cases to cover this
if you have extra time.

Thanks,
Gao Xiang

From 0e675d679c8732bd39699e5a9b1b6d9d742fb728 Mon Sep 17 00:00:00 2001
From: Huang Jianan <huangjianan@oppo.com>
Date: Wed, 25 Aug 2021 11:35:23 +0800
Subject: [PATCH v4] erofs-utils: support per-inode compress pcluster

Add an option to configure per-inode compression strategy.

Each line of the file should be in the following form:
<pcluster-in-bytes> <match-pattern>

Note that <match-pattern> can be a regular expression.
If the pcluster size is 0, matching files won't be compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 include/erofs/compress_hints.h |  23 ++++++
 include/erofs/config.h         |   3 +-
 include/erofs/internal.h       |   1 +
 lib/Makefile.am                |   5 +-
 lib/compress.c                 |  24 ++++---
 lib/compress_hints.c           | 128 +++++++++++++++++++++++++++++++++
 lib/config.c                   |   3 +-
 lib/inode.c                    |   4 ++
 man/mkfs.erofs.1               |  11 +++
 mkfs/main.c                    |  19 ++++-
 10 files changed, 205 insertions(+), 16 deletions(-)
 create mode 100644 include/erofs/compress_hints.h
 create mode 100644 lib/compress_hints.c

diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
new file mode 100644
index 000000000000..a5772c72b1c4
--- /dev/null
+++ b/include/erofs/compress_hints.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_HINTS_H
+#define __EROFS_COMPRESS_HINTS_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_hints {
+	struct list_head list;
+
+	regex_t reg;
+	unsigned int physical_clusterblks;
+};
+
+bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
+void erofs_cleanup_compress_hints(void);
+int erofs_load_compress_hints(void);
+#endif
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 95fc23e79e26..d5d9b5a751c0 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -50,13 +50,14 @@ struct erofs_configure {
 	/* related arguments for mkfs.erofs */
 	char *c_img_path;
 	char *c_src_path;
+	char *c_compress_hints_file;
 	char *c_compr_alg_master;
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
 
-	u32 c_physical_clusterblks;
+	u32 c_pclusterblks_max, c_pclusterblks_def;
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index b939155ac951..f5eacea5d4d7 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,6 +164,7 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
+			uint8_t  z_physical_clusterblks;
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b5127c439e43..5a33e297c194 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -16,11 +16,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_hints.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index 6df30ea564a3..2806a7edfcb6 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -17,6 +17,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -89,8 +90,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 	}
 
 	do {
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
@@ -149,14 +149,18 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
-/* TODO: apply per-(sub)file strategies here */
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
-		return 1 + rand() % cfg.c_physical_clusterblks;
+		return 1 + rand() % cfg.c_pclusterblks_max;
 #endif
-	return cfg.c_physical_clusterblks;
+	if (cfg.c_compress_hints_file) {
+		z_erofs_apply_compress_hints(inode);
+		DBG_BUGON(!inode->z_physical_clusterblks);
+		return inode->z_physical_clusterblks;
+	}
+	return cfg.c_pclusterblks_def;
 }
 
 static int vle_compress_one(struct erofs_inode *inode,
@@ -493,7 +497,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	}
 
-	if (cfg.c_physical_clusterblks > 1) {
+	if (erofs_sb_has_big_pcluster()) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
@@ -603,7 +607,7 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
 			.lz4 = {
 				.max_distance =
 					cpu_to_le16(sbi.lz4_max_distance),
-				.max_pclusterblks = cfg.c_physical_clusterblks,
+				.max_pclusterblks = cfg.c_pclusterblks_max,
 			}
 		};
 
@@ -655,11 +659,11 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
 	 * to be loaded in order to get those compressed block counts.
 	 */
-	if (cfg.c_physical_clusterblks > 1) {
-		if (cfg.c_physical_clusterblks >
+	if (cfg.c_pclusterblks_max > 1) {
+		if (cfg.c_pclusterblks_max >
 		    Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
 			erofs_err("unsupported clusterblks %u (too large)",
-				  cfg.c_physical_clusterblks);
+				  cfg.c_pclusterblks_max);
 			return -EINVAL;
 		}
 		erofs_sb_set_big_pcluster();
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
new file mode 100644
index 000000000000..81a8ac9ef04f
--- /dev/null
+++ b/lib/compress_hints.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_hints.h"
+
+static LIST_HEAD(compress_hints_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)\n", s, str);
+}
+
+static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+{
+	struct erofs_compress_hints *r;
+	int ret;
+
+	r = malloc(sizeof(struct erofs_compress_hints));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		goto err_out;
+	}
+	r->physical_clusterblks = blks;
+
+	list_add_tail(&r->list, &compress_hints_head);
+	erofs_info("compress hint %s (%u) is inserted", s, blks);
+	return ret;
+
+err_out:
+	free(r);
+	return ret;
+}
+
+bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_hints *r;
+	unsigned int pclusterblks;
+
+	if (inode->z_physical_clusterblks)
+		return true;
+
+	s = erofs_fspath(inode->i_srcpath);
+	pclusterblks = cfg.c_pclusterblks_def;
+
+	list_for_each_entry(r, &compress_hints_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			pclusterblks = r->physical_clusterblks;
+			break;
+		}
+		if (ret != REG_NOMATCH)
+			dump_regerror(ret, s, &r->reg);
+	}
+	inode->z_physical_clusterblks = pclusterblks;
+
+	/* pclusterblks is 0 means this file shouldn't be compressed */
+	return !!pclusterblks;
+}
+
+void erofs_cleanup_compress_hints(void)
+{
+	struct erofs_compress_hints *r, *n;
+
+	list_for_each_entry_safe(r, n, &compress_hints_head, list) {
+		list_del(&r->list);
+		free(r);
+	}
+}
+
+int erofs_load_compress_hints(void)
+{
+	char buf[PATH_MAX + 100];
+	FILE *f;
+	unsigned int line, max_pclustersize = 0;
+
+	if (!cfg.c_compress_hints_file)
+		return 0;
+
+	f = fopen(cfg.c_compress_hints_file, "r");
+	if (!f)
+		return -errno;
+
+	for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
+		unsigned int pclustersize;
+		char *pattern;
+
+		pclustersize = atoi(strtok(buf, "\t "));
+		pattern = strtok(NULL, "\n");
+		if (!pattern || *pattern == '\0') {
+			erofs_err("cannot find a match pattern at line %u",
+				  line);
+			return -EINVAL;
+		}
+		if (pclustersize % EROFS_BLKSIZ) {
+			erofs_warn("invalid physical clustersize %u, "
+				   "use default pclusterblks %u",
+				   pclustersize, cfg.c_pclusterblks_def);
+			continue;
+		}
+		erofs_insert_compress_hints(pattern,
+					    pclustersize / EROFS_BLKSIZ);
+
+		if (pclustersize > max_pclustersize)
+			max_pclustersize = pclustersize;
+	}
+	fclose(f);
+	if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) {
+		cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ;
+		erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max);
+	}
+	return 0;
+}
diff --git a/lib/config.c b/lib/config.c
index 4757dbbfdd4c..cc2aa7d0112f 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -24,7 +24,8 @@ void erofs_init_configure(void)
 	cfg.c_unix_timestamp = -1;
 	cfg.c_uid = -1;
 	cfg.c_gid = -1;
-	cfg.c_physical_clusterblks = 1;
+	cfg.c_pclusterblks_max = 1;
+	cfg.c_pclusterblks_def = 1;
 	cfg.c_max_decompressed_extent_bytes = -1;
 }
 
diff --git a/lib/inode.c b/lib/inode.c
index 6024e8c593dd..5bad75e1c550 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -327,6 +328,8 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	if (cfg.c_compress_hints_file)
+		return z_erofs_apply_compress_hints(inode);
 	return true;
 }
 
@@ -849,6 +852,7 @@ static struct erofs_inode *erofs_new_inode(void)
 
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
+	inode->z_physical_clusterblks = 0;
 	return inode;
 }
 
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index bc0a10be72a1..1446cb56db30 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,17 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.TP
+.BI "\-\-compress-hints " file
+If the optional
+.BI "\-\-compress-hints " file
+argument is given,
+.B mkfs.erofs
+uses it to apply the per-file compression strategy. Each line is defined by
+tokens separated by spaces in the following form:
+.RS 1.2i
+<pcluster-in-bytes> <match-pattern>
+.RE
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 40ca94ff8db9..addefcefea38 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -21,6 +21,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -42,6 +43,7 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-hints", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
 	{"mount-point", required_argument, NULL, 512},
 	{"product-out", required_argument, NULL, 513},
@@ -87,6 +89,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-hints=X    specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -95,7 +98,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -286,6 +289,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 10:
+			cfg.c_compress_hints_file = optarg;
+			break;
 #ifdef WITH_ANDROID
 		case 512:
 			cfg.mount_point = optarg;
@@ -312,7 +318,8 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 					  optarg);
 				return -EINVAL;
 			}
-			cfg.c_physical_clusterblks = i / EROFS_BLKSIZ;
+			cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
+			cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
 			break;
 
 		case 1:
@@ -578,6 +585,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_hints();
+	if (err) {
+		erofs_err("Failed to load compress hints %s",
+			  cfg.c_compress_hints_file);
+		goto exit;
+	}
+
 	err = z_erofs_compress_init(sb_bh);
 	if (err) {
 		erofs_err("Failed to initialize compressor: %s",
@@ -626,6 +640,7 @@ exit:
 	erofs_droid_blocklist_fclose();
 #endif
 	dev_close();
+	erofs_cleanup_compress_hints();
 	erofs_cleanup_exclude_rules();
 	erofs_exit_configure();
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] erofs-utils: support per-inode compress pcluster
  2021-09-05 17:59     ` Gao Xiang
@ 2021-09-06  9:38       ` Huang Jianan via Linux-erofs
  2021-09-07  0:12         ` Gao Xiang
  0 siblings, 1 reply; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-09-06  9:38 UTC (permalink / raw)
  To: linux-erofs, yh, kevin.liw, guoweichao, guanyuwei

在 2021/9/6 1:59, Gao Xiang 写道:
> On Wed, Aug 25, 2021 at 11:35:23AM +0800, Huang Jianan via Linux-erofs wrote:
>> Add an option to configure per-inode compression strategy. Each line
>> of the file should be in the following form:
>>
>> <Regular-expression> <pcluster-in-bytes>
>>
>> When pcluster is 0, it means that the file shouldn't be compressed.
>>
>> Signed-off-by: Huang Jianan <huangjianan@oppo.com>
>> ---
>> changes since v2:
>>   - change compress_rule to compress_hints for better understanding. (Gao Xiang)
>>   - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
>>   - change the val of WITH_ANDROID option to a separated patch. (Gao Xiang)
>>
>> changes since v1:
>>   - rename c_pclusterblks to c_physical_clusterblks and place it in union.
>>   - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
>>     it's per-inode compression strategy.
>>
> Hi Jianan,
>
> I sorted out a version this weekend (e.g. bump up max pclustersize if
> needed and update the man page), would you mind confirming it on your
> side as well?
Hi Xiang,

Thanks for your modification, looks good to me.
> Also, it'd be better to add some functionality testcases to cover this
> if you have extra time:
Ok, should I use the experimental-tests branch now?

Thanks,
Jianan
> Thanks,
> Gao Xiang
>
>  From 0e675d679c8732bd39699e5a9b1b6d9d742fb728 Mon Sep 17 00:00:00 2001
> From: Huang Jianan <huangjianan@oppo.com>
> Date: Wed, 25 Aug 2021 11:35:23 +0800
> Subject: [PATCH v4] erofs-utils: support per-inode compress pcluster
>
> Add an option to configure per-inode compression strategy.
>
> Each line of the file should be in the following form:
> <pcluster-in-bytes> <match-pattern>
>
> Note that <match-pattern> can be a regular expression.
> If pcluster size is 0, it means that files shouldn't be compressed.
>
> Signed-off-by: Huang Jianan <huangjianan@oppo.com>
> Signed-off-by: Gao Xiang <xiang@kernel.org>
> ---
>   include/erofs/compress_hints.h |  23 ++++++
>   include/erofs/config.h         |   3 +-
>   include/erofs/internal.h       |   1 +
>   lib/Makefile.am                |   5 +-
>   lib/compress.c                 |  24 ++++---
>   lib/compress_hints.c           | 128 +++++++++++++++++++++++++++++++++
>   lib/config.c                   |   3 +-
>   lib/inode.c                    |   4 ++
>   man/mkfs.erofs.1               |  11 +++
>   mkfs/main.c                    |  19 ++++-
>   10 files changed, 205 insertions(+), 16 deletions(-)
>   create mode 100644 include/erofs/compress_hints.h
>   create mode 100644 lib/compress_hints.c
>
> diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
> new file mode 100644
> index 000000000000..a5772c72b1c4
> --- /dev/null
> +++ b/include/erofs/compress_hints.h
> @@ -0,0 +1,23 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
> + * Created by Huang Jianan <huangjianan@oppo.com>
> + */
> +#ifndef __EROFS_COMPRESS_HINTS_H
> +#define __EROFS_COMPRESS_HINTS_H
> +
> +#include "erofs/internal.h"
> +#include <sys/types.h>
> +#include <regex.h>
> +
> +struct erofs_compress_hints {
> +	struct list_head list;
> +
> +	regex_t reg;
> +	unsigned int physical_clusterblks;
> +};
> +
> +bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
> +void erofs_cleanup_compress_hints(void);
> +int erofs_load_compress_hints(void);
> +#endif
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 95fc23e79e26..d5d9b5a751c0 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -50,13 +50,14 @@ struct erofs_configure {
>   	/* related arguments for mkfs.erofs */
>   	char *c_img_path;
>   	char *c_src_path;
> +	char *c_compress_hints_file;
>   	char *c_compr_alg_master;
>   	int c_compr_level_master;
>   	int c_force_inodeversion;
>   	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
>   	int c_inline_xattr_tolerance;
>   
> -	u32 c_physical_clusterblks;
> +	u32 c_pclusterblks_max, c_pclusterblks_def;
>   	u32 c_max_decompressed_extent_bytes;
>   	u64 c_unix_timestamp;
>   	u32 c_uid, c_gid;
> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> index b939155ac951..f5eacea5d4d7 100644
> --- a/include/erofs/internal.h
> +++ b/include/erofs/internal.h
> @@ -164,6 +164,7 @@ struct erofs_inode {
>   			uint16_t z_advise;
>   			uint8_t  z_algorithmtype[2];
>   			uint8_t  z_logical_clusterbits;
> +			uint8_t  z_physical_clusterblks;
>   		};
>   	};
>   #ifdef WITH_ANDROID
> diff --git a/lib/Makefile.am b/lib/Makefile.am
> index b5127c439e43..5a33e297c194 100644
> --- a/lib/Makefile.am
> +++ b/lib/Makefile.am
> @@ -16,11 +16,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
>         $(top_srcdir)/include/erofs/list.h \
>         $(top_srcdir)/include/erofs/print.h \
>         $(top_srcdir)/include/erofs/trace.h \
> -      $(top_srcdir)/include/erofs/xattr.h
> +      $(top_srcdir)/include/erofs/xattr.h \
> +      $(top_srcdir)/include/erofs/compress_hints.h
>   
>   noinst_HEADERS += compressor.h
>   liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
> -		      namei.c data.c compress.c compressor.c zmap.c decompress.c
> +		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
>   liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
>   if ENABLE_LZ4
>   liberofs_la_CFLAGS += ${LZ4_CFLAGS}
> diff --git a/lib/compress.c b/lib/compress.c
> index 6df30ea564a3..2806a7edfcb6 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -17,6 +17,7 @@
>   #include "erofs/compress.h"
>   #include "compressor.h"
>   #include "erofs/block_list.h"
> +#include "erofs/compress_hints.h"
>   
>   static struct erofs_compress compresshandle;
>   static int compressionlevel;
> @@ -89,8 +90,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
>   	}
>   
>   	do {
> -		/* XXX: big pcluster feature should be per-inode */
> -		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
> +		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
>   			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
>   			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
>   					Z_EROFS_VLE_DI_D0_CBLKCNT);
> @@ -149,14 +149,18 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
>   	return count;
>   }
>   
> -/* TODO: apply per-(sub)file strategies here */
>   static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
>   {
>   #ifndef NDEBUG
>   	if (cfg.c_random_pclusterblks)
> -		return 1 + rand() % cfg.c_physical_clusterblks;
> +		return 1 + rand() % cfg.c_pclusterblks_max;
>   #endif
> -	return cfg.c_physical_clusterblks;
> +	if (cfg.c_compress_hints_file) {
> +		z_erofs_apply_compress_hints(inode);
> +		DBG_BUGON(!inode->z_physical_clusterblks);
> +		return inode->z_physical_clusterblks;
> +	}
> +	return cfg.c_pclusterblks_def;
>   }
>   
>   static int vle_compress_one(struct erofs_inode *inode,
> @@ -493,7 +497,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>   		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
>   	}
>   
> -	if (cfg.c_physical_clusterblks > 1) {
> +	if (erofs_sb_has_big_pcluster()) {
>   		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
>   		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
>   			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
> @@ -603,7 +607,7 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
>   			.lz4 = {
>   				.max_distance =
>   					cpu_to_le16(sbi.lz4_max_distance),
> -				.max_pclusterblks = cfg.c_physical_clusterblks,
> +				.max_pclusterblks = cfg.c_pclusterblks_max,
>   			}
>   		};
>   
> @@ -655,11 +659,11 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
>   	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
>   	 * to be loaded in order to get those compressed block counts.
>   	 */
> -	if (cfg.c_physical_clusterblks > 1) {
> -		if (cfg.c_physical_clusterblks >
> +	if (cfg.c_pclusterblks_max > 1) {
> +		if (cfg.c_pclusterblks_max >
>   		    Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
>   			erofs_err("unsupported clusterblks %u (too large)",
> -				  cfg.c_physical_clusterblks);
> +				  cfg.c_pclusterblks_max);
>   			return -EINVAL;
>   		}
>   		erofs_sb_set_big_pcluster();
> diff --git a/lib/compress_hints.c b/lib/compress_hints.c
> new file mode 100644
> index 000000000000..81a8ac9ef04f
> --- /dev/null
> +++ b/lib/compress_hints.c
> @@ -0,0 +1,128 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
> + * Created by Huang Jianan <huangjianan@oppo.com>
> + */
> +#include <string.h>
> +#include <stdlib.h>
> +#include "erofs/err.h"
> +#include "erofs/list.h"
> +#include "erofs/print.h"
> +#include "erofs/compress_hints.h"
> +
> +static LIST_HEAD(compress_hints_head);
> +
> +static void dump_regerror(int errcode, const char *s, const regex_t *preg)
> +{
> +	char str[512];
> +
> +	regerror(errcode, preg, str, sizeof(str));
> +	erofs_err("invalid regex %s (%s)\n", s, str);
> +}
> +
> +static int erofs_insert_compress_hints(const char *s, unsigned int blks)
> +{
> +	struct erofs_compress_hints *r;
> +	int ret;
> +
> +	r = malloc(sizeof(struct erofs_compress_hints));
> +	if (!r)
> +		return -ENOMEM;
> +
> +	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
> +	if (ret) {
> +		dump_regerror(ret, s, &r->reg);
> +		goto err_out;
> +	}
> +	r->physical_clusterblks = blks;
> +
> +	list_add_tail(&r->list, &compress_hints_head);
> +	erofs_info("compress hint %s (%u) is inserted", s, blks);
> +	return ret;
> +
> +err_out:
> +	free(r);
> +	return ret;
> +}
> +
> +bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
> +{
> +	const char *s;
> +	struct erofs_compress_hints *r;
> +	unsigned int pclusterblks;
> +
> +	if (inode->z_physical_clusterblks)
> +		return true;
> +
> +	s = erofs_fspath(inode->i_srcpath);
> +	pclusterblks = cfg.c_pclusterblks_def;
> +
> +	list_for_each_entry(r, &compress_hints_head, list) {
> +		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
> +
> +		if (!ret) {
> +			pclusterblks = r->physical_clusterblks;
> +			break;
> +		}
> +		if (ret != REG_NOMATCH)
> +			dump_regerror(ret, s, &r->reg);
> +	}
> +	inode->z_physical_clusterblks = pclusterblks;
> +
> +	/* pclusterblks is 0 means this file shouldn't be compressed */
> +	return !!pclusterblks;
> +}
> +
> +void erofs_cleanup_compress_hints(void)
> +{
> +	struct erofs_compress_hints *r, *n;
> +
> +	list_for_each_entry_safe(r, n, &compress_hints_head, list) {
> +		list_del(&r->list);
> +		free(r);
> +	}
> +}
> +
> +int erofs_load_compress_hints(void)
> +{
> +	char buf[PATH_MAX + 100];
> +	FILE *f;
> +	unsigned int line, max_pclustersize = 0;
> +
> +	if (!cfg.c_compress_hints_file)
> +		return 0;
> +
> +	f = fopen(cfg.c_compress_hints_file, "r");
> +	if (!f)
> +		return -errno;
> +
> +	for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
> +		unsigned int pclustersize;
> +		char *pattern;
> +
> +		pclustersize = atoi(strtok(buf, "\t "));
> +		pattern = strtok(NULL, "\n");
> +		if (!pattern || *pattern == '\0') {
> +			erofs_err("cannot find a match pattern at line %u",
> +				  line);
> +			return -EINVAL;
> +		}
> +		if (pclustersize % EROFS_BLKSIZ) {
> +			erofs_warn("invalid physical clustersize %u, "
> +				   "use default pclusterblks %u",
> +				   pclustersize, cfg.c_pclusterblks_def);
> +			continue;
> +		}
> +		erofs_insert_compress_hints(pattern,
> +					    pclustersize / EROFS_BLKSIZ);
> +
> +		if (pclustersize > max_pclustersize)
> +			max_pclustersize = pclustersize;
> +	}
> +	fclose(f);
> +	if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) {
> +		cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ;
> +		erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max);
> +	}
> +	return 0;
> +}
> diff --git a/lib/config.c b/lib/config.c
> index 4757dbbfdd4c..cc2aa7d0112f 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -24,7 +24,8 @@ void erofs_init_configure(void)
>   	cfg.c_unix_timestamp = -1;
>   	cfg.c_uid = -1;
>   	cfg.c_gid = -1;
> -	cfg.c_physical_clusterblks = 1;
> +	cfg.c_pclusterblks_max = 1;
> +	cfg.c_pclusterblks_def = 1;
>   	cfg.c_max_decompressed_extent_bytes = -1;
>   }
>   
> diff --git a/lib/inode.c b/lib/inode.c
> index 6024e8c593dd..5bad75e1c550 100644
> --- a/lib/inode.c
> +++ b/lib/inode.c
> @@ -23,6 +23,7 @@
>   #include "erofs/xattr.h"
>   #include "erofs/exclude.h"
>   #include "erofs/block_list.h"
> +#include "erofs/compress_hints.h"
>   
>   #define S_SHIFT                 12
>   static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
> @@ -327,6 +328,8 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
>   /* rules to decide whether a file could be compressed or not */
>   static bool erofs_file_is_compressible(struct erofs_inode *inode)
>   {
> +	if (cfg.c_compress_hints_file)
> +		return z_erofs_apply_compress_hints(inode);
>   	return true;
>   }
>   
> @@ -849,6 +852,7 @@ static struct erofs_inode *erofs_new_inode(void)
>   
>   	inode->bh = inode->bh_inline = inode->bh_data = NULL;
>   	inode->idata = NULL;
> +	inode->z_physical_clusterblks = 0;
>   	return inode;
>   }
>   
> diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
> index bc0a10be72a1..1446cb56db30 100644
> --- a/man/mkfs.erofs.1
> +++ b/man/mkfs.erofs.1
> @@ -88,6 +88,17 @@ Display this help and exit.
>   .TP
>   .B \-\-max-extent-bytes #
>   Specify maximum decompressed extent size # in bytes.
> +.TP
> +.BI "\-\-compress-hints " file
> +If the optional
> +.BI "\-\-compress-hints " file
> +argument is given,
> +.B mkfs.erofs
> +uses it to apply the per-file compression strategy. Each line is defined by
> +tokens separated by spaces in the following form:
> +.RS 1.2i
> +<pcluster-in-bytes> <match-pattern>
> +.RE
>   .SH AUTHOR
>   This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
>   Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 40ca94ff8db9..addefcefea38 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -21,6 +21,7 @@
>   #include "erofs/xattr.h"
>   #include "erofs/exclude.h"
>   #include "erofs/block_list.h"
> +#include "erofs/compress_hints.h"
>   
>   #ifdef HAVE_LIBUUID
>   #include <uuid.h>
> @@ -42,6 +43,7 @@ static struct option long_options[] = {
>   	{"random-pclusterblks", no_argument, NULL, 8},
>   #endif
>   	{"max-extent-bytes", required_argument, NULL, 9},
> +	{"compress-hints", required_argument, NULL, 10},
>   #ifdef WITH_ANDROID
>   	{"mount-point", required_argument, NULL, 512},
>   	{"product-out", required_argument, NULL, 513},
> @@ -87,6 +89,7 @@ static void usage(void)
>   	      " --all-root            make all files owned by root\n"
>   	      " --help                display this help and exit\n"
>   	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
> +	      " --compress-hints=X    specify a file to configure per-file compression strategy\n"
>   #ifndef NDEBUG
>   	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
>   #endif
> @@ -95,7 +98,7 @@ static void usage(void)
>   	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
>   	      " --product-out=X       X=product_out directory\n"
>   	      " --fs-config-file=X    X=fs_config file\n"
> -	      " --block-list-file=X    X=block_list file\n"
> +	      " --block-list-file=X   X=block_list file\n"
>   #endif
>   	      "\nAvailable compressors are: ", stderr);
>   	print_available_compressors(stderr, ", ");
> @@ -286,6 +289,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>   				return -EINVAL;
>   			}
>   			break;
> +		case 10:
> +			cfg.c_compress_hints_file = optarg;
> +			break;
>   #ifdef WITH_ANDROID
>   		case 512:
>   			cfg.mount_point = optarg;
> @@ -312,7 +318,8 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>   					  optarg);
>   				return -EINVAL;
>   			}
> -			cfg.c_physical_clusterblks = i / EROFS_BLKSIZ;
> +			cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
> +			cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
>   			break;
>   
>   		case 1:
> @@ -578,6 +585,13 @@ int main(int argc, char **argv)
>   		goto exit;
>   	}
>   
> +	err = erofs_load_compress_hints();
> +	if (err) {
> +		erofs_err("Failed to load compress hints %s",
> +			  cfg.c_compress_hints_file);
> +		goto exit;
> +	}
> +
>   	err = z_erofs_compress_init(sb_bh);
>   	if (err) {
>   		erofs_err("Failed to initialize compressor: %s",
> @@ -626,6 +640,7 @@ exit:
>   	erofs_droid_blocklist_fclose();
>   #endif
>   	dev_close();
> +	erofs_cleanup_compress_hints();
>   	erofs_cleanup_exclude_rules();
>   	erofs_exit_configure();
>   


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] erofs-utils: support per-inode compress pcluster
  2021-09-06  9:38       ` Huang Jianan via Linux-erofs
@ 2021-09-07  0:12         ` Gao Xiang
  2021-09-15 11:21           ` [PATCH] erofs-utils: tests: check the compress-hints functionality Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 11+ messages in thread
From: Gao Xiang @ 2021-09-07  0:12 UTC (permalink / raw)
  To: Huang Jianan; +Cc: yh, kevin.liw, guoweichao, linux-erofs, guanyuwei

On Mon, Sep 06, 2021 at 05:38:43PM +0800, Huang Jianan via Linux-erofs wrote:
> 在 2021/9/6 1:59, Gao Xiang 写道:
> > On Wed, Aug 25, 2021 at 11:35:23AM +0800, Huang Jianan via Linux-erofs wrote:
> > > Add an option to configure per-inode compression strategy. Each line
> > > of the file should be in the following form:
> > > 
> > > <Regular-expression> <pcluster-in-bytes>
> > > 
> > > When pcluster is 0, it means that the file shouldn't be compressed.
> > > 
> > > Signed-off-by: Huang Jianan <huangjianan@oppo.com>
> > > ---
> > > changes since v2:
> > >   - change compress_rule to compress_hints for better understanding. (Gao Xiang)
> > >   - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
> > >   - change the val of WITH_ANDROID option to a separated patch. (Gao Xiang)
> > > 
> > > changes since v1:
> > >   - rename c_pclusterblks to c_physical_clusterblks and place it in union.
> > >   - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
> > >     it's per-inode compression strategy.
> > > 
> > Hi Jianan,
> > 
> > I sorted out a version this weekend (e.g. bump up max pclustersize if
> > needed and update the man page), would you mind confirming it on your
> > side as well?
> Hi Xiang,
> 
> Thanks for your modification, looks good to me.
> > Also, it'd be better to add some functionality testcases to cover this
> > if you have extra time:
> Ok, should I use the experimental-tests branch now?

Yeah, please check this out if you have extra time.

Thanks,
Gao Xiang
 
> 
> Thanks,
> Jianan
> > Thanks,
> > Gao Xiang

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] erofs-utils: tests: check the compress-hints functionality
  2021-09-07  0:12         ` Gao Xiang
@ 2021-09-15 11:21           ` Huang Jianan via Linux-erofs
  2021-09-15 15:10             ` Gao Xiang
  0 siblings, 1 reply; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-09-15 11:21 UTC (permalink / raw)
  To: linux-erofs; +Cc: yh, guoweichao, zhangshiming, guanyuwei

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
 tests/Makefile.am   |  5 ++-
 tests/common/rc     |  2 +-
 tests/erofs/017     | 78 +++++++++++++++++++++++++++++++++++++++++++++
 tests/erofs/017.out |  2 ++
 4 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100755 tests/erofs/017
 create mode 100644 tests/erofs/017.out

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 1d73a1b..632dcf5 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -70,9 +70,12 @@ TESTS += erofs/014
 # 015 - regression test for battach on full buffer block
 TESTS += erofs/015
 
-# 006 - verify the uncompressed image with 2-level random files
+# 016 - verify the uncompressed image with 2-level random files
 TESTS += erofs/016
 
+# 017 - check the compress-hints functionality
+TESTS += erofs/017
+
 EXTRA_DIST = common/rc erofs
 
 clean-local: clean-local-check
diff --git a/tests/common/rc b/tests/common/rc
index a6b6014..abd88d1 100644
--- a/tests/common/rc
+++ b/tests/common/rc
@@ -185,7 +185,7 @@ _scratch_cycle_mount()
 
 _get_filesize()
 {
-    stat -c %s "$1"
+	stat -c %s "$1"
 }
 
 _require_fssum()
diff --git a/tests/erofs/017 b/tests/erofs/017
new file mode 100755
index 0000000..a12d1ad
--- /dev/null
+++ b/tests/erofs/017
@@ -0,0 +1,78 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+
+# get standard environment, filters and checks
+. "${srcdir}/common/rc"
+
+cleanup()
+{
+	cd /
+	rm -rf $tmp.*
+}
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+echo "QA output created by $seq"
+
+[ -z "$lz4hc_on" ] && \
+	_notrun "lz4hc compression is disabled, skipped."
+
+if [ -z $SCRATCH_DEV ]; then
+	SCRATCH_DEV=$tmp/erofs_$seq.img
+	rm -f SCRATCH_DEV
+fi
+
+localdir="$tmp/$seq"
+rm -rf $localdir
+mkdir -p $localdir
+
+# collect files pending for verification
+dirs=`find ../ -maxdepth 1 -type d -printf '%p:'`
+IFS=':'
+for d in $dirs; do
+	[ $d = '../' ] && continue
+	[ -z "${d##\.\./tests*}" ] && continue
+	[ -z "${d##\.\./\.*}" ] && continue
+	cp -nR $d $localdir
+done
+unset IFS
+
+# init compress_hints
+compress_hints="$tmp/compress_hints"
+rm -rf $compress_hints
+# ignore warning
+MKFS_OPTIONS="${MKFS_OPTIONS} -d1 -zlz4hc --compress-hints=$compress_hints"
+
+echo "0" > $compress_hints
+_scratch_mkfs $localdir && \
+	_fail "successfully mkfs with invalid compress_hints"
+
+echo "0        \.c$"  >  $compress_hints
+echo "1048577  \.am$" >> $compress_hints
+echo "8192     \.h$"  >> $compress_hints
+_scratch_mkfs $localdir || _fail "failed to mkfs"
+
+# verify lz4hc compressed image
+_require_erofs
+_require_fssum
+
+_scratch_mount 2>>$seqres.full
+
+FSSUM_OPTS="-MAC"
+[ $FSTYP = "erofsfuse" ] && FSSUM_OPTS="${FSSUM_OPTS}T"
+
+sum1=`$FSSUM_PROG $FSSUM_OPTS $localdir`
+echo "$localdir checksum is $sum1" >>$seqres.full
+sum2=`$FSSUM_PROG $FSSUM_OPTS $SCRATCH_MNT`
+echo "$SCRATCH_MNT checksum is $sum2" >>$seqres.full
+
+[ "x$sum1" = "x$sum2" ] || _fail "-->checkMD5 FAILED"
+_scratch_unmount
+
+echo Silence is golden
+status=0
+exit 0
diff --git a/tests/erofs/017.out b/tests/erofs/017.out
new file mode 100644
index 0000000..8222844
--- /dev/null
+++ b/tests/erofs/017.out
@@ -0,0 +1,2 @@
+QA output created by 017
+Silence is golden
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] erofs-utils: tests: check the compress-hints functionality
  2021-09-15 11:21           ` [PATCH] erofs-utils: tests: check the compress-hints functionality Huang Jianan via Linux-erofs
@ 2021-09-15 15:10             ` Gao Xiang
  0 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2021-09-15 15:10 UTC (permalink / raw)
  To: Huang Jianan; +Cc: yh, guoweichao, linux-erofs, zhangshiming, guanyuwei


Jianan,

On Wed, Sep 15, 2021 at 07:21:49PM +0800, Huang Jianan via Linux-erofs wrote:
> Signed-off-by: Huang Jianan <huangjianan@oppo.com>

Thanks for this! I'm fine with the patch; let me fold in some
random modifications as well.

> ---
>  tests/Makefile.am   |  5 ++-
>  tests/common/rc     |  2 +-
>  tests/erofs/017     | 78 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/erofs/017.out |  2 ++
>  4 files changed, 85 insertions(+), 2 deletions(-)
>  create mode 100755 tests/erofs/017
>  create mode 100644 tests/erofs/017.out
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 1d73a1b..632dcf5 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -70,9 +70,12 @@ TESTS += erofs/014
>  # 015 - regression test for battach on full buffer block
>  TESTS += erofs/015
>  
> -# 006 - verify the uncompressed image with 2-level random files
> +# 016 - verify the uncompressed image with 2-level random files
>  TESTS += erofs/016
>  
> +# 017 - check the compress-hints functionality
> +TESTS += erofs/017
> +
>  EXTRA_DIST = common/rc erofs
>  
>  clean-local: clean-local-check
> diff --git a/tests/common/rc b/tests/common/rc
> index a6b6014..abd88d1 100644
> --- a/tests/common/rc
> +++ b/tests/common/rc
> @@ -185,7 +185,7 @@ _scratch_cycle_mount()
>  
>  _get_filesize()
>  {
> -    stat -c %s "$1"
> +	stat -c %s "$1"
>  }
>  
>  _require_fssum()
> diff --git a/tests/erofs/017 b/tests/erofs/017
> new file mode 100755
> index 0000000..a12d1ad
> --- /dev/null
> +++ b/tests/erofs/017
> @@ -0,0 +1,78 @@
> +#!/bin/sh
> +# SPDX-License-Identifier: GPL-2.0+
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +
> +# get standard environment, filters and checks
> +. "${srcdir}/common/rc"
> +
> +cleanup()
> +{
> +	cd /
> +	rm -rf $tmp.*
> +}
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +echo "QA output created by $seq"
> +
> +[ -z "$lz4hc_on" ] && \
> +	_notrun "lz4hc compression is disabled, skipped."
> +
> +if [ -z $SCRATCH_DEV ]; then
> +	SCRATCH_DEV=$tmp/erofs_$seq.img
> +	rm -f SCRATCH_DEV
> +fi
> +
> +localdir="$tmp/$seq"
> +rm -rf $localdir
> +mkdir -p $localdir
> +
> +# collect files pending for verification
> +dirs=`find ../ -maxdepth 1 -type d -printf '%p:'`
> +IFS=':'
> +for d in $dirs; do
> +	[ $d = '../' ] && continue
> +	[ -z "${d##\.\./tests*}" ] && continue
> +	[ -z "${d##\.\./\.*}" ] && continue
> +	cp -nR $d $localdir
> +done
> +unset IFS
> +
> +# init compress_hints
> +compress_hints="$tmp/compress_hints"
> +rm -rf $compress_hints
> +# ignore warning
> +MKFS_OPTIONS="${MKFS_OPTIONS} -d1 -zlz4hc --compress-hints=$compress_hints"
> +
> +echo "0" > $compress_hints
> +_scratch_mkfs $localdir && \
> +	_fail "successfully mkfs with invalid compress_hints"
> +
> +echo "0        \.c$"  >  $compress_hints
> +echo "1048577  \.am$" >> $compress_hints
> +echo "8192     \.h$"  >> $compress_hints
> +_scratch_mkfs $localdir || _fail "failed to mkfs"
> +
> +# verify lz4hc compressed image
> +_require_erofs
> +_require_fssum
> +
> +_scratch_mount 2>>$seqres.full
> +
> +FSSUM_OPTS="-MAC"
> +[ $FSTYP = "erofsfuse" ] && FSSUM_OPTS="${FSSUM_OPTS}T"
> +
> +sum1=`$FSSUM_PROG $FSSUM_OPTS $localdir`
> +echo "$localdir checksum is $sum1" >>$seqres.full
> +sum2=`$FSSUM_PROG $FSSUM_OPTS $SCRATCH_MNT`
> +echo "$SCRATCH_MNT checksum is $sum2" >>$seqres.full
> +
> +[ "x$sum1" = "x$sum2" ] || _fail "-->checkMD5 FAILED"
> +_scratch_unmount
> +
> +echo Silence is golden
> +status=0
> +exit 0
> diff --git a/tests/erofs/017.out b/tests/erofs/017.out
> new file mode 100644
> index 0000000..8222844
> --- /dev/null
> +++ b/tests/erofs/017.out
> @@ -0,0 +1,2 @@
> +QA output created by 017
> +Silence is golden
> -- 
> 2.25.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-09-15 15:10 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16  9:40 [PATCH] erofs-utils: support per-inode compress pcluster Huang Jianan via Linux-erofs
2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
2021-08-25  1:17   ` Gao Xiang
2021-08-25  1:27     ` Gao Xiang
2021-08-25  2:38       ` Huang Jianan via Linux-erofs
2021-08-25  3:35   ` [PATCH v3] " Huang Jianan via Linux-erofs
2021-09-05 17:59     ` Gao Xiang
2021-09-06  9:38       ` Huang Jianan via Linux-erofs
2021-09-07  0:12         ` Gao Xiang
2021-09-15 11:21           ` [PATCH] erofs-utils: tests: check the compress-hints functionality Huang Jianan via Linux-erofs
2021-09-15 15:10             ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).