linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] erofs-utils: support per-inode compress pcluster
@ 2021-08-16  9:40 Huang Jianan via Linux-erofs
  2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
  0 siblings, 1 reply; 11+ messages in thread
From: Huang Jianan via Linux-erofs @ 2021-08-16  9:40 UTC (permalink / raw)
  To: linux-erofs; +Cc: yh, kevin.liw, guoweichao, guanyuwei

Add a --compress-rule option that takes a file describing the per-inode
compression strategy. Each line of the rule file should be in the
following form:

<Regular-expression> <pcluster-in-bytes>

A <pcluster-in-bytes> of 0 means that files matching the expression
shouldn't be compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
 include/erofs/compress_rule.h |  25 ++++++++
 include/erofs/config.h        |   1 +
 include/erofs/internal.h      |   2 +
 lib/Makefile.am               |   5 +-
 lib/compress.c                |   4 ++
 lib/compress_rule.c           | 106 ++++++++++++++++++++++++++++++++++
 lib/inode.c                   |   7 +++
 man/mkfs.erofs.1              |   2 +
 mkfs/main.c                   |  31 +++++++---
 9 files changed, 172 insertions(+), 11 deletions(-)
 create mode 100644 include/erofs/compress_rule.h
 create mode 100644 lib/compress_rule.c

diff --git a/include/erofs/compress_rule.h b/include/erofs/compress_rule.h
new file mode 100644
index 0000000..2271ab6
--- /dev/null
+++ b/include/erofs/compress_rule.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs-utils/include/erofs/compress_rule.h
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_STRATEGY_H
+#define __EROFS_COMPRESS_STRATEGY_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_rule {
+	struct list_head list;
+
+	regex_t reg;
+	unsigned int c_pclusterblks;
+};
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode);
+int erofs_load_compress_rule();
+#endif
+
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 8124f3b..50812c9 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -62,6 +62,7 @@ struct erofs_configure {
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	char *compress_rule_file;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 5583861..b9432f4 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -152,6 +152,8 @@ struct erofs_inode {
 	unsigned int xattr_isize;
 	unsigned int extent_isize;
 
+	unsigned int c_pclusterblks;
+
 	erofs_nid_t nid;
 	struct erofs_buffer_head *bh;
 	struct erofs_buffer_head *bh_inline, *bh_data;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b12e2c1..cab912d 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -17,11 +17,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_rule.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_rule.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index 40723a1..01f36d8 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -19,6 +19,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -158,6 +159,9 @@ static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_physical_clusterblks;
 #endif
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return cfg.c_physical_clusterblks;
 }
 
diff --git a/lib/compress_rule.c b/lib/compress_rule.c
new file mode 100644
index 0000000..4ff6205
--- /dev/null
+++ b/lib/compress_rule.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/compress_rule.c
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_rule.h"
+
+static LIST_HEAD(compress_rule_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)\n", s, str);
+}
+
+static int erofs_insert_compress_rule(const char *s, unsigned int blks)
+{
+	struct erofs_compress_rule *r;
+	int ret = 0;
+
+	r = malloc(sizeof(struct erofs_compress_rule));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		goto err;
+	}
+	r->c_pclusterblks = blks;
+
+	list_add_tail(&r->list, &compress_rule_head);
+	erofs_info("insert compress rule %s: %u", s, blks);
+	return ret;
+
+err:
+	free(r);
+	return ret;
+}
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_rule *r;
+
+	if (inode->c_pclusterblks)
+		return inode->c_pclusterblks;
+
+	s = erofs_fspath(inode->i_srcpath);
+
+	list_for_each_entry(r, &compress_rule_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			inode->c_pclusterblks = r->c_pclusterblks;
+			return r->c_pclusterblks;
+		}
+		if (ret > REG_NOMATCH)
+			dump_regerror(ret, s, &r->reg);
+	}
+
+	inode->c_pclusterblks = cfg.c_physical_clusterblks;
+	return cfg.c_physical_clusterblks;
+}
+
+int erofs_load_compress_rule()
+{
+	char buf[PATH_MAX + 100];
+	FILE* f;
+	int ret = 0;
+
+	if (!cfg.compress_rule_file)
+		return 0;
+
+	f = fopen(cfg.compress_rule_file, "r");
+	if (f == NULL)
+		return -errno;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		char* line = buf;
+		char* s;
+		unsigned int blks;
+
+		s = strtok(line, " ");
+		blks = atoi(strtok(NULL, " "));
+		if (blks % EROFS_BLKSIZ) {
+			erofs_err("invalid physical clustersize %u", blks);
+			ret = -EINVAL;
+			goto out;
+		}
+		erofs_insert_compress_rule(s, blks / EROFS_BLKSIZ);
+	}
+
+out:
+	fclose(f);
+	return ret;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 6871d2b..174fb8a 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -329,6 +330,10 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	/* a pclusterblks of 0 means this file shouldn't be compressed */
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return true;
 }
 
@@ -854,6 +859,8 @@ static struct erofs_inode *erofs_new_inode(void)
 	inode->xattr_isize = 0;
 	inode->extent_isize = 0;
 
+	inode->c_pclusterblks = 0;
+
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
 	return inode;
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d164fa5..42fb663 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,8 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.BI "\-\-compress-rule " file
+Specify a file to configure per-file compression strategy.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 10fe14d..467e875 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,11 +45,12 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-rule", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
-	{"mount-point", required_argument, NULL, 10},
-	{"product-out", required_argument, NULL, 11},
-	{"fs-config-file", required_argument, NULL, 12},
-	{"block-list-file", required_argument, NULL, 13},
+	{"mount-point", required_argument, NULL, 20},
+	{"product-out", required_argument, NULL, 21},
+	{"fs-config-file", required_argument, NULL, 22},
+	{"block-list-file", required_argument, NULL, 23},
 #endif
 	{0, 0, 0, 0},
 };
@@ -89,6 +91,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-rule=X     specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -97,7 +100,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -288,21 +291,24 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
-#ifdef WITH_ANDROID
 		case 10:
+			cfg.compress_rule_file = optarg;
+			break;
+#ifdef WITH_ANDROID
+		case 20:
 			cfg.mount_point = optarg;
 			/* all trailing '/' should be deleted */
 			opt = strlen(cfg.mount_point);
 			if (opt && optarg[opt - 1] == '/')
 				optarg[opt - 1] = '\0';
 			break;
-		case 11:
+		case 21:
 			cfg.target_out_path = optarg;
 			break;
-		case 12:
+		case 22:
 			cfg.fs_config_file = optarg;
 			break;
-		case 13:
+		case 23:
 			cfg.block_list_file = optarg;
 			break;
 #endif
@@ -587,6 +593,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_rule();
+	if (err) {
+		erofs_err("Failed to load compress rule %s",
+			  cfg.compress_rule_file);
+		goto exit;
+	}
+
 #ifdef HAVE_LIBUUID
 	uuid_unparse_lower(sbi.uuid, uuid_str);
 #endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-09-15 15:10 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16  9:40 [PATCH] erofs-utils: support per-inode compress pcluster Huang Jianan via Linux-erofs
2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
2021-08-25  1:17   ` Gao Xiang
2021-08-25  1:27     ` Gao Xiang
2021-08-25  2:38       ` Huang Jianan via Linux-erofs
2021-08-25  3:35   ` [PATCH v3] " Huang Jianan via Linux-erofs
2021-09-05 17:59     ` Gao Xiang
2021-09-06  9:38       ` Huang Jianan via Linux-erofs
2021-09-07  0:12         ` Gao Xiang
2021-09-15 11:21           ` [PATCH] erofs-utils: tests: check the compress-hints functionality Huang Jianan via Linux-erofs
2021-09-15 15:10             ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).