All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huang Jianan via Linux-erofs <linux-erofs@lists.ozlabs.org>
To: linux-erofs@lists.ozlabs.org
Cc: yh@oppo.com, kevin.liw@oppo.com, guoweichao@oppo.com, guanyuwei@oppo.com
Subject: [PATCH v2] erofs-utils: support per-inode compress pcluster
Date: Wed, 18 Aug 2021 12:27:15 +0800	[thread overview]
Message-ID: <20210818042715.24416-1-huangjianan@oppo.com> (raw)
In-Reply-To: <20210816094043.43772-1-huangjianan@oppo.com>

Add a --compress-rule option to configure a per-inode compression
strategy from a rule file. Each line of that file should be in the
following form:

<Regular-expression> <pcluster-in-bytes>

<pcluster-in-bytes> must be a multiple of the block size. A value of 0
means that files matching the expression shouldn't be compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
changes since v1:
 - rename c_pclusterblks to c_physical_clusterblks and place it in union
 - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster()
   since it's per-inode compression strategy

 include/erofs/compress_rule.h |  25 ++++++++
 include/erofs/config.h        |   1 +
 include/erofs/internal.h      |   1 +
 lib/Makefile.am               |   5 +-
 lib/compress.c                |  10 ++--
 lib/compress_rule.c           | 106 ++++++++++++++++++++++++++++++++++
 lib/compressor.h              |   1 -
 lib/inode.c                   |   6 ++
 man/mkfs.erofs.1              |   2 +
 mkfs/main.c                   |  31 +++++++---
 10 files changed, 172 insertions(+), 16 deletions(-)
 create mode 100644 include/erofs/compress_rule.h
 create mode 100644 lib/compress_rule.c

diff --git a/include/erofs/compress_rule.h b/include/erofs/compress_rule.h
new file mode 100644
index 0000000..8ad578b
--- /dev/null
+++ b/include/erofs/compress_rule.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs-utils/include/erofs/compress_rule.h
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_STRATEGY_H
+#define __EROFS_COMPRESS_STRATEGY_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_rule {
+	struct list_head list;	/* node in the list of loaded rules */
+	/* compiled with regcomp(); matched against the inode's fs path */
+	regex_t reg;
+	unsigned int c_physical_clusterblks;	/* rule's pcluster size in blocks */
+};
+
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode);
+int erofs_load_compress_rule(void);	/* (void): a real prototype, not "()" */
+#endif
+
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 8124f3b..50812c9 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -62,6 +62,7 @@ struct erofs_configure {
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	char *compress_rule_file;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 5583861..4da30b3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,6 +164,7 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
+			uint8_t  c_physical_clusterblks;
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b12e2c1..cab912d 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -17,11 +17,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_rule.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_rule.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index a8ebbc1..ea4a756 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -19,6 +19,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -91,8 +92,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 	}
 
 	do {
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
@@ -151,13 +151,15 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
-/* TODO: apply per-(sub)file strategies here */
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_physical_clusterblks;
 #endif
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return cfg.c_physical_clusterblks;
 }
 
@@ -496,7 +498,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	}
 
-	if (cfg.c_physical_clusterblks > 1) {
+	if (erofs_sb_has_big_pcluster()) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
diff --git a/lib/compress_rule.c b/lib/compress_rule.c
new file mode 100644
index 0000000..497d662
--- /dev/null
+++ b/lib/compress_rule.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/compress_rule.c
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_rule.h"
+
+static LIST_HEAD(compress_rule_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];	/* receives the regerror() text for @errcode */
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)", s, str);	/* no "\n", as elsewhere */
+}
+
+/* Compile @s and append it, with @blks pcluster blocks, to the rule list. */
+static int erofs_insert_compress_rule(const char *s, unsigned int blks)
+{
+	struct erofs_compress_rule *r;
+	int ret;
+
+	r = malloc(sizeof(*r));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		free(r);
+		/* regcomp() codes are not errnos; report a bad rule instead */
+		return -EINVAL;
+	}
+
+	r->c_physical_clusterblks = blks;
+
+	list_add_tail(&r->list, &compress_rule_head);
+	erofs_info("insert compress rule %s: %u", s, blks);
+	return 0;
+}
+
+/* max pcluster blocks for @inode: first matching rule wins, else the default */
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_rule *r;
+
+	if (inode->c_physical_clusterblks)	/* cached by an earlier lookup */
+		return inode->c_physical_clusterblks;
+
+	s = erofs_fspath(inode->i_srcpath);
+	list_for_each_entry(r, &compress_rule_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			inode->c_physical_clusterblks = r->c_physical_clusterblks;
+			return r->c_physical_clusterblks;
+		}
+		if (ret != REG_NOMATCH)	/* POSIX gives REG_* codes no ordering */
+			dump_regerror(ret, s, &r->reg);
+	}
+
+	inode->c_physical_clusterblks = cfg.c_physical_clusterblks;
+	return cfg.c_physical_clusterblks;
+}
+
+/* load rules from cfg.compress_rule_file; a malformed line fails with -EINVAL */
+int erofs_load_compress_rule(void)
+{
+	char buf[PATH_MAX + 100];
+	FILE *f;
+	int ret = 0;
+
+	if (!cfg.compress_rule_file)
+		return 0;
+	f = fopen(cfg.compress_rule_file, "r");
+	if (f == NULL)
+		return -errno;
+
+	while (!ret && fgets(buf, sizeof(buf), f)) {
+		char *s = strtok(buf, " \t\r\n");
+		char *v = s ? strtok(NULL, " \t\r\n") : NULL;
+		char *end = v;
+		unsigned long bytes = v ? strtoul(v, &end, 10) : 0;
+
+		if (!s)		/* blank line: nothing to parse */
+			continue;
+		if (!v || *v == '-' || end == v || *end || bytes % EROFS_BLKSIZ) {
+			erofs_err("invalid physical clustersize for rule %s", s);
+			ret = -EINVAL;
+		} else {
+			ret = erofs_insert_compress_rule(s, bytes / EROFS_BLKSIZ);
+			ret = ret > 0 ? -EINVAL : ret;	/* raw regcomp() code -> errno */
+		}
+	}
+	fclose(f);
+	return ret;
+}
diff --git a/lib/compressor.h b/lib/compressor.h
index b2471c4..4b7b8c8 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -53,4 +53,3 @@ int erofs_compressor_init(struct erofs_compress *c, char *alg_name);
 int erofs_compressor_exit(struct erofs_compress *c);
 
 #endif
-
diff --git a/lib/inode.c b/lib/inode.c
index 6871d2b..ab23ee5 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -329,6 +330,10 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	/* pclusterblks is 0 means this file shouldn't be compressed */
+	if (cfg.compress_rule_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return true;
 }
 
@@ -856,6 +861,7 @@ static struct erofs_inode *erofs_new_inode(void)
 
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
+	inode->c_physical_clusterblks = 0;
 	return inode;
 }
 
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d164fa5..42fb663 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,9 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.TP
+.BI "\-\-compress-rule " file
+Specify a file to configure per-file compression strategy.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 10fe14d..467e875 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_rule.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,11 +45,12 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-rule", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
-	{"mount-point", required_argument, NULL, 10},
-	{"product-out", required_argument, NULL, 11},
-	{"fs-config-file", required_argument, NULL, 12},
-	{"block-list-file", required_argument, NULL, 13},
+	{"mount-point", required_argument, NULL, 20},
+	{"product-out", required_argument, NULL, 21},
+	{"fs-config-file", required_argument, NULL, 22},
+	{"block-list-file", required_argument, NULL, 23},
 #endif
 	{0, 0, 0, 0},
 };
@@ -89,6 +91,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-rule=X     specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -97,7 +100,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -288,21 +291,24 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
-#ifdef WITH_ANDROID
 		case 10:
+			cfg.compress_rule_file = optarg;
+			break;
+#ifdef WITH_ANDROID
+		case 20:
 			cfg.mount_point = optarg;
 			/* all trailing '/' should be deleted */
 			opt = strlen(cfg.mount_point);
 			if (opt && optarg[opt - 1] == '/')
 				optarg[opt - 1] = '\0';
 			break;
-		case 11:
+		case 21:
 			cfg.target_out_path = optarg;
 			break;
-		case 12:
+		case 22:
 			cfg.fs_config_file = optarg;
 			break;
-		case 13:
+		case 23:
 			cfg.block_list_file = optarg;
 			break;
 #endif
@@ -587,6 +593,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_rule();
+	if (err) {
+		erofs_err("Failed to load compress rule %s",
+			  cfg.compress_rule_file);
+		goto exit;
+	}
+
 #ifdef HAVE_LIBUUID
 	uuid_unparse_lower(sbi.uuid, uuid_str);
 #endif
-- 
2.25.1


  reply	other threads:[~2021-08-18  4:28 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-16  9:40 [PATCH] erofs-utils: support per-inode compress pcluster Huang Jianan via Linux-erofs
2021-08-18  4:27 ` Huang Jianan via Linux-erofs [this message]
2021-08-25  1:17   ` [PATCH v2] " Gao Xiang
2021-08-25  1:27     ` Gao Xiang
2021-08-25  2:38       ` Huang Jianan via Linux-erofs
2021-08-25  3:35   ` [PATCH v3] " Huang Jianan via Linux-erofs
2021-09-05 17:59     ` Gao Xiang
2021-09-06  9:38       ` Huang Jianan via Linux-erofs
2021-09-07  0:12         ` Gao Xiang
2021-09-15 11:21           ` [PATCH] erofs-utils: tests: check the compress-hints functionality Huang Jianan via Linux-erofs
2021-09-15 15:10             ` Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210818042715.24416-1-huangjianan@oppo.com \
    --to=linux-erofs@lists.ozlabs.org \
    --cc=guanyuwei@oppo.com \
    --cc=guoweichao@oppo.com \
    --cc=huangjianan@oppo.com \
    --cc=kevin.liw@oppo.com \
    --cc=yh@oppo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.