linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Huang Jianan via Linux-erofs <linux-erofs@lists.ozlabs.org>
To: linux-erofs@lists.ozlabs.org
Cc: yh@oppo.com, kevin.liw@oppo.com, guoweichao@oppo.com, guanyuwei@oppo.com
Subject: [PATCH v3] erofs-utils: support per-inode compress pcluster
Date: Wed, 25 Aug 2021 11:35:23 +0800	[thread overview]
Message-ID: <20210825033523.20058-1-huangjianan@oppo.com> (raw)
In-Reply-To: <20210818042715.24416-1-huangjianan@oppo.com>

Add an option to configure per-inode compression strategy. Each line
of the file should be in the following form:

<Regular-expression> <pcluster-in-bytes>

When pcluster is 0, it means that the file shouldn't be compressed.

Signed-off-by: Huang Jianan <huangjianan@oppo.com>
---
changes since v2:
 - change compress_rule to compress_hints for better understanding. (Gao Xiang)
 - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
 - split the change to the WITH_ANDROID option values into a separate patch. (Gao Xiang)

changes since v1:
 - rename c_pclusterblks to c_physical_clusterblks and place it in union.
 - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
   it's per-inode compression strategy.

 include/erofs/compress_hints.h |  25 ++++++++
 include/erofs/config.h         |   1 +
 include/erofs/internal.h       |   1 +
 lib/Makefile.am                |   5 +-
 lib/compress.c                 |  10 ++--
 lib/compress_hints.c           | 105 +++++++++++++++++++++++++++++++++
 lib/inode.c                    |   6 ++
 man/mkfs.erofs.1               |   2 +
 mkfs/main.c                    |  15 ++++-
 9 files changed, 163 insertions(+), 7 deletions(-)
 create mode 100644 include/erofs/compress_hints.h
 create mode 100644 lib/compress_hints.c

diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
new file mode 100644
index 0000000..2937b39
--- /dev/null
+++ b/include/erofs/compress_hints.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs-utils/include/erofs/compress_hints.h
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_HINTS_H
+#define __EROFS_COMPRESS_HINTS_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_hints {
+	struct list_head list;	/* entry in the list of loaded hints */
+	/* paths matching reg get c_physical_clusterblks blocks per pcluster */
+	regex_t reg;
+	unsigned int c_physical_clusterblks;
+};
+
+/* per-inode pcluster size in blocks from the loaded hints; 0: uncompressed */
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode);
+int erofs_load_compress_hints(void);
+#endif
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 8124f3b..399da41 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -62,6 +62,7 @@ struct erofs_configure {
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	char *compress_hints_file;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 5583861..4da30b3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,6 +164,7 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
+			uint8_t  c_physical_clusterblks;
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b12e2c1..e1b677b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -17,11 +17,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_hints.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index a8ebbc1..2d93a10 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -19,6 +19,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -91,8 +92,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 	}
 
 	do {
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
@@ -151,13 +151,15 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
-/* TODO: apply per-(sub)file strategies here */
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_physical_clusterblks;
 #endif
+	if (cfg.compress_hints_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return cfg.c_physical_clusterblks;
 }
 
@@ -496,7 +498,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	}
 
-	if (cfg.c_physical_clusterblks > 1) {
+	if (erofs_sb_has_big_pcluster()) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
new file mode 100644
index 0000000..bc29ebd
--- /dev/null
+++ b/lib/compress_hints.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/compress_hints.c
+ *
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan@oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_hints.h"
+
+static LIST_HEAD(compress_hints_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char errmsg[512];	/* regerror() truncates longer messages to fit */
+
+	regerror(errcode, preg, errmsg, sizeof errmsg);
+	erofs_err("invalid regex %s (%s)\n", s, errmsg);
+}
+
+/* Compile @s and queue it with @blks; returns 0 or a negative errno. */
+static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+{
+	struct erofs_compress_hints *r;
+	int ret;
+
+	r = malloc(sizeof(*r));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		/* regcomp() codes are positive REG_*; don't pass them as errno */
+		free(r);
+		return -EINVAL;
+	}
+	r->c_physical_clusterblks = blks;
+
+	/* appended at the tail so hints stay in the order they were read */
+	list_add_tail(&r->list, &compress_hints_head);
+	erofs_info("insert compress rule %s: %u", s, blks);
+	return 0;
+}
+
+/* pcluster blocks for @inode: cached, else first matching hint, else cfg's */
+unsigned int erofs_parse_pclusterblks(struct erofs_inode *inode)
+{
+	struct erofs_compress_hints *hint;
+	unsigned int blks = inode->c_physical_clusterblks;
+	const char *path;
+
+	if (blks)	/* nonzero result cached by an earlier call */
+		return blks;
+	path = erofs_fspath(inode->i_srcpath);
+	blks = cfg.c_physical_clusterblks;
+	list_for_each_entry(hint, &compress_hints_head, list) {
+		int rc = regexec(&hint->reg, path, (size_t)0, NULL, 0);
+
+		if (!rc) {
+			blks = hint->c_physical_clusterblks;
+			break;
+		}
+		if (rc > REG_NOMATCH)
+			dump_regerror(rc, path, &hint->reg);
+	}
+	inode->c_physical_clusterblks = blks;
+	return blks;
+}
+
+/* Parse "<regex> <pcluster-in-bytes>" lines of cfg.compress_hints_file. */
+int erofs_load_compress_hints(void)
+{
+	char buf[PATH_MAX + 100];
+	FILE *f;
+
+	if (!cfg.compress_hints_file)
+		return 0;
+	f = fopen(cfg.compress_hints_file, "r");
+	if (f == NULL)
+		return -errno;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		char *s = strtok(buf, " \t\n");
+		char *size = strtok(NULL, " \t\n");
+		unsigned int pclustersize;
+
+		if (!s || !size)	/* blank line or no size field: skip */
+			continue;
+		pclustersize = atoi(size);
+		if (pclustersize % EROFS_BLKSIZ) {
+			erofs_warn("invalid physical clustersize %u, "
+				   "use default c_physical_clusterblks %u",
+				   pclustersize, cfg.c_physical_clusterblks);
+			continue;
+		}
+		erofs_insert_compress_hints(s, pclustersize / EROFS_BLKSIZ);
+	}
+	fclose(f);
+	return 0;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 6871d2b..f2ac30a 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -329,6 +330,10 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	/* a pclusterblks value of 0 means this file shouldn't be compressed */
+	if (cfg.compress_hints_file)
+		return erofs_parse_pclusterblks(inode);
+
 	return true;
 }
 
@@ -856,6 +861,7 @@ static struct erofs_inode *erofs_new_inode(void)
 
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
+	inode->c_physical_clusterblks = 0;
 	return inode;
 }
 
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d164fa5..7f36146 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,9 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.TP
+.BI "\-\-compress-hints " file
+Specify a file to configure per-file compression strategy.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 9369b72..dbe49ec 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,6 +45,7 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-hints", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
 	{"mount-point", required_argument, NULL, 256},
 	{"product-out", required_argument, NULL, 257},
@@ -89,6 +91,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-hints=X    specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -97,7 +100,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -288,6 +291,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 10:
+			cfg.compress_hints_file = optarg;
+			break;
 #ifdef WITH_ANDROID
 		case 256:
 			cfg.mount_point = optarg;
@@ -587,6 +593,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_hints();
+	if (err) {
+		erofs_err("Failed to load compress hints %s",
+			  cfg.compress_hints_file);
+		goto exit;
+	}
+
 #ifdef HAVE_LIBUUID
 	uuid_unparse_lower(sbi.uuid, uuid_str);
 #endif
-- 
2.25.1


  parent reply	other threads:[~2021-08-25  3:36 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-16  9:40 [PATCH] erofs-utils: support per-inode compress pcluster Huang Jianan via Linux-erofs
2021-08-18  4:27 ` [PATCH v2] " Huang Jianan via Linux-erofs
2021-08-25  1:17   ` Gao Xiang
2021-08-25  1:27     ` Gao Xiang
2021-08-25  2:38       ` Huang Jianan via Linux-erofs
2021-08-25  3:35   ` Huang Jianan via Linux-erofs [this message]
2021-09-05 17:59     ` [PATCH v3] " Gao Xiang
2021-09-06  9:38       ` Huang Jianan via Linux-erofs
2021-09-07  0:12         ` Gao Xiang
2021-09-15 11:21           ` [PATCH] erofs-utils: tests: check the compress-hints functionality Huang Jianan via Linux-erofs
2021-09-15 15:10             ` Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210825033523.20058-1-huangjianan@oppo.com \
    --to=linux-erofs@lists.ozlabs.org \
    --cc=guanyuwei@oppo.com \
    --cc=guoweichao@oppo.com \
    --cc=huangjianan@oppo.com \
    --cc=kevin.liw@oppo.com \
    --cc=yh@oppo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).