linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Gao Xiang <hsiangkao@linux.alibaba.com>
To: linux-erofs@lists.ozlabs.org
Cc: Gao Xiang <hsiangkao@linux.alibaba.com>
Subject: [PATCH] erofs-utils: support chunk-based sparse files
Date: Fri, 27 Jan 2023 18:08:11 +0800	[thread overview]
Message-ID: <20230127100811.109549-1-hsiangkao@linux.alibaba.com> (raw)

Scan for holes in chunk-based inodes when either --chunksize=# or
-Ededupe (without compression) is specified, so that sparse files
can be generated.  The kernel has supported chunk-based sparse files
since Linux 5.15.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/blobchunk.c | 71 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index 77b0c17..55ea061 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -18,7 +18,7 @@ void erofs_sha256(const unsigned char *in, unsigned long in_size,
 struct erofs_blobchunk {
 	struct hashmap_entry ent;
 	char		sha256[32];
-	unsigned int	chunksize;
+	erofs_off_t	chunksize;
 	erofs_blk_t	blkaddr;
 };
 
@@ -27,9 +27,12 @@ static FILE *blobfile;
 static erofs_blk_t remapped_base;
 static bool multidev;
 static struct erofs_buffer_head *bh_devt;
+struct erofs_blobchunk erofs_holechunk = {
+	.blkaddr = EROFS_NULL_ADDR,
+};
 
 static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
-		unsigned int chunksize)
+		erofs_off_t chunksize)
 {
 	static u8 zeroed[EROFS_BLKSIZ];
 	u8 *chunkdata, sha256[32];
@@ -129,7 +132,11 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
 
 		chunk = *(void **)(inode->chunkindexes + src);
 
-		idx.blkaddr = base_blkaddr + chunk->blkaddr;
+		if (chunk->blkaddr != EROFS_NULL_ADDR)
+			idx.blkaddr = base_blkaddr + chunk->blkaddr;
+		else
+			idx.blkaddr = EROFS_NULL_ADDR;
+
 		if (extent_start != EROFS_NULL_ADDR &&
 		    idx.blkaddr == extent_end + 1) {
 			extent_end = idx.blkaddr;
@@ -163,14 +170,28 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
 
 int erofs_blob_write_chunked_file(struct erofs_inode *inode)
 {
-	unsigned int chunksize = 1 << cfg.c_chunkbits;
-	unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
+	unsigned int chunkbits = cfg.c_chunkbits;
+	unsigned int count, unit;
 	struct erofs_inode_chunk_index *idx;
-	erofs_off_t pos, len;
-	unsigned int unit;
+	erofs_off_t pos, len, chunksize;
 	int fd, ret;
 
-	inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
+	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+	if (fd < 0)
+		return -errno;
+#ifdef SEEK_DATA
+	/* if the file is fully sparsed, use one big chunk instead */
+	if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) {
+		chunkbits = ilog2(inode->i_size - 1) + 1;
+		if (chunkbits < LOG_BLOCK_SIZE)
+			chunkbits = LOG_BLOCK_SIZE;
+	}
+#endif
+	if (chunkbits - LOG_BLOCK_SIZE > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+		chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + LOG_BLOCK_SIZE;
+	chunksize = 1ULL << chunkbits;
+	count = DIV_ROUND_UP(inode->i_size, chunksize);
+	inode->u.chunkformat |= chunkbits - LOG_BLOCK_SIZE;
 	if (multidev)
 		inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
 
@@ -181,24 +202,41 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode)
 
 	inode->extent_isize = count * unit;
 	idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
-	if (!idx)
+	if (!idx) {
+		close(fd);
 		return -ENOMEM;
-	inode->chunkindexes = idx;
-
-	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
-	if (fd < 0) {
-		ret = -errno;
-		goto err;
 	}
+	inode->chunkindexes = idx;
 
 	for (pos = 0; pos < inode->i_size; pos += len) {
 		struct erofs_blobchunk *chunk;
+#ifdef SEEK_DATA
+		off64_t offset = lseek(fd, pos, SEEK_DATA);
+
+		if (offset < 0) {
+			if (errno != ENXIO)
+				offset = pos;
+			else
+				offset = ((pos >> chunkbits) + 1) << chunkbits;
+		} else {
+			offset &= ~(chunksize - 1);
+		}
+
+		if (offset > pos) {
+			len = 0;
+			do {
+				*(void **)idx++ = &erofs_holechunk;
+				pos += chunksize;
+			} while (pos < offset);
+			DBG_BUGON(pos != offset);
+			continue;
+		}
+#endif
 
 		len = min_t(u64, inode->i_size - pos, chunksize);
 		chunk = erofs_blob_getchunk(fd, len);
 		if (IS_ERR(chunk)) {
 			ret = PTR_ERR(chunk);
-			close(fd);
 			goto err;
 		}
 		*(void **)idx++ = chunk;
@@ -207,6 +245,7 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode)
 	close(fd);
 	return 0;
 err:
+	close(fd);
 	free(inode->chunkindexes);
 	inode->chunkindexes = NULL;
 	return ret;
-- 
2.24.4


             reply	other threads:[~2023-01-27 10:08 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-27 10:08 Gao Xiang [this message]
2023-01-31  5:14 ` [PATCH v2] erofs-utils: support chunk-based sparse files Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230127100811.109549-1-hsiangkao@linux.alibaba.com \
    --to=hsiangkao@linux.alibaba.com \
    --cc=linux-erofs@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).