Linux-EROFS Archive on lore.kernel.org
 help / color / Atom feed
* [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
@ 2019-12-23 17:29 Pratik Shinde
  2019-12-24  3:48 ` Gao Xiang
  0 siblings, 1 reply; 8+ messages in thread
From: Pratik Shinde @ 2019-12-23 17:29 UTC (permalink / raw)
  To: linux-erofs, bluce.liguifu, miaoxie, fangwei1

Made some changes based on comments on previous patch :
1) defined an on disk structure for representing hole.
2) Maintain a list of this structure (per file) and dump this list to
   disk at the time of writing the inode to disk.
---
 include/erofs/internal.h |   8 +++-
 lib/inode.c              | 108 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index e13adda..863ef8a 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -63,7 +63,7 @@ struct erofs_sb_info {
 extern struct erofs_sb_info sbi;
 
 struct erofs_inode {
-	struct list_head i_hash, i_subdirs, i_xattrs;
+	struct list_head i_hash, i_subdirs, i_xattrs, i_holes;
 
 	unsigned int i_count;
 	struct erofs_inode *i_parent;
@@ -93,6 +93,7 @@ struct erofs_inode {
 
 	unsigned int xattr_isize;
 	unsigned int extent_isize;
+	unsigned int holes_isize;
 
 	erofs_nid_t nid;
 	struct erofs_buffer_head *bh;
@@ -139,5 +140,10 @@ static inline const char *erofs_strerror(int err)
 	return msg;
 }
 
+struct erofs_hole {
+	erofs_blk_t st;
+	u32 len;
+	struct list_head next;
+};
 #endif
 
diff --git a/lib/inode.c b/lib/inode.c
index 0e19b11..20bbf06 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -38,6 +38,85 @@ static unsigned char erofs_type_by_mode[S_IFMT >> S_SHIFT] = {
 
 struct list_head inode_hashtable[NR_INODE_HASHTABLE];
 
+
+#define IS_HOLE(start, end) (roundup(start, EROFS_BLKSIZ) == start &&	\
+		             roundup(end, EROFS_BLKSIZ) == end &&	\
+			    (end - start) % EROFS_BLKSIZ == 0)
+#define HOLE_BLK		-1
+unsigned int erofs_detect_holes(struct erofs_inode *inode,
+				struct list_head *holes, unsigned int *htimes)
+{
+	int fd, st, en;
+	unsigned int nholes = 0;
+	erofs_off_t data, hole, len;
+	struct erofs_hole *eh;
+
+	fd = open(inode->i_srcpath, O_RDONLY);
+	if (fd < 0) {
+		return -errno;
+	}
+	len = lseek(fd, 0, SEEK_END);
+	if (lseek(fd, 0, SEEK_SET) == -1)
+		return -errno;
+	data = 0;
+	while (data < len) {
+		hole = lseek(fd, data, SEEK_HOLE);
+		if (hole == len)
+			break;
+		data = lseek(fd, hole, SEEK_DATA);
+		if (data < 0 || hole > data) {
+			return -EINVAL;
+		}
+		if (IS_HOLE(hole, data)) {
+			st = hole >> S_SHIFT;
+			en = data >> S_SHIFT;
+			eh = malloc(sizeof(struct erofs_hole));
+			if (eh == NULL)
+				return -ENOMEM;
+			eh->st = st;
+			eh->len = (en - st);
+			list_add_tail(&eh->next, holes);
+			nholes += eh->len;
+			*htimes += 1;
+		}
+	}
+	return nholes;
+}
+
+bool erofs_ishole(erofs_blk_t blk, struct list_head holes)
+{
+	if (list_empty(&holes))
+		return false;
+	struct erofs_hole *eh;
+	list_for_each_entry(eh, &holes, next) {
+		if (eh->st > blk)
+			return false;
+		if (eh->st <= blk && (eh->st + eh->len - 1) >= blk)
+			return true;
+	}
+	return false;
+}
+
+char *erofs_create_holes_buffer(struct list_head *holes, unsigned int size)
+{
+	struct erofs_hole *eh;
+	char *buf;
+	unsigned int p = 0;
+
+	buf = malloc(size);
+	if (buf == NULL)
+		return ERR_PTR(-ENOMEM);
+	list_for_each_entry(eh, holes, next) {
+		*(__le32 *)(buf + p) = cpu_to_le32(eh->st);
+		p += sizeof(__le32);
+		*(__le32 *)(buf + p) = cpu_to_le32(eh->len);
+		p += sizeof(__le32);
+		list_del(&eh->next);
+		free(eh);
+	}
+	return buf;
+}
+
 void erofs_inode_manager_init(void)
 {
 	unsigned int i;
@@ -304,7 +383,7 @@ static bool erofs_file_is_compressible(struct erofs_inode *inode)
 
 int erofs_write_file(struct erofs_inode *inode)
 {
-	unsigned int nblocks, i;
+	unsigned int nblocks, i, nholes, hitems = 0;
 	int ret, fd;
 
 	if (!inode->i_size) {
@@ -322,16 +401,24 @@ int erofs_write_file(struct erofs_inode *inode)
 	/* fallback to all data uncompressed */
 	inode->datalayout = EROFS_INODE_FLAT_INLINE;
 	nblocks = inode->i_size / EROFS_BLKSIZ;
-
-	ret = __allocate_inode_bh_data(inode, nblocks);
+	nholes = erofs_detect_holes(inode, &inode->i_holes, &hitems);
+	if (nholes < 0)
+		return nholes;
+	inode->holes_isize = (sizeof(struct erofs_hole) -
+			      sizeof(struct list_head)) * hitems;
+	if (nblocks < 0)
+		return nblocks;
+	ret = __allocate_inode_bh_data(inode, nblocks - nholes);
 	if (ret)
 		return ret;
-
 	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
 	if (fd < 0)
 		return -errno;
 
 	for (i = 0; i < nblocks; ++i) {
+		if (erofs_ishole(i, inode->i_holes)) {
+			continue;
+		}
 		char buf[EROFS_BLKSIZ];
 
 		ret = read(fd, buf, EROFS_BLKSIZ);
@@ -479,8 +566,19 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 		if (ret)
 			return false;
 		free(inode->compressmeta);
+		off += inode->extent_isize;
 	}
 
+	if (inode->holes_isize) {
+		char *holes = erofs_create_holes_buffer(&inode->i_holes,
+							inode->holes_isize);
+		if (IS_ERR(holes))
+			return false;
+		ret = dev_write(holes, off, inode->holes_isize);
+		free(holes);
+		if (ret)
+			return false;
+	}
 	inode->bh = NULL;
 	erofs_iput(inode);
 	return erofs_bh_flush_generic_end(bh);
@@ -737,6 +835,7 @@ struct erofs_inode *erofs_new_inode(void)
 
 	init_list_head(&inode->i_subdirs);
 	init_list_head(&inode->i_xattrs);
+	init_list_head(&inode->i_holes);
 
 	inode->idata_size = 0;
 	inode->xattr_isize = 0;
@@ -961,4 +1060,3 @@ struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
 
 	return erofs_mkfs_build_tree(inode);
 }
-
-- 
2.9.3


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-23 17:29 [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files Pratik Shinde
@ 2019-12-24  3:48 ` Gao Xiang
  2019-12-24  9:35   ` Pratik Shinde
  0 siblings, 1 reply; 8+ messages in thread
From: Gao Xiang @ 2019-12-24  3:48 UTC (permalink / raw)
  To: Pratik Shinde; +Cc: miaoxie, linux-erofs

Hi Pratik,

Thanks for keeping the work. :)
Some inlined comments below as before.

On Mon, Dec 23, 2019 at 10:59:38PM +0530, Pratik Shinde wrote:
> Made some changes based on comments on previous patch :
> 1) defined an on disk structure for representing hole.
> 2) Maintain a list of this structure (per file) and dump this list to
>    disk at the time of writing the inode to disk.
> ---
>  include/erofs/internal.h |   8 +++-
>  lib/inode.c              | 108 ++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 110 insertions(+), 6 deletions(-)
> 
> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> index e13adda..863ef8a 100644
> --- a/include/erofs/internal.h
> +++ b/include/erofs/internal.h
> @@ -63,7 +63,7 @@ struct erofs_sb_info {
>  extern struct erofs_sb_info sbi;
>  
>  struct erofs_inode {
> -	struct list_head i_hash, i_subdirs, i_xattrs;
> +	struct list_head i_hash, i_subdirs, i_xattrs, i_holes;
>  
>  	unsigned int i_count;
>  	struct erofs_inode *i_parent;
> @@ -93,6 +93,7 @@ struct erofs_inode {
>  
>  	unsigned int xattr_isize;
>  	unsigned int extent_isize;
> +	unsigned int holes_isize;
>  
>  	erofs_nid_t nid;
>  	struct erofs_buffer_head *bh;
> @@ -139,5 +140,10 @@ static inline const char *erofs_strerror(int err)
>  	return msg;
>  }
>  
> +struct erofs_hole {
> +	erofs_blk_t st;
> +	u32 len;
> +	struct list_head next;
> +};


How about recording all extents rather than holes? since it's more useful
for later random read access.

struct erofs_extent_node {
	struct list_head next;

	erofs_blk_t lblk;	/* logical start address */
	erofs_blk_t pblk;	/* physical start address */
	u32 len;		/* extent length in blocks */
};


>  #endif
>  
> diff --git a/lib/inode.c b/lib/inode.c
> index 0e19b11..20bbf06 100644
> --- a/lib/inode.c
> +++ b/lib/inode.c
> @@ -38,6 +38,85 @@ static unsigned char erofs_type_by_mode[S_IFMT >> S_SHIFT] = {
>  
>  struct list_head inode_hashtable[NR_INODE_HASHTABLE];
>  
> +
> +#define IS_HOLE(start, end) (roundup(start, EROFS_BLKSIZ) == start &&	\
> +		             roundup(end, EROFS_BLKSIZ) == end &&	\
> +			    (end - start) % EROFS_BLKSIZ == 0)
> +#define HOLE_BLK		-1
> +unsigned int erofs_detect_holes(struct erofs_inode *inode,
> +				struct list_head *holes, unsigned int *htimes)
> +{
> +	int fd, st, en;
> +	unsigned int nholes = 0;
> +	erofs_off_t data, hole, len;
> +	struct erofs_hole *eh;
> +
> +	fd = open(inode->i_srcpath, O_RDONLY);
> +	if (fd < 0) {
> +		return -errno;
> +	}
> +	len = lseek(fd, 0, SEEK_END);
> +	if (lseek(fd, 0, SEEK_SET) == -1)
> +		return -errno;
> +	data = 0;
> +	while (data < len) {
> +		hole = lseek(fd, data, SEEK_HOLE);
> +		if (hole == len)
> +			break;
> +		data = lseek(fd, hole, SEEK_DATA);
> +		if (data < 0 || hole > data) {
> +			return -EINVAL;
> +		}
> +		if (IS_HOLE(hole, data)) {
> +			st = hole >> S_SHIFT;
> +			en = data >> S_SHIFT;
> +			eh = malloc(sizeof(struct erofs_hole));
> +			if (eh == NULL)
> +				return -ENOMEM;
> +			eh->st = st;
> +			eh->len = (en - st);
> +			list_add_tail(&eh->next, holes);
> +			nholes += eh->len;
> +			*htimes += 1;
> +		}
> +	}
> +	return nholes;
> +}
> +
> +bool erofs_ishole(erofs_blk_t blk, struct list_head holes)
> +{
> +	if (list_empty(&holes))
> +		return false;
> +	struct erofs_hole *eh;
> +	list_for_each_entry(eh, &holes, next) {
> +		if (eh->st > blk)
> +			return false;
> +		if (eh->st <= blk && (eh->st + eh->len - 1) >= blk)
> +			return true;
> +	}
> +	return false;
> +}
> +
> +char *erofs_create_holes_buffer(struct list_head *holes, unsigned int size)
> +{
> +	struct erofs_hole *eh;
> +	char *buf;
> +	unsigned int p = 0;
> +
> +	buf = malloc(size);
> +	if (buf == NULL)
> +		return ERR_PTR(-ENOMEM);
> +	list_for_each_entry(eh, holes, next) {
> +		*(__le32 *)(buf + p) = cpu_to_le32(eh->st);
> +		p += sizeof(__le32);
> +		*(__le32 *)(buf + p) = cpu_to_le32(eh->len);
> +		p += sizeof(__le32);
> +		list_del(&eh->next);
> +		free(eh);
> +	}

How about introducing some extent header

/* 12-byte alignment */
struct erofs_inline_extent_header {
	/* e.g. we need to record how many total extents here at least. */
	...
};

and some ondisk extent representation.

struct erofs_extent {
	__le32 ee_lblk;
	__le32 ee_pblk;
	/*
	 * most significant 4 bits reserved for flags and should be 0
	 * now, maximum 256M blocks (8TB) for an extent.
	 */
	__le32 ee_len;
};

(see fs/ext4/ext4_extents.h for ext4 ondisk definitions)

And I'd like to call this inline extent representation (for limited
extents) since we could consider some powerful representation
(e.g. using B+ tree) in the future for complicated requirement, so we
have to reserve i_u.raw_blkaddr in erofs_inode to 0 for now (in order
to indicate extra B+-tree blocks).


> +	return buf;
> +}
> +
>  void erofs_inode_manager_init(void)
>  {
>  	unsigned int i;
> @@ -304,7 +383,7 @@ static bool erofs_file_is_compressible(struct erofs_inode *inode)
>  
>  int erofs_write_file(struct erofs_inode *inode)
>  {
> -	unsigned int nblocks, i;
> +	unsigned int nblocks, i, nholes, hitems = 0;
>  	int ret, fd;
>  
>  	if (!inode->i_size) {


The fallback condition from compress file to uncompress file should be updated
and give a new data_mapping for this mode as well, but that is minor for now.

Thanks,
Gao Xiang


> @@ -322,16 +401,24 @@ int erofs_write_file(struct erofs_inode *inode)
>  	/* fallback to all data uncompressed */
>  	inode->datalayout = EROFS_INODE_FLAT_INLINE;
>  	nblocks = inode->i_size / EROFS_BLKSIZ;
> -
> -	ret = __allocate_inode_bh_data(inode, nblocks);
> +	nholes = erofs_detect_holes(inode, &inode->i_holes, &hitems);
> +	if (nholes < 0)
> +		return nholes;
> +	inode->holes_isize = (sizeof(struct erofs_hole) -
> +			      sizeof(struct list_head)) * hitems;
> +	if (nblocks < 0)
> +		return nblocks;
> +	ret = __allocate_inode_bh_data(inode, nblocks - nholes);
>  	if (ret)
>  		return ret;
> -
>  	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
>  	if (fd < 0)
>  		return -errno;
>  
>  	for (i = 0; i < nblocks; ++i) {
> +		if (erofs_ishole(i, inode->i_holes)) {
> +			continue;
> +		}
>  		char buf[EROFS_BLKSIZ];
>  
>  		ret = read(fd, buf, EROFS_BLKSIZ);
> @@ -479,8 +566,19 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
>  		if (ret)
>  			return false;
>  		free(inode->compressmeta);
> +		off += inode->extent_isize;
>  	}
>  
> +	if (inode->holes_isize) {
> +		char *holes = erofs_create_holes_buffer(&inode->i_holes,
> +							inode->holes_isize);
> +		if (IS_ERR(holes))
> +			return false;
> +		ret = dev_write(holes, off, inode->holes_isize);
> +		free(holes);
> +		if (ret)
> +			return false;
> +	}
>  	inode->bh = NULL;
>  	erofs_iput(inode);
>  	return erofs_bh_flush_generic_end(bh);
> @@ -737,6 +835,7 @@ struct erofs_inode *erofs_new_inode(void)
>  
>  	init_list_head(&inode->i_subdirs);
>  	init_list_head(&inode->i_xattrs);
> +	init_list_head(&inode->i_holes);
>  
>  	inode->idata_size = 0;
>  	inode->xattr_isize = 0;
> @@ -961,4 +1060,3 @@ struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
>  
>  	return erofs_mkfs_build_tree(inode);
>  }
> -
> -- 
> 2.9.3
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-24  3:48 ` Gao Xiang
@ 2019-12-24  9:35   ` Pratik Shinde
  2019-12-24 10:05     ` Gao Xiang
  0 siblings, 1 reply; 8+ messages in thread
From: Pratik Shinde @ 2019-12-24  9:35 UTC (permalink / raw)
  To: Gao Xiang; +Cc: miaoxie, linux-erofs

[-- Attachment #1: Type: text/plain, Size: 9584 bytes --]

Hello Gao,

Thanks for the review.
If I understand correctly , you wish to keep track of every extent assigned
to the file.
in case of file without any holes in it, there will single extent
representing the entire file.

Also, the current block no. lookup happens in constant time. (since we only
record the start blk no.)
If we use extent record for finding given block no. it can't be done in
constant time correct ? (maybe in LogN)

I think I don't fully understand reason for recording extents assigned to a
file.Since the current design
is already time and space optimized & there are no deletions happening.
Is it for some future requirement ?

--Pratik.

On Tue, Dec 24, 2019 at 9:18 AM Gao Xiang <gaoxiang25@huawei.com> wrote:

> Hi Pratik,
>
> Thanks for keeping the work. :)
> Some inlined comments below as before.
>
> On Mon, Dec 23, 2019 at 10:59:38PM +0530, Pratik Shinde wrote:
> > Made some changes based on comments on previous patch :
> > 1) defined an on disk structure for representing hole.
> > 2) Maintain a list of this structure (per file) and dump this list to
> >    disk at the time of writing the inode to disk.
> > ---
> >  include/erofs/internal.h |   8 +++-
> >  lib/inode.c              | 108
> ++++++++++++++++++++++++++++++++++++++++++++---
> >  2 files changed, 110 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> > index e13adda..863ef8a 100644
> > --- a/include/erofs/internal.h
> > +++ b/include/erofs/internal.h
> > @@ -63,7 +63,7 @@ struct erofs_sb_info {
> >  extern struct erofs_sb_info sbi;
> >
> >  struct erofs_inode {
> > -     struct list_head i_hash, i_subdirs, i_xattrs;
> > +     struct list_head i_hash, i_subdirs, i_xattrs, i_holes;
> >
> >       unsigned int i_count;
> >       struct erofs_inode *i_parent;
> > @@ -93,6 +93,7 @@ struct erofs_inode {
> >
> >       unsigned int xattr_isize;
> >       unsigned int extent_isize;
> > +     unsigned int holes_isize;
> >
> >       erofs_nid_t nid;
> >       struct erofs_buffer_head *bh;
> > @@ -139,5 +140,10 @@ static inline const char *erofs_strerror(int err)
> >       return msg;
> >  }
> >
> > +struct erofs_hole {
> > +     erofs_blk_t st;
> > +     u32 len;
> > +     struct list_head next;
> > +};
>
>
> How about recording all extents rather than holes? since it's more useful
> for later random read access.
>
> struct erofs_extent_node {
>         struct list_head next;
>
>         erofs_blk_t lblk;       /* logical start address */
>         erofs_blk_t pblk;       /* physical start address */
>         u32 len;                /* extent length in blocks */
> };
>
>
> >  #endif
> >
> > diff --git a/lib/inode.c b/lib/inode.c
> > index 0e19b11..20bbf06 100644
> > --- a/lib/inode.c
> > +++ b/lib/inode.c
> > @@ -38,6 +38,85 @@ static unsigned char erofs_type_by_mode[S_IFMT >>
> S_SHIFT] = {
> >
> >  struct list_head inode_hashtable[NR_INODE_HASHTABLE];
> >
> > +
> > +#define IS_HOLE(start, end) (roundup(start, EROFS_BLKSIZ) == start &&
>       \
> > +                          roundup(end, EROFS_BLKSIZ) == end &&       \
> > +                         (end - start) % EROFS_BLKSIZ == 0)
> > +#define HOLE_BLK             -1
> > +unsigned int erofs_detect_holes(struct erofs_inode *inode,
> > +                             struct list_head *holes, unsigned int
> *htimes)
> > +{
> > +     int fd, st, en;
> > +     unsigned int nholes = 0;
> > +     erofs_off_t data, hole, len;
> > +     struct erofs_hole *eh;
> > +
> > +     fd = open(inode->i_srcpath, O_RDONLY);
> > +     if (fd < 0) {
> > +             return -errno;
> > +     }
> > +     len = lseek(fd, 0, SEEK_END);
> > +     if (lseek(fd, 0, SEEK_SET) == -1)
> > +             return -errno;
> > +     data = 0;
> > +     while (data < len) {
> > +             hole = lseek(fd, data, SEEK_HOLE);
> > +             if (hole == len)
> > +                     break;
> > +             data = lseek(fd, hole, SEEK_DATA);
> > +             if (data < 0 || hole > data) {
> > +                     return -EINVAL;
> > +             }
> > +             if (IS_HOLE(hole, data)) {
> > +                     st = hole >> S_SHIFT;
> > +                     en = data >> S_SHIFT;
> > +                     eh = malloc(sizeof(struct erofs_hole));
> > +                     if (eh == NULL)
> > +                             return -ENOMEM;
> > +                     eh->st = st;
> > +                     eh->len = (en - st);
> > +                     list_add_tail(&eh->next, holes);
> > +                     nholes += eh->len;
> > +                     *htimes += 1;
> > +             }
> > +     }
> > +     return nholes;
> > +}
> > +
> > +bool erofs_ishole(erofs_blk_t blk, struct list_head holes)
> > +{
> > +     if (list_empty(&holes))
> > +             return false;
> > +     struct erofs_hole *eh;
> > +     list_for_each_entry(eh, &holes, next) {
> > +             if (eh->st > blk)
> > +                     return false;
> > +             if (eh->st <= blk && (eh->st + eh->len - 1) >= blk)
> > +                     return true;
> > +     }
> > +     return false;
> > +}
> > +
> > +char *erofs_create_holes_buffer(struct list_head *holes, unsigned int
> size)
> > +{
> > +     struct erofs_hole *eh;
> > +     char *buf;
> > +     unsigned int p = 0;
> > +
> > +     buf = malloc(size);
> > +     if (buf == NULL)
> > +             return ERR_PTR(-ENOMEM);
> > +     list_for_each_entry(eh, holes, next) {
> > +             *(__le32 *)(buf + p) = cpu_to_le32(eh->st);
> > +             p += sizeof(__le32);
> > +             *(__le32 *)(buf + p) = cpu_to_le32(eh->len);
> > +             p += sizeof(__le32);
> > +             list_del(&eh->next);
> > +             free(eh);
> > +     }
>
> How about introducing some extent header
>
> /* 12-byte alignment */
> struct erofs_inline_extent_header {
>         /* e.g. we need to record how many total extents here at least. */
>         ...
> };
>
> and some ondisk extent representation.
>
> struct erofs_extent {
>         __le32 ee_lblk;
>         __le32 ee_pblk;
>         /*
>          * most significant 4 bits reserved for flags and should be 0
>          * now, maximum 256M blocks (8TB) for an extent.
>          */
>         __le32 ee_len;
> };
>
> (see fs/ext4/ext4_extents.h for ext4 ondisk definitions)
>
> And I'd like to call this inline extent representation (for limited
> extents) since we could consider some powerful representation
> (e.g. using B+ tree) in the future for complicated requirement, so we
> have to reserve i_u.raw_blkaddr in erofs_inode to 0 for now (in order
> to indicate extra B+-tree blocks).
>
>
> > +     return buf;
> > +}
> > +
> >  void erofs_inode_manager_init(void)
> >  {
> >       unsigned int i;
> > @@ -304,7 +383,7 @@ static bool erofs_file_is_compressible(struct
> erofs_inode *inode)
> >
> >  int erofs_write_file(struct erofs_inode *inode)
> >  {
> > -     unsigned int nblocks, i;
> > +     unsigned int nblocks, i, nholes, hitems = 0;
> >       int ret, fd;
> >
> >       if (!inode->i_size) {
>
>
> The fallback condition from compress file to uncompress file should be
> updated
> and give a new data_mapping for this mode as well, but that is minor for
> now.
>
> Thanks,
> Gao Xiang
>
>
> > @@ -322,16 +401,24 @@ int erofs_write_file(struct erofs_inode *inode)
> >       /* fallback to all data uncompressed */
> >       inode->datalayout = EROFS_INODE_FLAT_INLINE;
> >       nblocks = inode->i_size / EROFS_BLKSIZ;
> > -
> > -     ret = __allocate_inode_bh_data(inode, nblocks);
> > +     nholes = erofs_detect_holes(inode, &inode->i_holes, &hitems);
> > +     if (nholes < 0)
> > +             return nholes;
> > +     inode->holes_isize = (sizeof(struct erofs_hole) -
> > +                           sizeof(struct list_head)) * hitems;
> > +     if (nblocks < 0)
> > +             return nblocks;
> > +     ret = __allocate_inode_bh_data(inode, nblocks - nholes);
> >       if (ret)
> >               return ret;
> > -
> >       fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
> >       if (fd < 0)
> >               return -errno;
> >
> >       for (i = 0; i < nblocks; ++i) {
> > +             if (erofs_ishole(i, inode->i_holes)) {
> > +                     continue;
> > +             }
> >               char buf[EROFS_BLKSIZ];
> >
> >               ret = read(fd, buf, EROFS_BLKSIZ);
> > @@ -479,8 +566,19 @@ static bool erofs_bh_flush_write_inode(struct
> erofs_buffer_head *bh)
> >               if (ret)
> >                       return false;
> >               free(inode->compressmeta);
> > +             off += inode->extent_isize;
> >       }
> >
> > +     if (inode->holes_isize) {
> > +             char *holes = erofs_create_holes_buffer(&inode->i_holes,
> > +
>  inode->holes_isize);
> > +             if (IS_ERR(holes))
> > +                     return false;
> > +             ret = dev_write(holes, off, inode->holes_isize);
> > +             free(holes);
> > +             if (ret)
> > +                     return false;
> > +     }
> >       inode->bh = NULL;
> >       erofs_iput(inode);
> >       return erofs_bh_flush_generic_end(bh);
> > @@ -737,6 +835,7 @@ struct erofs_inode *erofs_new_inode(void)
> >
> >       init_list_head(&inode->i_subdirs);
> >       init_list_head(&inode->i_xattrs);
> > +     init_list_head(&inode->i_holes);
> >
> >       inode->idata_size = 0;
> >       inode->xattr_isize = 0;
> > @@ -961,4 +1060,3 @@ struct erofs_inode
> *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
> >
> >       return erofs_mkfs_build_tree(inode);
> >  }
> > -
> > --
> > 2.9.3
> >
>

[-- Attachment #2: Type: text/html, Size: 12639 bytes --]

<div dir="ltr"><div>Hello Gao,</div><div><br></div><div>Thanks for the review. <br></div><div>If I understand correctly , you wish to keep track of every extent assigned to the file.</div><div>in case of file without any holes in it, there will single extent representing the entire file.</div><div><br></div><div>Also, the current block no. lookup happens in constant time. (since we only record the start blk no.)</div><div>If we use extent record for finding given block no. it can&#39;t be done in constant time correct ? (maybe in LogN)</div><div><br></div><div>I think I don&#39;t fully understand reason for recording extents assigned to a file.Since the current design</div><div>is already time and space optimized &amp; there are no deletions happening.</div><div>Is it for some future requirement ?</div><div><br></div><div>--Pratik.<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Dec 24, 2019 at 9:18 AM Gao Xiang &lt;<a href="mailto:gaoxiang25@huawei.com">gaoxiang25@huawei.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Hi Pratik,<br>
<br>
Thanks for keeping the work. :)<br>
Some inlined comments below as before.<br>
<br>
On Mon, Dec 23, 2019 at 10:59:38PM +0530, Pratik Shinde wrote:<br>
&gt; Made some changes based on comments on previous patch :<br>
&gt; 1) defined an on disk structure for representing hole.<br>
&gt; 2) Maintain a list of this structure (per file) and dump this list to<br>
&gt;    disk at the time of writing the inode to disk.<br>
&gt; ---<br>
&gt;  include/erofs/internal.h |   8 +++-<br>
&gt;  lib/inode.c              | 108 ++++++++++++++++++++++++++++++++++++++++++++---<br>
&gt;  2 files changed, 110 insertions(+), 6 deletions(-)<br>
&gt; <br>
&gt; diff --git a/include/erofs/internal.h b/include/erofs/internal.h<br>
&gt; index e13adda..863ef8a 100644<br>
&gt; --- a/include/erofs/internal.h<br>
&gt; +++ b/include/erofs/internal.h<br>
&gt; @@ -63,7 +63,7 @@ struct erofs_sb_info {<br>
&gt;  extern struct erofs_sb_info sbi;<br>
&gt;  <br>
&gt;  struct erofs_inode {<br>
&gt; -     struct list_head i_hash, i_subdirs, i_xattrs;<br>
&gt; +     struct list_head i_hash, i_subdirs, i_xattrs, i_holes;<br>
&gt;  <br>
&gt;       unsigned int i_count;<br>
&gt;       struct erofs_inode *i_parent;<br>
&gt; @@ -93,6 +93,7 @@ struct erofs_inode {<br>
&gt;  <br>
&gt;       unsigned int xattr_isize;<br>
&gt;       unsigned int extent_isize;<br>
&gt; +     unsigned int holes_isize;<br>
&gt;  <br>
&gt;       erofs_nid_t nid;<br>
&gt;       struct erofs_buffer_head *bh;<br>
&gt; @@ -139,5 +140,10 @@ static inline const char *erofs_strerror(int err)<br>
&gt;       return msg;<br>
&gt;  }<br>
&gt;  <br>
&gt; +struct erofs_hole {<br>
&gt; +     erofs_blk_t st;<br>
&gt; +     u32 len;<br>
&gt; +     struct list_head next;<br>
&gt; +};<br>
<br>
<br>
How about recording all extents rather than holes? since it&#39;s more useful<br>
for later random read access.<br>
<br>
struct erofs_extent_node {<br>
        struct list_head next;<br>
<br>
        erofs_blk_t lblk;       /* logical start address */<br>
        erofs_blk_t pblk;       /* physical start address */<br>
        u32 len;                /* extent length in blocks */<br>
};<br>
<br>
<br>
&gt;  #endif<br>
&gt;  <br>
&gt; diff --git a/lib/inode.c b/lib/inode.c<br>
&gt; index 0e19b11..20bbf06 100644<br>
&gt; --- a/lib/inode.c<br>
&gt; +++ b/lib/inode.c<br>
&gt; @@ -38,6 +38,85 @@ static unsigned char erofs_type_by_mode[S_IFMT &gt;&gt; S_SHIFT] = {<br>
&gt;  <br>
&gt;  struct list_head inode_hashtable[NR_INODE_HASHTABLE];<br>
&gt;  <br>
&gt; +<br>
&gt; +#define IS_HOLE(start, end) (roundup(start, EROFS_BLKSIZ) == start &amp;&amp;        \<br>
&gt; +                          roundup(end, EROFS_BLKSIZ) == end &amp;&amp;       \<br>
&gt; +                         (end - start) % EROFS_BLKSIZ == 0)<br>
&gt; +#define HOLE_BLK             -1<br>
&gt; +unsigned int erofs_detect_holes(struct erofs_inode *inode,<br>
&gt; +                             struct list_head *holes, unsigned int *htimes)<br>
&gt; +{<br>
&gt; +     int fd, st, en;<br>
&gt; +     unsigned int nholes = 0;<br>
&gt; +     erofs_off_t data, hole, len;<br>
&gt; +     struct erofs_hole *eh;<br>
&gt; +<br>
&gt; +     fd = open(inode-&gt;i_srcpath, O_RDONLY);<br>
&gt; +     if (fd &lt; 0) {<br>
&gt; +             return -errno;<br>
&gt; +     }<br>
&gt; +     len = lseek(fd, 0, SEEK_END);<br>
&gt; +     if (lseek(fd, 0, SEEK_SET) == -1)<br>
&gt; +             return -errno;<br>
&gt; +     data = 0;<br>
&gt; +     while (data &lt; len) {<br>
&gt; +             hole = lseek(fd, data, SEEK_HOLE);<br>
&gt; +             if (hole == len)<br>
&gt; +                     break;<br>
&gt; +             data = lseek(fd, hole, SEEK_DATA);<br>
&gt; +             if (data &lt; 0 || hole &gt; data) {<br>
&gt; +                     return -EINVAL;<br>
&gt; +             }<br>
&gt; +             if (IS_HOLE(hole, data)) {<br>
&gt; +                     st = hole &gt;&gt; S_SHIFT;<br>
&gt; +                     en = data &gt;&gt; S_SHIFT;<br>
&gt; +                     eh = malloc(sizeof(struct erofs_hole));<br>
&gt; +                     if (eh == NULL)<br>
&gt; +                             return -ENOMEM;<br>
&gt; +                     eh-&gt;st = st;<br>
&gt; +                     eh-&gt;len = (en - st);<br>
&gt; +                     list_add_tail(&amp;eh-&gt;next, holes);<br>
&gt; +                     nholes += eh-&gt;len;<br>
&gt; +                     *htimes += 1;<br>
&gt; +             }<br>
&gt; +     }<br>
&gt; +     return nholes;<br>
&gt; +}<br>
&gt; +<br>
&gt; +bool erofs_ishole(erofs_blk_t blk, struct list_head holes)<br>
&gt; +{<br>
&gt; +     if (list_empty(&amp;holes))<br>
&gt; +             return false;<br>
&gt; +     struct erofs_hole *eh;<br>
&gt; +     list_for_each_entry(eh, &amp;holes, next) {<br>
&gt; +             if (eh-&gt;st &gt; blk)<br>
&gt; +                     return false;<br>
&gt; +             if (eh-&gt;st &lt;= blk &amp;&amp; (eh-&gt;st + eh-&gt;len - 1) &gt;= blk)<br>
&gt; +                     return true;<br>
&gt; +     }<br>
&gt; +     return false;<br>
&gt; +}<br>
&gt; +<br>
&gt; +char *erofs_create_holes_buffer(struct list_head *holes, unsigned int size)<br>
&gt; +{<br>
&gt; +     struct erofs_hole *eh;<br>
&gt; +     char *buf;<br>
&gt; +     unsigned int p = 0;<br>
&gt; +<br>
&gt; +     buf = malloc(size);<br>
&gt; +     if (buf == NULL)<br>
&gt; +             return ERR_PTR(-ENOMEM);<br>
&gt; +     list_for_each_entry(eh, holes, next) {<br>
&gt; +             *(__le32 *)(buf + p) = cpu_to_le32(eh-&gt;st);<br>
&gt; +             p += sizeof(__le32);<br>
&gt; +             *(__le32 *)(buf + p) = cpu_to_le32(eh-&gt;len);<br>
&gt; +             p += sizeof(__le32);<br>
&gt; +             list_del(&amp;eh-&gt;next);<br>
&gt; +             free(eh);<br>
&gt; +     }<br>
<br>
How about introducing some extent header<br>
<br>
/* 12-byte alignment */<br>
struct erofs_inline_extent_header {<br>
        /* e.g. we need to record how many total extents here at least. */<br>
        ...<br>
};<br>
<br>
and some ondisk extent representation.<br>
<br>
struct erofs_extent {<br>
        __le32 ee_lblk;<br>
        __le32 ee_pblk;<br>
        /*<br>
         * most significant 4 bits reserved for flags and should be 0<br>
         * now, maximum 256M blocks (8TB) for an extent.<br>
         */<br>
        __le32 ee_len;<br>
};<br>
<br>
(see fs/ext4/ext4_extents.h for ext4 ondisk definitions)<br>
<br>
And I&#39;d like to call this inline extent representation (for limited<br>
extents) since we could consider some powerful representation<br>
(e.g. using B+ tree) in the future for complicated requirement, so we<br>
have to reserve i_u.raw_blkaddr in erofs_inode to 0 for now (in order<br>
to indicate extra B+-tree blocks).<br>
<br>
<br>
&gt; +     return buf;<br>
&gt; +}<br>
&gt; +<br>
&gt;  void erofs_inode_manager_init(void)<br>
&gt;  {<br>
&gt;       unsigned int i;<br>
&gt; @@ -304,7 +383,7 @@ static bool erofs_file_is_compressible(struct erofs_inode *inode)<br>
&gt;  <br>
&gt;  int erofs_write_file(struct erofs_inode *inode)<br>
&gt;  {<br>
&gt; -     unsigned int nblocks, i;<br>
&gt; +     unsigned int nblocks, i, nholes, hitems = 0;<br>
&gt;       int ret, fd;<br>
&gt;  <br>
&gt;       if (!inode-&gt;i_size) {<br>
<br>
<br>
The fallback condition from compress file to uncompress file should be updated<br>
and give a new data_mapping for this mode as well, but that is minor for now.<br>
<br>
Thanks,<br>
Gao Xiang<br>
<br>
<br>
&gt; @@ -322,16 +401,24 @@ int erofs_write_file(struct erofs_inode *inode)<br>
&gt;       /* fallback to all data uncompressed */<br>
&gt;       inode-&gt;datalayout = EROFS_INODE_FLAT_INLINE;<br>
&gt;       nblocks = inode-&gt;i_size / EROFS_BLKSIZ;<br>
&gt; -<br>
&gt; -     ret = __allocate_inode_bh_data(inode, nblocks);<br>
&gt; +     nholes = erofs_detect_holes(inode, &amp;inode-&gt;i_holes, &amp;hitems);<br>
&gt; +     if (nholes &lt; 0)<br>
&gt; +             return nholes;<br>
&gt; +     inode-&gt;holes_isize = (sizeof(struct erofs_hole) -<br>
&gt; +                           sizeof(struct list_head)) * hitems;<br>
&gt; +     if (nblocks &lt; 0)<br>
&gt; +             return nblocks;<br>
&gt; +     ret = __allocate_inode_bh_data(inode, nblocks - nholes);<br>
&gt;       if (ret)<br>
&gt;               return ret;<br>
&gt; -<br>
&gt;       fd = open(inode-&gt;i_srcpath, O_RDONLY | O_BINARY);<br>
&gt;       if (fd &lt; 0)<br>
&gt;               return -errno;<br>
&gt;  <br>
&gt;       for (i = 0; i &lt; nblocks; ++i) {<br>
&gt; +             if (erofs_ishole(i, inode-&gt;i_holes)) {<br>
&gt; +                     continue;<br>
&gt; +             }<br>
&gt;               char buf[EROFS_BLKSIZ];<br>
&gt;  <br>
&gt;               ret = read(fd, buf, EROFS_BLKSIZ);<br>
&gt; @@ -479,8 +566,19 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)<br>
&gt;               if (ret)<br>
&gt;                       return false;<br>
&gt;               free(inode-&gt;compressmeta);<br>
&gt; +             off += inode-&gt;extent_isize;<br>
&gt;       }<br>
&gt;  <br>
&gt; +     if (inode-&gt;holes_isize) {<br>
&gt; +             char *holes = erofs_create_holes_buffer(&amp;inode-&gt;i_holes,<br>
&gt; +                                                     inode-&gt;holes_isize);<br>
&gt; +             if (IS_ERR(holes))<br>
&gt; +                     return false;<br>
&gt; +             ret = dev_write(holes, off, inode-&gt;holes_isize);<br>
&gt; +             free(holes);<br>
&gt; +             if (ret)<br>
&gt; +                     return false;<br>
&gt; +     }<br>
&gt;       inode-&gt;bh = NULL;<br>
&gt;       erofs_iput(inode);<br>
&gt;       return erofs_bh_flush_generic_end(bh);<br>
&gt; @@ -737,6 +835,7 @@ struct erofs_inode *erofs_new_inode(void)<br>
&gt;  <br>
&gt;       init_list_head(&amp;inode-&gt;i_subdirs);<br>
&gt;       init_list_head(&amp;inode-&gt;i_xattrs);<br>
&gt; +     init_list_head(&amp;inode-&gt;i_holes);<br>
&gt;  <br>
&gt;       inode-&gt;idata_size = 0;<br>
&gt;       inode-&gt;xattr_isize = 0;<br>
&gt; @@ -961,4 +1060,3 @@ struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,<br>
&gt;  <br>
&gt;       return erofs_mkfs_build_tree(inode);<br>
&gt;  }<br>
&gt; -<br>
&gt; -- <br>
&gt; 2.9.3<br>
&gt; <br>
</blockquote></div>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-24  9:35   ` Pratik Shinde
@ 2019-12-24 10:05     ` Gao Xiang
  2019-12-24 10:45       ` Pratik Shinde
  0 siblings, 1 reply; 8+ messages in thread
From: Gao Xiang @ 2019-12-24 10:05 UTC (permalink / raw)
  To: Pratik Shinde; +Cc: miaoxie, linux-erofs

Hi Pratik,

On Tue, Dec 24, 2019 at 03:05:53PM +0530, Pratik Shinde wrote:
> Hello Gao,
> 
> Thanks for the review.
> If I understand correctly , you wish to keep track of every extent assigned
> to the file.
> in case of file without any holes in it, there will single extent
> representing the entire file.
> 
> Also, the current block no. lookup happens in constant time. (since we only
> record the start blk no.)
> If we use extent record for finding given block no. it can't be done in
> constant time correct ? (maybe in LogN)


Could I ask a question?
In short, how can we use the proposal approach to read random blocks
in constant time O(1)?

e.g.
 if you have two holes 2...4  7...10 in a file with 12 blocks.

 if we want to random access block 11, only block number 1,5,6,11 were
 saved one by one (maybe p1,p2,p3,p4).

 How can we get the physical address (p4) of block 11 directly without
 scanning the previous holes?

Thanks,
Gao Xiang


> 
> I think I don't fully understand reason for recording extents assigned to a
> file.Since the current design
> is already time and space optimized & there are no deletions happening.
> Is it for some future requirement ?
> 
> --Pratik.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-24 10:05     ` Gao Xiang
@ 2019-12-24 10:45       ` Pratik Shinde
  2019-12-24 11:15         ` Gao Xiang
  0 siblings, 1 reply; 8+ messages in thread
From: Pratik Shinde @ 2019-12-24 10:45 UTC (permalink / raw)
  To: Gao Xiang; +Cc: miaoxie, linux-erofs

[-- Attachment #1: Type: text/plain, Size: 1702 bytes --]

Hi Gao,

No no. What I am saying is - in the current code (excluding all my changes)
the block lookup will happens in constant time. with only hole list it
won't be O(1) time but rather we have to traverse the holes list. (say in
binary search way).
what I don't understand is - what is the purpose of tracking data extents.
hope you get it.

--Pratik.



On Tue, Dec 24, 2019, 3:35 PM Gao Xiang <gaoxiang25@huawei.com> wrote:

> Hi Pratik,
>
> On Tue, Dec 24, 2019 at 03:05:53PM +0530, Pratik Shinde wrote:
> > Hello Gao,
> >
> > Thanks for the review.
> > If I understand correctly , you wish to keep track of every extent
> assigned
> > to the file.
> > in case of file without any holes in it, there will single extent
> > representing the entire file.
> >
> > Also, the current block no. lookup happens in constant time. (since we
> only
> > record the start blk no.)
> > If we use extent record for finding given block no. it can't be done in
> > constant time correct ? (maybe in LogN)
>
>
> Could I ask a question?
> In short, how can we use the proposal approach to read random blocks
> in constant time O(1)?
>
> e.g.
>  if you have two holes 2...4  7...10 in a file with 12 blocks.
>
>  if we want to random access block 11, only block number 1,5,6,11 were
>  saved one by one (maybe p1,p2,p3,p4).
>
>  How can we get the physical address (p4) of block 11 directly without
>  scanning the previous holes?
>
> Thanks,
> Gao Xiang
>
>
> >
> > I think I don't fully understand reason for recording extents assigned
> to a
> > file.Since the current design
> > is already time and space optimized & there are no deletions happening.
> > Is it for some future requirement ?
> >
> > --Pratik.
>
>

[-- Attachment #2: Type: text/html, Size: 2364 bytes --]

<div dir="auto">Hi Gao,<div dir="auto"><br></div><div dir="auto">No no. What I am saying is - in the current code (excluding all my changes) the block lookup will happens in constant time. with only hole list it won&#39;t be O(1) time but rather we have to traverse the holes list. (say in binary search way).</div><div dir="auto">what I don&#39;t understand is - what is the purpose of tracking data extents.</div><div dir="auto">hope you get it.</div><div dir="auto"><br></div><div dir="auto">--Pratik.</div><div dir="auto"><br></div><div dir="auto"><br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Dec 24, 2019, 3:35 PM Gao Xiang &lt;<a href="mailto:gaoxiang25@huawei.com">gaoxiang25@huawei.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi Pratik,<br>
<br>
On Tue, Dec 24, 2019 at 03:05:53PM +0530, Pratik Shinde wrote:<br>
&gt; Hello Gao,<br>
&gt; <br>
&gt; Thanks for the review.<br>
&gt; If I understand correctly , you wish to keep track of every extent assigned<br>
&gt; to the file.<br>
&gt; in case of file without any holes in it, there will single extent<br>
&gt; representing the entire file.<br>
&gt; <br>
&gt; Also, the current block no. lookup happens in constant time. (since we only<br>
&gt; record the start blk no.)<br>
&gt; If we use extent record for finding given block no. it can&#39;t be done in<br>
&gt; constant time correct ? (maybe in LogN)<br>
<br>
<br>
Could I ask a question?<br>
In short, how can we use the proposal approach to read random blocks<br>
in constant time O(1)?<br>
<br>
e.g.<br>
 if you have two holes 2...4  7...10 in a file with 12 blocks.<br>
<br>
 if we want to random access block 11, only block number 1,5,6,11 were<br>
 saved one by one (maybe p1,p2,p3,p4).<br>
<br>
 How can we get the physical address (p4) of block 11 directly without<br>
 scanning the previous holes?<br>
<br>
Thanks,<br>
Gao Xiang<br>
<br>
<br>
&gt; <br>
&gt; I think I don&#39;t fully understand reason for recording extents assigned to a<br>
&gt; file.Since the current design<br>
&gt; is already time and space optimized &amp; there are no deletions happening.<br>
&gt; Is it for some future requirement ?<br>
&gt; <br>
&gt; --Pratik.<br>
<br>
</blockquote></div>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-24 10:45       ` Pratik Shinde
@ 2019-12-24 11:15         ` Gao Xiang
  2019-12-26  5:42           ` Pratik Shinde
  0 siblings, 1 reply; 8+ messages in thread
From: Gao Xiang @ 2019-12-24 11:15 UTC (permalink / raw)
  To: Pratik Shinde; +Cc: miaoxie, linux-erofs

On Tue, Dec 24, 2019 at 04:15:47PM +0530, Pratik Shinde wrote:
> Hi Gao,
> 
> No no. What I am saying is - in the current code (excluding all my changes)
> the block lookup will happens in constant time. with only hole list it

Not only lookup but other interfaces such as fiemap, that is why called
flat mode and fast path.

> won't be O(1) time but rather we have to traverse the holes list. (say in
> binary search way).
> what I don't understand is - what is the purpose of tracking data extents.
> hope you get it.

Mode plain and inline are called flat modes, which is the most common
case of regular and dir files. You can see that's the fastest path for
most file accesses (minimum metadata).

The reason why don't extend the flat modes but introduce another new
sparse mode for 3 main reasons:
 1) introduce a complete enhanced new extent table (or later B+-tree);
 2) we don't even know how many holes in the file if we only read
    inode base metadata, some extra header (no matter extent or hole
    header) need to be readed in advance;
 3) Old kernel backward compatibility need to be considered, not all
    files are sparsed, and we need to get them work properly, and rest
    files are sparsed, we need to block such files from accessed by
    old kernels;

Note that i_format is for such use, so we can introduce sparse mode
with some enhanced on-disk representation (but with more metadata
read amplification than flat modes).

So if files without holes it should be considered as flat modes (fast
path), and then considering the slow path --- upcoming sparse mode.

The purpose of tracking data extents is we could then use it
for deduping, repeated data or data redirect. Hole can only be 0
though.

Thanks,
Gao Xiang

> 
> --Pratik.
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-24 11:15         ` Gao Xiang
@ 2019-12-26  5:42           ` Pratik Shinde
  2019-12-26  6:00             ` Gao Xiang
  0 siblings, 1 reply; 8+ messages in thread
From: Pratik Shinde @ 2019-12-26  5:42 UTC (permalink / raw)
  To: Gao Xiang; +Cc: miaoxie, linux-erofs

[-- Attachment #1: Type: text/plain, Size: 2136 bytes --]

Thanks Gao.

Now I understand the purpose.
So with i_format we will be able to recognize which path to take. i.e fast
path (flat mode) or slow path(i.e to search through extent list).
I am working on it.

--Pratik.

On Tue, Dec 24, 2019 at 4:46 PM Gao Xiang <gaoxiang25@huawei.com> wrote:

> On Tue, Dec 24, 2019 at 04:15:47PM +0530, Pratik Shinde wrote:
> > Hi Gao,
> >
> > No no. What I am saying is - in the current code (excluding all my
> changes)
> > the block lookup will happens in constant time. with only hole list it
>
> Not only lookup but other interfaces such as fiemap, that is why called
> flat mode and fast path.
>
> > won't be O(1) time but rather we have to traverse the holes list. (say in
> > binary search way).
> > what I don't understand is - what is the purpose of tracking data
> extents.
> > hope you get it.
>
> Mode plain and inline are called flat modes, which is the most common
> case of regular and dir files. You can see that's the fastest path for
> most file accesses (minimum metadata).
>
> The reason why don't extend the flat modes but introduce another new
> sparse mode for 3 main reasons:
>  1) introduce a complete enhanced new extent table (or later B+-tree);
>  2) we don't even know how many holes in the file if we only read
>     inode base metadata, some extra header (no matter extent or hole
>     header) need to be readed in advance;
>  3) Old kernel backward compatibility need to be considered, not all
>     files are sparsed, and we need to get them work properly, and rest
>     files are sparsed, we need to block such files from accessed by
>     old kernels;
>
> Note that i_format is for such use, so we can introduce sparse mode
> with some enhanced on-disk representation (but with more metadata
> read amplification than flat modes).
>
> So if files without holes it should be considered as flat modes (fast
> path), and then considering the slow path --- upcoming sparse mode.
>
> The purpose of tracking data extents is we could then use it
> for deduping, repeated data or data redirect. Hole can only be 0
> though.
>
> Thanks,
> Gao Xiang
>
> >
> > --Pratik.
> >
>

[-- Attachment #2: Type: text/html, Size: 2710 bytes --]

<div dir="ltr"><div>Thanks Gao.</div><div><br></div><div>Now I understand the purpose.</div><div>So with i_format we will be able to recognize which path to take. i.e fast path (flat mode) or slow path(i.e to search through extent list).</div><div>I am working on it.</div><div><br></div><div>--Pratik.<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Dec 24, 2019 at 4:46 PM Gao Xiang &lt;<a href="mailto:gaoxiang25@huawei.com">gaoxiang25@huawei.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">On Tue, Dec 24, 2019 at 04:15:47PM +0530, Pratik Shinde wrote:<br>
&gt; Hi Gao,<br>
&gt; <br>
&gt; No no. What I am saying is - in the current code (excluding all my changes)<br>
&gt; the block lookup will happens in constant time. with only hole list it<br>
<br>
Not only lookup but other interfaces such as fiemap, that is why called<br>
flat mode and fast path.<br>
<br>
&gt; won&#39;t be O(1) time but rather we have to traverse the holes list. (say in<br>
&gt; binary search way).<br>
&gt; what I don&#39;t understand is - what is the purpose of tracking data extents.<br>
&gt; hope you get it.<br>
<br>
Mode plain and inline are called flat modes, which is the most common<br>
case of regular and dir files. You can see that&#39;s the fastest path for<br>
most file accesses (minimum metadata).<br>
<br>
The reason why don&#39;t extend the flat modes but introduce another new<br>
sparse mode for 3 main reasons:<br>
 1) introduce a complete enhanced new extent table (or later B+-tree);<br>
 2) we don&#39;t even know how many holes in the file if we only read<br>
    inode base metadata, some extra header (no matter extent or hole<br>
    header) need to be readed in advance;<br>
 3) Old kernel backward compatibility need to be considered, not all<br>
    files are sparsed, and we need to get them work properly, and rest<br>
    files are sparsed, we need to block such files from accessed by<br>
    old kernels;<br>
<br>
Note that i_format is for such use, so we can introduce sparse mode<br>
with some enhanced on-disk representation (but with more metadata<br>
read amplification than flat modes).<br>
<br>
So if files without holes it should be considered as flat modes (fast<br>
path), and then considering the slow path --- upcoming sparse mode.<br>
<br>
The purpose of tracking data extents is we could then use it<br>
for deduping, repeated data or data redirect. Hole can only be 0<br>
though.<br>
<br>
Thanks,<br>
Gao Xiang<br>
<br>
&gt; <br>
&gt; --Pratik.<br>
&gt; <br>
</blockquote></div>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files.
  2019-12-26  5:42           ` Pratik Shinde
@ 2019-12-26  6:00             ` Gao Xiang
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2019-12-26  6:00 UTC (permalink / raw)
  To: Pratik Shinde; +Cc: miaoxie, linux-erofs

On Thu, Dec 26, 2019 at 11:12:09AM +0530, Pratik Shinde wrote:
> Thanks Gao.
> 
> Now I understand the purpose.
> So with i_format we will be able to recognize which path to take. i.e fast
> path (flat mode) or slow path(i.e to search through extent list).
> I am working on it.

Thanks. Yes, that is the original consideration of i_format design and
sorry for some misrepresentations in the previous email such as what
I meant is "old kernel forward compatibility", etc..

But I think the final i_format new mode number is minor for now (I can
also assign a new number based on your patch) because I'd like to
rearrange this field for better scalability as an extra patch in advance
together with your patch.

we can get the main new extent approach in shape first. :-)

Thanks,
Gao Xiang

> 
> --Pratik.
> 
> On Tue, Dec 24, 2019 at 4:46 PM Gao Xiang <gaoxiang25@huawei.com> wrote:
> 
> > On Tue, Dec 24, 2019 at 04:15:47PM +0530, Pratik Shinde wrote:
> > > Hi Gao,
> > >
> > > No no. What I am saying is - in the current code (excluding all my
> > changes)
> > > the block lookup will happens in constant time. with only hole list it
> >
> > Not only lookup but other interfaces such as fiemap, that is why called
> > flat mode and fast path.
> >
> > > won't be O(1) time but rather we have to traverse the holes list. (say in
> > > binary search way).
> > > what I don't understand is - what is the purpose of tracking data
> > extents.
> > > hope you get it.
> >
> > Mode plain and inline are called flat modes, which is the most common
> > case of regular and dir files. You can see that's the fastest path for
> > most file accesses (minimum metadata).
> >
> > The reason why don't extend the flat modes but introduce another new
> > sparse mode for 3 main reasons:
> >  1) introduce a complete enhanced new extent table (or later B+-tree);
> >  2) we don't even know how many holes in the file if we only read
> >     inode base metadata, some extra header (no matter extent or hole
> >     header) need to be readed in advance;
> >  3) Old kernel backward compatibility need to be considered, not all
> >     files are sparsed, and we need to get them work properly, and rest
> >     files are sparsed, we need to block such files from accessed by
> >     old kernels;
> >
> > Note that i_format is for such use, so we can introduce sparse mode
> > with some enhanced on-disk representation (but with more metadata
> > read amplification than flat modes).
> >
> > So if files without holes it should be considered as flat modes (fast
> > path), and then considering the slow path --- upcoming sparse mode.
> >
> > The purpose of tracking data extents is we could then use it
> > for deduping, repeated data or data redirect. Hole can only be 0
> > though.
> >
> > Thanks,
> > Gao Xiang
> >
> > >
> > > --Pratik.
> > >
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, back to index

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-23 17:29 [RFCv2] erofs-utils:code for detecting and tracking holes in uncompressed sparse files Pratik Shinde
2019-12-24  3:48 ` Gao Xiang
2019-12-24  9:35   ` Pratik Shinde
2019-12-24 10:05     ` Gao Xiang
2019-12-24 10:45       ` Pratik Shinde
2019-12-24 11:15         ` Gao Xiang
2019-12-26  5:42           ` Pratik Shinde
2019-12-26  6:00             ` Gao Xiang

Linux-EROFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-erofs/0 linux-erofs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-erofs linux-erofs/ https://lore.kernel.org/linux-erofs \
		linux-erofs@lists.ozlabs.org linux-erofs@ozlabs.org
	public-inbox-index linux-erofs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.ozlabs.lists.linux-erofs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git