All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] e2fsck: improve performance of region_allocate()
@ 2017-08-19  1:16 Doug Porter
  2017-08-22  2:29 ` Theodore Ts'o
  0 siblings, 1 reply; 10+ messages in thread
From: Doug Porter @ 2017-08-19  1:16 UTC (permalink / raw)
  To: Theodore Ts'o, linux-ext4; +Cc: Omar Sandoval, Doug Porter

Use a red-black tree to track allocations instead of a linked list.
This provides a substantial performance boost when the number of
allocations in a region is large.  This change resulted in a reduction
in runtime from 4821s to 6s on one filesystem.

Signed-off-by: Doug Porter <dsp@fb.com>
---
 e2fsck/Makefile.in |   4 +-
 e2fsck/region.c    | 109 ++++++++++++++++++++++++++++-------------------------
 lib/support/dict.c |   6 ---
 3 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/e2fsck/Makefile.in b/e2fsck/Makefile.in
index e43d340..cf60082 100644
--- a/e2fsck/Makefile.in
+++ b/e2fsck/Makefile.in
@@ -148,7 +148,7 @@ tst_region: region.c $(DEPLIBCOM_ERR)
 	$(E) "	LD $@"
 	$(Q) $(CC) -o tst_region $(srcdir)/region.c \
 		$(ALL_CFLAGS) $(ALL_LDFLAGS) -DTEST_PROGRAM \
-		$(LIBCOM_ERR) $(SYSLIBS)
+		$(LIBCOM_ERR) $(LIBSUPPORT) $(SYSLIBS)
 
 fullcheck check:: tst_refcount tst_region tst_problem
 	$(TESTENV) ./tst_refcount
@@ -499,7 +499,7 @@ region.o: $(srcdir)/region.c $(top_builddir)/lib/config.h \
  $(top_srcdir)/lib/ext2fs/ext2_ext_attr.h $(top_srcdir)/lib/ext2fs/bitops.h \
  $(top_srcdir)/lib/support/profile.h $(top_builddir)/lib/support/prof_err.h \
  $(top_srcdir)/lib/support/quotaio.h $(top_srcdir)/lib/support/dqblk_v2.h \
- $(top_srcdir)/lib/support/quotaio_tree.h
+ $(top_srcdir)/lib/support/quotaio_tree.h $(top_srcdir)/lib/support/dict.h
 sigcatcher.o: $(srcdir)/sigcatcher.c $(top_builddir)/lib/config.h \
  $(top_builddir)/lib/dirpaths.h $(srcdir)/e2fsck.h \
  $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h \
diff --git a/e2fsck/region.c b/e2fsck/region.c
index e32f89d..41bdc9f 100644
--- a/e2fsck/region.c
+++ b/e2fsck/region.c
@@ -19,19 +19,37 @@
 #undef ENABLE_NLS
 #endif
 #include "e2fsck.h"
+#include "support/dict.h"
 
 struct region_el {
 	region_addr_t	start;
 	region_addr_t	end;
-	struct region_el *next;
 };
 
 struct region_struct {
 	region_addr_t	min;
 	region_addr_t	max;
-	struct region_el *allocated;
+	dict_t dict;
 };
 
+static int region_dict_cmp(const void *a, const void *b)
+{
+	if (*(region_addr_t *)a > *(region_addr_t *)b)
+		return 1;
+	if (*(region_addr_t *)a < *(region_addr_t *)b)
+		return -1;
+	return 0;
+}
+
+static void region_dnode_free(dnode_t *node, void *context)
+{
+	void *data;
+
+	data = dnode_get(node);
+	free(data);
+	free(node);
+}
+
 region_t region_create(region_addr_t min, region_addr_t max)
 {
 	region_t	region;
@@ -42,24 +60,23 @@ region_t region_create(region_addr_t min, region_addr_t max)
 	memset(region, 0, sizeof(struct region_struct));
 	region->min = min;
 	region->max = max;
+
+	dict_init(&region->dict, DICTCOUNT_T_MAX, region_dict_cmp);
+	dict_set_allocator(&region->dict, NULL, region_dnode_free, NULL);
+
 	return region;
 }
 
 void region_free(region_t region)
 {
-	struct region_el	*r, *next;
-
-	for (r = region->allocated; r; r = next) {
-		next = r->next;
-		free(r);
-	}
-	memset(region, 0, sizeof(struct region_struct));
+	dict_free_nodes(&region->dict);
 	free(region);
 }
 
 int region_allocate(region_t region, region_addr_t start, int n)
 {
-	struct region_el	*r, *new_region, *prev, *next;
+	struct dnode_t *lower_node = NULL, *upper_node = NULL;
+	struct region_el *new_region, *lower = NULL, *upper = NULL;
 	region_addr_t end;
 
 	end = start+n;
@@ -68,52 +85,38 @@ int region_allocate(region_t region, region_addr_t start, int n)
 	if (n == 0)
 		return 1;
 
-	/*
-	 * Search through the linked list.  If we find that it
-	 * conflicts witih something that's already allocated, return
-	 * 1; if we can find an existing region which we can grow, do
-	 * so.  Otherwise, stop when we find the appropriate place
-	 * insert a new region element into the linked list.
-	 */
-	for (r = region->allocated, prev=NULL; r; prev = r, r = r->next) {
-		if (((start >= r->start) && (start < r->end)) ||
-		    ((end > r->start) && (end <= r->end)) ||
-		    ((start <= r->start) && (end >= r->end)))
+	/* Return 1 if we conflict with something that's already allocated. */
+	lower_node = dict_upper_bound(&region->dict, &start);
+	if (lower_node) {
+		lower = dnode_get(lower_node);
+		if (start < lower->end)
 			return 1;
-		if (end == r->start) {
-			r->start = start;
-			return 0;
-		}
-		if (start == r->end) {
-			if ((next = r->next)) {
-				if (end > next->start)
-					return 1;
-				if (end == next->start) {
-					r->end = next->end;
-					r->next = next->next;
-					free(next);
-					return 0;
-				}
-			}
-			r->end = end;
-			return 0;
-		}
-		if (start < r->start)
-			break;
 	}
-	/*
-	 * Insert a new region element structure into the linked list
-	 */
+	upper_node = dict_lower_bound(&region->dict, &start);
+	if (upper_node) {
+		upper = dnode_get(upper_node);
+		if (end > upper->start)
+			return 1;
+	}
+
+	/* Consolidate continguous allocations. */
+	if (lower && start == lower->end) {
+		start = lower->start;
+		dict_delete_free(&region->dict, lower_node);
+	}
+	if (upper && end == upper->start) {
+		end = upper->end;
+		dict_delete_free(&region->dict, upper_node);
+	}
+
+	/* Add new allocation. */
 	new_region = malloc(sizeof(struct region_el));
 	if (!new_region)
 		return -1;
 	new_region->start = start;
-	new_region->end = start + n;
-	new_region->next = r;
-	if (prev)
-		prev->next = new_region;
-	else
-		region->allocated = new_region;
+	new_region->end = end;
+	if (!dict_alloc_insert(&region->dict, &new_region->start, new_region))
+		return -1;
 	return 0;
 }
 
@@ -156,15 +159,19 @@ int bcode_program[] = {
 
 void region_print(region_t region, FILE *f)
 {
+	dnode_t *node = NULL;
 	struct region_el	*r;
 	int	i = 0;
 
 	fprintf(f, "Printing region (min=%llu. max=%llu)\n\t", region->min,
 		region->max);
-	for (r = region->allocated; r; r = r->next) {
+	node = dict_first(&region->dict);
+	while (node) {
+		r = dnode_get(node);
 		fprintf(f, "(%llu, %llu)  ", r->start, r->end);
 		if (++i >= 8)
 			fprintf(f, "\n\t");
+		node = dict_next(&region->dict, node);
 	}
 	fprintf(f, "\n");
 }
diff --git a/lib/support/dict.c b/lib/support/dict.c
index 6a5c15c..e3b2972 100644
--- a/lib/support/dict.c
+++ b/lib/support/dict.c
@@ -490,7 +490,6 @@ dnode_t *dict_lookup(dict_t *dict, const void *key)
     return NULL;
 }
 
-#ifdef E2FSCK_NOTUSED
 /*
  * Look for the node corresponding to the lowest key that is equal to or
  * greater than the given key.  If there is no such node, return null.
@@ -554,7 +553,6 @@ dnode_t *dict_upper_bound(dict_t *dict, const void *key)
 
     return tentative;
 }
-#endif
 
 /*
  * Insert a node into the dictionary. The node should have been
@@ -655,7 +653,6 @@ void dict_insert(dict_t *dict, dnode_t *node, const void *key)
     dict_assert (dict_verify(dict));
 }
 
-#ifdef E2FSCK_NOTUSED
 /*
  * Delete the given node from the dictionary. If the given node does not belong
  * to the given dictionary, undefined behavior results.  A pointer to the
@@ -830,7 +827,6 @@ dnode_t *dict_delete(dict_t *dict, dnode_t *delete)
 
     return delete;
 }
-#endif /* E2FSCK_NOTUSED */
 
 /*
  * Allocate a node using the dictionary's allocator routine, give it
@@ -849,13 +845,11 @@ int dict_alloc_insert(dict_t *dict, const void *key, void *data)
     return 0;
 }
 
-#ifdef E2FSCK_NOTUSED
 void dict_delete_free(dict_t *dict, dnode_t *node)
 {
     dict_delete(dict, node);
     dict->freenode(node, dict->context);
 }
-#endif
 
 /*
  * Return the node with the lowest (leftmost) key. If the dictionary is empty
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: improve performance of region_allocate()
  2017-08-19  1:16 [PATCH] e2fsck: improve performance of region_allocate() Doug Porter
@ 2017-08-22  2:29 ` Theodore Ts'o
  2017-08-22  9:36   ` Jaco Kroon
  0 siblings, 1 reply; 10+ messages in thread
From: Theodore Ts'o @ 2017-08-22  2:29 UTC (permalink / raw)
  To: Doug Porter; +Cc: linux-ext4, Omar Sandoval, Jaco Kroon

On Fri, Aug 18, 2017 at 06:16:35PM -0700, Doug Porter wrote:
> Use a red-black tree to track allocations instead of a linked list.
> This provides a substantial performance boost when the number of
> allocations in a region is large.  This change resulted in a reduction
> in runtime from 4821s to 6s on one filesystem.
> 
> Signed-off-by: Doug Porter <dsp@fb.com>

Hi Doug, as it turns out, Jaco Kroon and I had been debugging the same
problem as oen you were working on.  We came up with a different way
of solving this problem (see below).  It works by observing that
unless the extent tree is terribly corrupted, region_allocate() will
always be appending to the very end of the list.

However, it has since occurred to me that since we are doing an
breadth-first traversal of the extent tree, the starting lba for each
leaf node *must* always be monotonically increasing, and we already
check to make sure this is true within an extent tree block.  So I
think it might be possible to dispense with using any kind of data
structure, whether it's an rbtree or a linked list, and instead just
simply make sure we enforce the start+len of the last entry in an
extent tree block is < the starting lba of the first entry in the next
extent tree block.

We are already checking all of the necessary other conditions in
scan_extent_node, so with this additional check, I believe that using
the region tracking code in scan_extent_node (which was originally
written to make sure that extended attribute block did not have any
parts of a string shared between more than one EA key or value pair)
can be made entirely unnecessary for scan_extent_node().

I haven't had a chance to code this alternate fix, but I believe it
should be superior to either your patch or the one which I had drafted
below.  Does this make sense to you?

							- Ted

commit 8a48ce07a5923242fecc5dc04d6e30dd59a8f07d
Author: Theodore Ts'o <tytso@mit.edu>
Date:   Mon Aug 14 19:52:39 2017 -0400

    e2fsck: add optimization for large, fragmented sparse files
    
    The code which checks for overlapping logical blocks in an extent tree
    is O(h*e) in time, where h is the number of holes in the file, and e
    is the number of extents in the file.  So a file with a large number
    of holes can take e2fsck a long time process.  Optimize this taking
    advantage of the fact the vast majority of the time, region_allocate()
    is called with increasing logical block numbers, so we are almost
    always append onto the end of the region list.
    
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/e2fsck/region.c b/e2fsck/region.c
index e32f89db0..95d23be4f 100644
--- a/e2fsck/region.c
+++ b/e2fsck/region.c
@@ -30,6 +30,7 @@ struct region_struct {
 	region_addr_t	min;
 	region_addr_t	max;
 	struct region_el *allocated;
+	struct region_el *last;
 };
 
 region_t region_create(region_addr_t min, region_addr_t max)
@@ -42,6 +43,7 @@ region_t region_create(region_addr_t min, region_addr_t max)
 	memset(region, 0, sizeof(struct region_struct));
 	region->min = min;
 	region->max = max;
+	region->last = NULL;
 	return region;
 }
 
@@ -68,6 +70,18 @@ int region_allocate(region_t region, region_addr_t start, int n)
 	if (n == 0)
 		return 1;
 
+	if (region->last && region->last->end == start &&
+	    !region->last->next) {
+		region->last->end = end;
+		return 0;
+	}
+	if (region->last && start > region->last->end &&
+	    !region->last->next) {
+		r = NULL;
+		prev = region->last;
+		goto append_to_list;
+	}
+
 	/*
 	 * Search through the linked list.  If we find that it
 	 * conflicts witih something that's already allocated, return
@@ -92,6 +106,8 @@ int region_allocate(region_t region, region_addr_t start, int n)
 					r->end = next->end;
 					r->next = next->next;
 					free(next);
+					if (!r->next)
+						region->last = r;
 					return 0;
 				}
 			}
@@ -104,12 +120,15 @@ int region_allocate(region_t region, region_addr_t start, int n)
 	/*
 	 * Insert a new region element structure into the linked list
 	 */
+append_to_list:
 	new_region = malloc(sizeof(struct region_el));
 	if (!new_region)
 		return -1;
 	new_region->start = start;
 	new_region->end = start + n;
 	new_region->next = r;
+	if (!new_region->next)
+		region->last = new_region;
 	if (prev)
 		prev->next = new_region;
 	else

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: improve performance of region_allocate()
  2017-08-22  2:29 ` Theodore Ts'o
@ 2017-08-22  9:36   ` Jaco Kroon
  2017-08-22 12:45     ` Theodore Ts'o
  2017-08-23 23:21     ` [PATCH] e2fsck: improve performance of region_allocate() Theodore Ts'o
  0 siblings, 2 replies; 10+ messages in thread
From: Jaco Kroon @ 2017-08-22  9:36 UTC (permalink / raw)
  To: Theodore Ts'o, Doug Porter; +Cc: linux-ext4, Omar Sandoval

[-- Attachment #1: Type: text/plain, Size: 5865 bytes --]

Hi Ted, Doug,

Ted, I already emailed you the patch for the latter discussion here, 
including my rudimentary benchmarks.

Unfortunately I'm having trouble with inline formatting of the patch, so 
I have attached it instead of providing inline (sorry - I know that 
makes discussion difficult).  Was originally emailed to you as a series 
of three independent patches, with the below as 0001.  We still need to 
discuss the other optimization.

The attached improves CPU performance from O(e*h) to O(e) and memory 
from O(h) to O(1).  The patch below does similar for CPU but nothing for 
memory (In my case it took fsck down by a significant margin).

Previously my fsck got stuck on 0.5% (we both know it got stuck on a 
single 340GB file with numerous holes in it, of which I have multiple 
such files on my filesystem) and stayed there for a few hours.  With 
this (and the memory map link-count for pass2) I managed to finish a 
fsck on a 40TB filesystem in 508 minutes.

Ted - the provided patch by Doug may still improve performance for the 
other uses of region.c as well, but the impact will probably not be as 
severe since (as far as I know) there are usually not a great many 
number of EAs for each file.

Kind Regards,
Jaco


On 22/08/2017 04:29, Theodore Ts'o wrote:
> On Fri, Aug 18, 2017 at 06:16:35PM -0700, Doug Porter wrote:
>> Use a red-black tree to track allocations instead of a linked list.
>> This provides a substantial performance boost when the number of
>> allocations in a region is large.  This change resulted in a reduction
>> in runtime from 4821s to 6s on one filesystem.
>>
>> Signed-off-by: Doug Porter <dsp@fb.com>
> Hi Doug, as it turns out, Jaco Kroon and I had been debugging the same
> problem as oen you were working on.  We came up with a different way
> of solving this problem (see below).  It works by observing that
> unless the extent tree is terribly corrupted, region_allocate() will
> always be appending to the very end of the list.
>
> However, it has since occurred to me that since we are doing an
> breadth-first traversal of the extent tree, the starting lba for each
> leaf node *must* always be monotonically increasing, and we already
> check to make sure this is true within an extent tree block.  So I
> think it might be possible to dispense with using any kind of data
> structure, whether it's an rbtree or a linked list, and instead just
> simply make sure we enforce the start+len of the last entry in an
> extent tree block is < the starting lba of the first entry in the next
> extent tree block.
>
> We are already checking all of the necessary other conditions in
> scan_extent_node, so with this additional check, I believe that using
> the region tracking code in scan_extent_node (which was originally
> written to make sure that extended attribute block did not have any
> parts of a string shared between more than one EA key or value pair)
> can be made entirely unnecessary for scan_extent_node().
>
> I haven't had a chance to code this alternate fix, but I believe it
> should be superior to either your patch or the one which I had drafted
> below.  Does this make sense to you?
>
> 							- Ted
>
> commit 8a48ce07a5923242fecc5dc04d6e30dd59a8f07d
> Author: Theodore Ts'o <tytso@mit.edu>
> Date:   Mon Aug 14 19:52:39 2017 -0400
>
>      e2fsck: add optimization for large, fragmented sparse files
>      
>      The code which checks for overlapping logical blocks in an extent tree
>      is O(h*e) in time, where h is the number of holes in the file, and e
>      is the number of extents in the file.  So a file with a large number
>      of holes can take e2fsck a long time process.  Optimize this taking
>      advantage of the fact the vast majority of the time, region_allocate()
>      is called with increasing logical block numbers, so we are almost
>      always append onto the end of the region list.
>      
>      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
>
> diff --git a/e2fsck/region.c b/e2fsck/region.c
> index e32f89db0..95d23be4f 100644
> --- a/e2fsck/region.c
> +++ b/e2fsck/region.c
> @@ -30,6 +30,7 @@ struct region_struct {
>   	region_addr_t	min;
>   	region_addr_t	max;
>   	struct region_el *allocated;
> +	struct region_el *last;
>   };
>   
>   region_t region_create(region_addr_t min, region_addr_t max)
> @@ -42,6 +43,7 @@ region_t region_create(region_addr_t min, region_addr_t max)
>   	memset(region, 0, sizeof(struct region_struct));
>   	region->min = min;
>   	region->max = max;
> +	region->last = NULL;
>   	return region;
>   }
>   
> @@ -68,6 +70,18 @@ int region_allocate(region_t region, region_addr_t start, int n)
>   	if (n == 0)
>   		return 1;
>   
> +	if (region->last && region->last->end == start &&
> +	    !region->last->next) {
> +		region->last->end = end;
> +		return 0;
> +	}
> +	if (region->last && start > region->last->end &&
> +	    !region->last->next) {
> +		r = NULL;
> +		prev = region->last;
> +		goto append_to_list;
> +	}
> +
>   	/*
>   	 * Search through the linked list.  If we find that it
>   	 * conflicts witih something that's already allocated, return
> @@ -92,6 +106,8 @@ int region_allocate(region_t region, region_addr_t start, int n)
>   					r->end = next->end;
>   					r->next = next->next;
>   					free(next);
> +					if (!r->next)
> +						region->last = r;
>   					return 0;
>   				}
>   			}
> @@ -104,12 +120,15 @@ int region_allocate(region_t region, region_addr_t start, int n)
>   	/*
>   	 * Insert a new region element structure into the linked list
>   	 */
> +append_to_list:
>   	new_region = malloc(sizeof(struct region_el));
>   	if (!new_region)
>   		return -1;
>   	new_region->start = start;
>   	new_region->end = start + n;
>   	new_region->next = r;
> +	if (!new_region->next)
> +		region->last = new_region;
>   	if (prev)
>   		prev->next = new_region;
>   	else


[-- Attachment #2: 0003-e2fsk-Optimize-out-the-use-of-region.c-for-logical-b.patch --]
[-- Type: text/x-patch, Size: 2580 bytes --]

>From 1cb50ef22658798f3934b15f7f4be06a7ef4d5ff Mon Sep 17 00:00:00 2001
From: Jaco Kroon <jaco@uls.co.za>
Date: Fri, 18 Aug 2017 15:37:51 +0200
Subject: [PATCH 3/3] e2fsk:  Optimize out the use of region.c for logical
 block overlap detection.

Since extents have a guarantee of being monotonically increasing we
merely need to check that block n+1 starts after block n.  This is a
simple enough check and we can perform this by calculating the next
expected block
---
 e2fsck/pass1.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 97dd79c4..b78c4416 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -103,7 +103,7 @@ struct process_block_struct {
 	struct problem_context *pctx;
 	ext2fs_block_bitmap fs_meta_blocks;
 	e2fsck_t	ctx;
-	region_t	region;
+	blk64_t		next_logical_block;
 	struct extent_tree_info	eti;
 };
 
@@ -2819,9 +2819,16 @@ static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 			  (1U << (21 - ctx->fs->super->s_log_block_size))))
 			problem = PR_1_TOOBIG_DIR;
 
-		if (is_leaf && problem == 0 && extent.e_len > 0 &&
-		    region_allocate(pb->region, extent.e_lblk, extent.e_len))
-			problem = PR_1_EXTENT_COLLISION;
+		if (is_leaf && problem == 0 && extent.e_len > 0) {
+#if 0
+			printf("extent_region(ino=%u, expect=%llu, lblk=%llu, len=%u)\n",
+					pb->ino, pb->next_logical_block, extent.e_lblk, extent.e_len);
+#endif
+			if (extent.e_lblk < pb->next_logical_block)
+				problem = PR_1_EXTENT_COLLISION;
+			else if (extent.e_lblk + extent.e_len > pb->next_logical_block)
+				pb->next_logical_block = extent.e_lblk + extent.e_len;
+		}
 
 		/*
 		 * Uninitialized blocks in a directory?  Clear the flag and
@@ -3170,13 +3177,7 @@ static void check_blocks_extents(e2fsck_t ctx, struct problem_context *pctx,
 	memset(pb->eti.ext_info, 0, sizeof(pb->eti.ext_info));
 	pb->eti.ino = pb->ino;
 
-	pb->region = region_create(0, info.max_lblk);
-	if (!pb->region) {
-		ext2fs_extent_free(ehandle);
-		fix_problem(ctx, PR_1_EXTENT_ALLOC_REGION_ABORT, pctx);
-		ctx->flags |= E2F_FLAG_ABORT;
-		return;
-	}
+	pb->next_logical_block = 0;
 
 	eof_lblk = ((EXT2_I_SIZE(inode) + fs->blocksize - 1) >>
 		EXT2_BLOCK_SIZE_BITS(fs->super)) - 1;
@@ -3189,8 +3190,6 @@ static void check_blocks_extents(e2fsck_t ctx, struct problem_context *pctx,
 				   "check_blocks_extents");
 		pctx->errcode = 0;
 	}
-	region_free(pb->region);
-	pb->region = NULL;
 	ext2fs_extent_free(ehandle);
 
 	/* Rebuild unless it's a dir and we're rehashing it */
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: improve performance of region_allocate()
  2017-08-22  9:36   ` Jaco Kroon
@ 2017-08-22 12:45     ` Theodore Ts'o
  2017-08-22 13:47       ` *** SPAM *** " Jaco Kroon
  2017-08-23 23:21     ` [PATCH] e2fsck: improve performance of region_allocate() Theodore Ts'o
  1 sibling, 1 reply; 10+ messages in thread
From: Theodore Ts'o @ 2017-08-22 12:45 UTC (permalink / raw)
  To: Jaco Kroon; +Cc: Doug Porter, linux-ext4, Omar Sandoval

On Tue, Aug 22, 2017 at 11:36:54AM +0200, Jaco Kroon wrote:
> 
> Unfortunately I'm having trouble with inline formatting of the patch, so I
> have attached it instead of providing inline (sorry - I know that makes
> discussion difficult).  Was originally emailed to you as a series of three
> independent patches, with the below as 0001.  We still need to discuss the
> other optimization.

Yeah, sorry for not getting back to you.  I took a long weekend
vacation and this week has been super busy at work, so I haven't had a
chance to pursue the approach I've outlined in an earlier message ---
that is, instead of optimizing the region code (your patch and my
patch, where your patch is more general, but mine is simpler and only
optimizes the one case that is important for optimizing CPU
performance) or replacing it with an rbtree (Doug's patch), but
instead removing it altogether and replacing it with some better check
that avoids needing the memory usage altogether.

> The attached improves CPU performance from O(e*h) to O(e) and memory from
> O(h) to O(1).  The patch below does similar for CPU but nothing for memory
> (In my case it took fsck down by a significant margin).

I thought you were saying you had some false positives (where it was
causing e2fsck to complain about some valid extent trees in your file
system)?  That's why I've not acted on your proposed patch until I had
time to study the code more closely to understand what was going on
with the errors I thought you had reported.

> Ted - the provided patch by Doug may still improve performance for the other
> uses of region.c as well, but the impact will probably not be as severe
> since (as far as I know) there are usually not a great many number of EAs
> for each file.

Correct; we are limited to at most 64k if the new (experimental)
ea_inode feature is enabled; and without that feature, we are limited
to the 4k block size.  So I'm not particularly worried about the xattr
region use case.

Indeed, the region code was originally designed and implemented for
the xattr use case, and the use of a linked list approach was
deliberately chosen for simplicity because normally there are only a
handful of xattrs, and how they are laid out (keys contiguously
growing from one direction, values contiguously grown from the other
direction) is very friendly for that particular use case.  Hence, in
the common, non-error case, there are only going to be two entries on
the linked list when handling xattrs.


As far as the other (icount) optimization is concerned, that's on my
todo list but I'm trying to understand how much of a win it's going to
be on a densly hard linked file system, and whether the complexity due
to the handling the potential increased memory usage is worth it.  If
we leave to be something that has to be manually enabled using
/etc/e2fsck.conf, very few people will use it.  If we need to somehow
figure out how it's safe / necessary to activate the icount
optimization, especially if there are potentially multiple fsck's
running in parallel, this starts to get really complicated.  So
perhaps all we can do is leave it as a manually enabled option, but I
still need to think about that a bit.

Cheers,

						- Ted

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: *** SPAM *** Re: [PATCH] e2fsck: improve performance of region_allocate()
  2017-08-22 12:45     ` Theodore Ts'o
@ 2017-08-22 13:47       ` Jaco Kroon
  2017-08-23 23:23         ` [PATCH] e2fsck: add optimization for heavily hard-linked file systems Theodore Ts'o
  0 siblings, 1 reply; 10+ messages in thread
From: Jaco Kroon @ 2017-08-22 13:47 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: Doug Porter, linux-ext4, Omar Sandoval

[-- Attachment #1: Type: text/plain, Size: 5091 bytes --]

Hi Ted,

On 22/08/2017 14:45, Theodore Ts'o wrote:
>> The attached improves CPU performance from O(e*h) to O(e) and memory from
>> O(h) to O(1).  The patch below does similar for CPU but nothing for memory
>> (In my case it took fsck down by a significant margin).
> I thought you were saying you had some false positives (where it was
> causing e2fsck to complain about some valid extent trees in your file
> system)?  That's why I've not acted on your proposed patch until I had
> time to study the code more closely to understand what was going on
> with the errors I thought you had reported.
I did in fact manage to clear it, but raised it so that you could 
confirm.  With all three patches I sent you directly applied:

338 tests succeeded     0 tests failed

> As far as the other (icount) optimization is concerned, that's on my
> todo list but I'm trying to understand how much of a win it's going to
> be on a densly hard linked file system, and whether the complexity due
> to the handling the potential increased memory usage is worth it.  If
> we leave to be something that has to be manually enabled using
> /etc/e2fsck.conf, very few people will use it.  If we need to somehow
> figure out how it's safe / necessary to activate the icount
> optimization, especially if there are potentially multiple fsck's
> running in parallel, this starts to get really complicated.  So
> perhaps all we can do is leave it as a manually enabled option, but I
> still need to think about that a bit.
My (purely theoretical) assessment on that (attached - proof-of-concept 
quality at best) below.

n = the number of files with a link count > 1;
t = the total number of possible inodes;
g = sum of link counts for all inodes with link count >1; and
c = the average link-count for inodes with link count >1 (g/n).

my case n = ~100 million and t = ~2.8 billion.  I don't have an estimate 
of g for my system, but reckon it can be in the range of 2 billion quite 
trivially.

Pass1 assessment:  insertions are always onto the end of the list 
(inodes traversed sequentially), and cpu and memory wise reduces to O(n) 
(memory = 96/8 * n = 12n bytes, 1.2GB in my case).  I don't think there 
is improvements to be had during pass1.

Pass2, current code:  we clone the list of inodes with link-count>1, 
avoiding expensive mid-array insertions, this reduces to a really fast 
O(n).  "increments" are expensive, and each increment results in a 
binary search for the correct entry in the array, costing O(log(n)) - 27 
comparison ops for my 40TB filesystem.  We perform this g number of 
times, so overall cpu is O(g.log(n)).  memory remains identical to above.

Pass2, full map (possible "optimization"):

We allocate an array of __u16 for t inodes.  In my use-case this 
requires 2.8bn index size, or 5.6GB RAM.  Memory *normally* goes up to O(t).

Only if n > 16.7% of t does memory usage decrease - based on the fact 
that my filesystem has average file size of 394KB, and is <1TB remaining 
on a 40TB file system at 107m files, n/t in my case = 3.5%, I find this 
use-case extremely unlikely.  We can thus argue we ALWAYS sacrifice 
memory, from O(n) to O(t).

In terms of CPU however the increase operation now becomes O(1), from 
O(log(n)).  Depending on the value of g this can be a huge gain, and 
since the O(1) here is a VERY FAST O(1) I suspect the 27x factor in my 
use-case is an underestimate of the actual factor (I suspect even if we 
have only 1 entry in the list the fact that we have to go through just 
one search iteration is at least 3-5x more expensive in terms of the 
constant, thus I *suspect* that a sensible cpu speedup for this check 
for comparing link counts in inodes compared to actual directory 
structures is probably around 100x during pass2.  I could very well be 
wrong.

That said, assuming that we will need to go out to disk to retrieve 
directory structures we're probably going to be IO bound at this point 
anyway, but you're the better person to comment on how disk/cpu bound 
pass2 is in general at this stage.

Performing CPU benchmarks that doesn't require disk should be relatively 
trivial (iterate through a set of values for n and g and generate random 
data - at no point does the cpu utilization depend on the value of t, so 
we can measure the effective difference on even super-crazy values of n 
and g (constrained by g <= n*Max_Link_Count).  If this is something that 
would be interesting I'll happily see if I can generate something.

My opinion: if pass2 is currently generally CPU bound for such use-cases 
we should look at this further, if disk bound then there is no point.  
We can easily calculate the values of n, t and g during pass1 (or even 
as setup code for pass2, everything we need is available to icount.c 
during construction time of pass2 data structure).  Perhaps it makes 
sense to switch to full_map implementation heuristically based on those 
values, but we'd need to understand the potential impact.  We can 
trivially fail back to the current implementation if the memory 
allocation fails.

Kind Regards,
Jaco

[-- Attachment #2: 0002-e2fsck-add-optimization-for-heavy-hard-linked-pass2.patch --]
[-- Type: text/x-patch, Size: 7007 bytes --]

>From b6e80fedf35a9953d0c00b5ca2353d3bec1121f0 Mon Sep 17 00:00:00 2001
From: Jaco Kroon <jaco@uls.co.za>
Date: Tue, 15 Aug 2017 18:38:47 +0200
Subject: [PATCH 2/3] e2fsck: add optimization for heavy-hard-linked pass2.

In the case of heave hard-linking pass2 can slow down rapidly due to
binary search lookup of inode numbers.  This implements a memory
trade-off (storing 2 bytes in-memory for each inode to keep counts).

For a 40TB filesystem with 2.8bn inodes this map alone requires 5.7GB of
RAM.  We don't activate this for pass1 since the gain CPU gain is nearly
nil for that pass and the sacrificed memory does not justify the
increase in RAM.

It could be that during pass1 if more than 17% if possible inodes has
link_count>1 (466m inodes in the 40TB with 2.8bn possible inodes case)
then it becomes more memory efficient to use the full map implementation
in terms of memory.

The author of this patch believes that to be an extremely unlikely
use-case scenario.
---
 e2fsck/pass1.c      |  6 ++++-
 lib/ext2fs/ext2fs.h |  1 +
 lib/ext2fs/icount.c | 64 +++++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 47a8b27c..97dd79c4 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -817,6 +817,7 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
 	errcode_t		retval;
 	char			*tdb_dir;
 	int			enable;
+	int			full_map;
 
 	*ret = 0;
 
@@ -826,6 +827,8 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
 			 "numdirs_threshold", 0, 0, &threshold);
 	profile_get_boolean(ctx->profile, "scratch_files",
 			    "icount", 0, 1, &enable);
+	profile_get_boolean(ctx->profile, "full_map",
+			"enable", 0, 1, &full_map);
 
 	retval = ext2fs_get_num_dirs(ctx->fs, &num_dirs);
 	if (retval)
@@ -840,7 +843,8 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
 	}
 	e2fsck_set_bitmap_type(ctx->fs, EXT2FS_BMAP64_RBTREE, icount_name,
 			       &save_type);
-	retval = ext2fs_create_icount2(ctx->fs, flags, 0, hint, ret);
+	retval = ext2fs_create_icount2(ctx->fs, flags | (full_map ? EXT2_ICOUNT_OPT_FULLMAP : 0),
+			0, hint, ret);
 	ctx->fs->default_bitmap_type = save_type;
 	return retval;
 }
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index a2c8edaa..a4feaccc 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -546,6 +546,7 @@ typedef struct ext2_struct_inode_scan *ext2_inode_scan;
  * ext2_icount_t abstraction
  */
 #define EXT2_ICOUNT_OPT_INCREMENT	0x01
+#define EXT2_ICOUNT_OPT_FULLMAP		0x02
 
 typedef struct ext2_icount *ext2_icount_t;
 
diff --git a/lib/ext2fs/icount.c b/lib/ext2fs/icount.c
index 594b1cc2..7fcd0432 100644
--- a/lib/ext2fs/icount.c
+++ b/lib/ext2fs/icount.c
@@ -61,6 +61,7 @@ struct ext2_icount {
 	char			*tdb_fn;
 	TDB_CONTEXT		*tdb;
 #endif
+	__u16			*fullmap;
 };
 
 /*
@@ -93,6 +94,9 @@ void ext2fs_free_icount(ext2_icount_t icount)
 	}
 #endif
 
+	if (icount->fullmap)
+		ext2fs_free_mem(&icount->fullmap);
+
 	ext2fs_free_mem(&icount);
 }
 
@@ -108,10 +112,20 @@ static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
 		return retval;
 	memset(icount, 0, sizeof(struct ext2_icount));
 
+	if (flags & EXT2_ICOUNT_OPT_FULLMAP && flags & EXT2_ICOUNT_OPT_INCREMENT) {
+		retval = ext2fs_get_mem(sizeof(__u32) * fs->super->s_inodes_count, &icount->fullmap);
+		/* If we can't allocate, fall back */
+		if (!retval) {
+			memset(icount->fullmap, 0, sizeof(__u32) * fs->super->s_inodes_count);
+			goto successout;
+		}
+	}
+
 	retval = ext2fs_allocate_inode_bitmap(fs, "icount", &icount->single);
 	if (retval)
 		goto errout;
 
+
 	if (flags & EXT2_ICOUNT_OPT_INCREMENT) {
 		retval = ext2fs_allocate_inode_bitmap(fs, "icount_inc",
 						      &icount->multiple);
@@ -120,6 +134,7 @@ static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
 	} else
 		icount->multiple = 0;
 
+successout:
 	icount->magic = EXT2_ET_MAGIC_ICOUNT;
 	icount->num_inodes = fs->super->s_inodes_count;
 
@@ -256,6 +271,9 @@ errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
 	if (retval)
 		return retval;
 
+	if (icount->fullmap)
+		goto successout;
+
 	if (size) {
 		icount->size = size;
 	} else {
@@ -295,6 +313,7 @@ errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
 		icount->count = hint->count;
 	}
 
+successout:
 	*ret = icount;
 	return 0;
 
@@ -433,6 +452,11 @@ static errcode_t set_inode_count(ext2_icount_t icount, ext2_ino_t ino,
 		return 0;
 	}
 #endif
+	if (icount->fullmap) {
+		icount->fullmap[ino] = icount_16_xlate(count);
+		return 0;
+	}
+
 	el = get_icount_el(icount, ino, 1);
 	if (!el)
 		return EXT2_ET_NO_MEMORY;
@@ -463,6 +487,11 @@ static errcode_t get_inode_count(ext2_icount_t icount, ext2_ino_t ino,
 		return 0;
 	}
 #endif
+	if (icount->fullmap) {
+		*count = icount->fullmap[ino];
+		return 0;
+	}
+
 	el = get_icount_el(icount, ino, 0);
 	if (!el) {
 		*count = 0;
@@ -504,14 +533,16 @@ errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, __u16 *ret)
 	if (!ino || (ino > icount->num_inodes))
 		return EXT2_ET_INVALID_ARGUMENT;
 
-	if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
-		*ret = 1;
-		return 0;
-	}
-	if (icount->multiple &&
-	    !ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
-		*ret = 0;
-		return 0;
+	if (!icount->fullmap) {
+		if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
+			*ret = 1;
+			return 0;
+		}
+		if (icount->multiple &&
+			!ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
+			*ret = 0;
+			return 0;
+		}
 	}
 	get_inode_count(icount, ino, &val);
 	*ret = icount_16_xlate(val);
@@ -528,7 +559,9 @@ errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino,
 	if (!ino || (ino > icount->num_inodes))
 		return EXT2_ET_INVALID_ARGUMENT;
 
-	if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
+	if (icount->fullmap) {
+		curr_value = icount->fullmap[ino] = icount_16_xlate(icount->fullmap[ino] + 1);
+	} else if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
 		/*
 		 * If the existing count is 1, then we know there is
 		 * no entry in the list.
@@ -585,6 +618,16 @@ errcode_t ext2fs_icount_decrement(ext2_icount_t icount, ext2_ino_t ino,
 
 	EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
 
+	if (icount->fullmap) {
+		if (!icount->fullmap[ino])
+			return EXT2_ET_INVALID_ARGUMENT;
+
+		curr_value = --icount->fullmap[ino];
+		if (ret)
+			*ret = icount_16_xlate(curr_value);
+		return 0;
+	}
+
 	if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
 		ext2fs_unmark_inode_bitmap2(icount->single, ino);
 		if (icount->multiple)
@@ -626,6 +669,9 @@ errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino,
 
 	EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
 
+	if (icount->fullmap)
+		return set_inode_count(icount, ino, count);
+
 	if (count == 1) {
 		ext2fs_mark_inode_bitmap2(icount->single, ino);
 		if (icount->multiple)
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: improve performance of region_allocate()
  2017-08-22  9:36   ` Jaco Kroon
  2017-08-22 12:45     ` Theodore Ts'o
@ 2017-08-23 23:21     ` Theodore Ts'o
  2017-08-24  8:18       ` Jaco Kroon
  1 sibling, 1 reply; 10+ messages in thread
From: Theodore Ts'o @ 2017-08-23 23:21 UTC (permalink / raw)
  To: Jaco Kroon; +Cc: Doug Porter, linux-ext4, Omar Sandoval

Jaco, here's the version of your patch I plan to use.  Please take a
look.

The only real changes are mostly whitespace and formatting.

By the way, it would be nice if you could send me permission to add a

Signed-off-by: Jaco Kroon <jaco@uls.co.za>

See http://elinux.org/Developer_Certificate_Of_Origin for an
explanation of what this means.

						- Ted

commit 320d436c006dc2550ebd01084b6e823f0cea8bc2
Author: Jaco Kroon <jaco@uls.co.za>
Date:   Wed Aug 23 13:54:25 2017 -0400

    e2fsck: optimize out the use region_t in scan_extent_node()
    
    Since extents have a guarantee of being monotonically increasing we
    merely need to check that block n+1 starts after block n.  This is a
    simple enough check and we can perform this by calculating the next
    expected logical block instead of using the region usage tracking data
    abstraction.
    
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 8044beed6..bc26beb99 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -96,7 +96,7 @@ struct process_block_struct {
 	struct problem_context *pctx;
 	ext2fs_block_bitmap fs_meta_blocks;
 	e2fsck_t	ctx;
-	region_t	region;
+	blk64_t		next_lblock;
 	struct extent_tree_info	eti;
 };
 
@@ -2628,9 +2628,18 @@ static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 			  (1U << (21 - ctx->fs->super->s_log_block_size))))
 			problem = PR_1_TOOBIG_DIR;
 
-		if (is_leaf && problem == 0 && extent.e_len > 0 &&
-		    region_allocate(pb->region, extent.e_lblk, extent.e_len))
-			problem = PR_1_EXTENT_COLLISION;
+		if (is_leaf && problem == 0 && extent.e_len > 0) {
+#if 0
+			printf("extent_region(ino=%u, expect=%llu, "
+			       "lblk=%llu, len=%u)\n",
+			       pb->ino, pb->next_lblock,
+			       extent.e_lblk, extent.e_len);
+#endif
+			if (extent.e_lblk < pb->next_lblock)
+				problem = PR_1_EXTENT_COLLISION;
+			else if (extent.e_lblk + extent.e_len > pb->next_lblock)
+				pb->next_lblock = extent.e_lblk + extent.e_len;
+		}
 
 		/*
 		 * Uninitialized blocks in a directory?  Clear the flag and
@@ -2963,13 +2972,7 @@ static void check_blocks_extents(e2fsck_t ctx, struct problem_context *pctx,
 	memset(pb->eti.ext_info, 0, sizeof(pb->eti.ext_info));
 	pb->eti.ino = pb->ino;
 
-	pb->region = region_create(0, info.max_lblk);
-	if (!pb->region) {
-		ext2fs_extent_free(ehandle);
-		fix_problem(ctx, PR_1_EXTENT_ALLOC_REGION_ABORT, pctx);
-		ctx->flags |= E2F_FLAG_ABORT;
-		return;
-	}
+	pb->next_lblock = 0;
 
 	eof_lblk = ((EXT2_I_SIZE(inode) + fs->blocksize - 1) >>
 		EXT2_BLOCK_SIZE_BITS(fs->super)) - 1;
@@ -2982,8 +2985,6 @@ static void check_blocks_extents(e2fsck_t ctx, struct problem_context *pctx,
 				   "check_blocks_extents");
 		pctx->errcode = 0;
 	}
-	region_free(pb->region);
-	pb->region = NULL;
 	ext2fs_extent_free(ehandle);
 
 	/* Rebuild unless it's a dir and we're rehashing it */

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH] e2fsck: add optimization for heavily hard-linked file systems
  2017-08-22 13:47       ` *** SPAM *** " Jaco Kroon
@ 2017-08-23 23:23         ` Theodore Ts'o
       [not found]           ` <48660f5d-2e53-f0ff-e0e5-169a1d2d631f@uls.co.za>
  0 siblings, 1 reply; 10+ messages in thread
From: Theodore Ts'o @ 2017-08-23 23:23 UTC (permalink / raw)
  To: Jaco Kroon; +Cc: Doug Porter, linux-ext4, Omar Sandoval

Jaco,

Here's my revised version of your patch.  The main differences are in
how to enable this optimization, and the fact that this optimizatoin
is disabled by default.  Users will have to either explicitly request
it by using e2fsck -E inode_count_fullmap or via /etc/e2fsck.conf.

Please take a look and let me know what you think.  (And again, if you
could give me permission to add your Signed-off-by, I would appreciate
it.)

Thanks,

					- Ted

commit 75941780c27190c6507aa7ca813caa79638926c8
Author: Jaco Kroon <jaco@uls.co.za>
Date:   Wed Aug 23 14:21:43 2017 -0400

    e2fsck: add optimization for heavily hard-linked file systems
    
    In the case of file system with large number of hard links, e2fsck can
    take a large amount of time in pass 2 due to binary search lookup of
    inode numbers.  This implements a memory trade-off (storing 2 bytes
    in-memory for each inode to store inode counts).
    
    For a 40TB filesystem with 2.8bn inodes this map alone requires 5.7GB
    of RAM.  For this reason, we don't enable this optimization by
    default.  It can be enabled using either an extended option to e2fsck
    or via a seting in e2fsck.conf.
    
    Even when the fullmap optimization is enabled, we don't use this for
    the icount structure in pass 1.  This is because the gain CPU gain is
    nearly nil for that pass and the sacrificed memory does not justify
    the increase in RAM.
    
    (It could be that during pass 1, if more than 17% if possible inodes
    has link_count>1 (466m inodes in the 40TB with 2.8bn possible inodes
    case) then it becomes more memory efficient to use the full map
    implementation in terms of memory.  However, this is extremely
    unlikely given that most file systems are heavily over-provisioned in
    terms of the number of inodes in the system.)
    
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/e2fsck/e2fsck.8.in b/e2fsck/e2fsck.8.in
index 786eb15e7..4c29943d3 100644
--- a/e2fsck/e2fsck.8.in
+++ b/e2fsck/e2fsck.8.in
@@ -228,7 +228,29 @@ exactly the opposite of discard option. This is set as default.
 .TP
 .BI no_optimize_extents
 Do not offer to optimize the extent tree by eliminating unnecessary
-width or depth.
+width or depth.  This can also be enabled in the options section of
+.BR /etc/e2fsck.conf .
+.TP
+.BI optimize_extents
+Offer to optimize the extent tree by eliminating unnecessary
+width or depth.  This is the default unless otherwise specified in
+.BR /etc/e2fsck.conf .
+.TP
+.BI inode_count_fullmap
+Trade off using memory for speed when checking a file system with a
+large number of hard-linked files.  The amount of memory required is
+proportional to the number of inodes in the file system.  For large file
+systems, this can be gigabytes of memory.  (For example, a 40TB file system
+with 2.8 billion inodes will consume an additional 5.7 GB memory if this
+optimization is enabled.)  This optimization can also be enabled in the
+options section of
+.BR /etc/e2fsck.conf .
+.TP
+.BI no_inode_count_fullmap
+Disable the
+.B inode_count_fullmap
+optimization.  This is the default unless otherwise specified in
+.BR /etc/e2fsck.conf .
 .TP
 .BI readahead_kb
 Use this many KiB of memory to pre-fetch metadata in the hopes of reducing
diff --git a/e2fsck/e2fsck.conf.5.in b/e2fsck/e2fsck.conf.5.in
index fbde7ef0b..d8205bcf1 100644
--- a/e2fsck/e2fsck.conf.5.in
+++ b/e2fsck/e2fsck.conf.5.in
@@ -157,6 +157,15 @@ the average fill ratio of directories can be maintained at a
 higher, more efficient level.  This relation defaults to 20
 percent.
 .TP
+.I inode_count_fullmap
+If this boolean relation is true, trade off using memory for speed when
+checking a file system with a large number of hard-linked files.  The
+amount of memory required is proportional to the number of inodes in the
+file system.  For large file systems, this can be gigabytes of memory.
+(For example a 40TB file system with 2.8 billion inodes will consume an
+additional 5.7 GB memory if this optimization is enabled.)  This setting
+defaults to false.
+.TP
 .I log_dir
 If the
 .I log_filename
@@ -206,8 +215,8 @@ of that type are squelched.  This can be useful if the console is slow
 end up delaying the boot process for a long time (potentially hours).
 .TP
 .I no_optimize_extents
-Do not offer to optimize the extent tree by eliminating unnecessary
-width or depth.
+If this boolean relation is true, do not offer to optimize the extent
+tree by reducing the tree's width or depth.  This setting defaults to false.
 .TP
 .I readahead_mem_pct
 Use this percentage of memory to try to read in metadata blocks ahead of the
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 81c09d7eb..12855453d 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -170,6 +170,7 @@ struct resource_track {
 #define E2F_OPT_CONVERT_BMAP	0x4000 /* convert blockmap to extent */
 #define E2F_OPT_FIXES_ONLY	0x8000 /* skip all optimizations */
 #define E2F_OPT_NOOPT_EXTENTS	0x10000 /* don't optimize extents */
+#define E2F_OPT_ICOUNT_FULLMAP	0x20000 /* don't optimize extents */
 
 /*
  * E2fsck flags
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index bc26beb99..6442c9449 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -711,6 +711,7 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
 	errcode_t		retval;
 	char			*tdb_dir;
 	int			enable;
+	int			full_map;
 
 	*ret = 0;
 
@@ -734,6 +735,8 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
 	}
 	e2fsck_set_bitmap_type(ctx->fs, EXT2FS_BMAP64_RBTREE, icount_name,
 			       &save_type);
+	if (ctx->options & E2F_OPT_ICOUNT_FULLMAP)
+		flags |= EXT2_ICOUNT_OPT_FULLMAP;
 	retval = ext2fs_create_icount2(ctx->fs, flags, 0, hint, ret);
 	ctx->fs->default_bitmap_type = save_type;
 	return retval;
diff --git a/e2fsck/unix.c b/e2fsck/unix.c
index ff961483b..939862f1a 100644
--- a/e2fsck/unix.c
+++ b/e2fsck/unix.c
@@ -709,9 +709,18 @@ static void parse_extended_opts(e2fsck_t ctx, const char *opts)
 		} else if (strcmp(token, "nodiscard") == 0) {
 			ctx->options &= ~E2F_OPT_DISCARD;
 			continue;
+		} else if (strcmp(token, "optimize_extents") == 0) {
+			ctx->options &= ~E2F_OPT_NOOPT_EXTENTS;
+			continue;
 		} else if (strcmp(token, "no_optimize_extents") == 0) {
 			ctx->options |= E2F_OPT_NOOPT_EXTENTS;
 			continue;
+		} else if (strcmp(token, "inode_count_fullmap") == 0) {
+			ctx->options |= E2F_OPT_ICOUNT_FULLMAP;
+			continue;
+		} else if (strcmp(token, "no_inode_count_fullmap") == 0) {
+			ctx->options &= ~E2F_OPT_ICOUNT_FULLMAP;
+			continue;
 		} else if (strcmp(token, "log_filename") == 0) {
 			if (!arg)
 				extended_usage++;
@@ -733,17 +742,21 @@ static void parse_extended_opts(e2fsck_t ctx, const char *opts)
 	free(buf);
 
 	if (extended_usage) {
-		fputs(("\nExtended options are separated by commas, "
+		fputs(_("\nExtended options are separated by commas, "
 		       "and may take an argument which\n"
 		       "is set off by an equals ('=') sign.  "
-		       "Valid extended options are:\n"), stderr);
-		fputs(("\tea_ver=<ea_version (1 or 2)>\n"), stderr);
-		fputs(("\tfragcheck\n"), stderr);
-		fputs(("\tjournal_only\n"), stderr);
-		fputs(("\tdiscard\n"), stderr);
-		fputs(("\tnodiscard\n"), stderr);
-		fputs(("\treadahead_kb=<buffer size>\n"), stderr);
-		fputs(("\tbmap2extent\n"), stderr);
+		       "Valid extended options are:\n\n"), stderr);
+		fputs(_("\tea_ver=<ea_version (1 or 2)>\n"), stderr);
+		fputs("\tfragcheck\n", stderr);
+		fputs("\tjournal_only\n", stderr);
+		fputs("\tdiscard\n", stderr);
+		fputs("\tnodiscard\n", stderr);
+		fputs("\toptimize_extents\n", stderr);
+		fputs("\tno_optimize_extents\n", stderr);
+		fputs("\tinode_count_fullmap\n", stderr);
+		fputs("\tno_inode_count_fullmap\n", stderr);
+		fputs(_("\treadahead_kb=<buffer size>\n"), stderr);
+		fputs("\tbmap2extent\n", stderr);
 		fputc('\n', stderr);
 		exit(1);
 	}
@@ -1015,6 +1028,11 @@ static errcode_t PRS(int argc, char *argv[], e2fsck_t *ret_ctx)
 	if (c)
 		ctx->options |= E2F_OPT_NOOPT_EXTENTS;
 
+	profile_get_boolean(ctx->profile, "options", "inode_count_fullmap",
+			    0, 0, &c);
+	if (c)
+		ctx->options |= E2F_OPT_ICOUNT_FULLMAP;
+
 	if (ctx->readahead_kb == ~0ULL) {
 		profile_get_integer(ctx->profile, "options",
 				    "readahead_mem_pct", 0, -1, &c);
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 8ff49ca66..68d0e2a57 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -532,6 +532,7 @@ typedef struct ext2_struct_inode_scan *ext2_inode_scan;
  * ext2_icount_t abstraction
  */
 #define EXT2_ICOUNT_OPT_INCREMENT	0x01
+#define EXT2_ICOUNT_OPT_FULLMAP		0x02
 
 typedef struct ext2_icount *ext2_icount_t;
 
diff --git a/lib/ext2fs/icount.c b/lib/ext2fs/icount.c
index 594b1cc2b..65420e9f3 100644
--- a/lib/ext2fs/icount.c
+++ b/lib/ext2fs/icount.c
@@ -61,6 +61,7 @@ struct ext2_icount {
 	char			*tdb_fn;
 	TDB_CONTEXT		*tdb;
 #endif
+	__u16			*fullmap;
 };
 
 /*
@@ -93,6 +94,9 @@ void ext2fs_free_icount(ext2_icount_t icount)
 	}
 #endif
 
+	if (icount->fullmap)
+		ext2fs_free_mem(&icount->fullmap);
+
 	ext2fs_free_mem(&icount);
 }
 
@@ -108,10 +112,24 @@ static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
 		return retval;
 	memset(icount, 0, sizeof(struct ext2_icount));
 
+	if ((flags & EXT2_ICOUNT_OPT_FULLMAP) &&
+	    (flags & EXT2_ICOUNT_OPT_INCREMENT)) {
+		retval = ext2fs_get_mem((sizeof(__u32) *
+					 fs->super->s_inodes_count),
+					&icount->fullmap);
+		/* If we can't allocate, fall back */
+		if (!retval) {
+			memset(icount->fullmap, 0,
+			       sizeof(__u32) * fs->super->s_inodes_count);
+			goto successout;
+		}
+	}
+
 	retval = ext2fs_allocate_inode_bitmap(fs, "icount", &icount->single);
 	if (retval)
 		goto errout;
 
+
 	if (flags & EXT2_ICOUNT_OPT_INCREMENT) {
 		retval = ext2fs_allocate_inode_bitmap(fs, "icount_inc",
 						      &icount->multiple);
@@ -120,6 +138,7 @@ static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
 	} else
 		icount->multiple = 0;
 
+successout:
 	icount->magic = EXT2_ET_MAGIC_ICOUNT;
 	icount->num_inodes = fs->super->s_inodes_count;
 
@@ -256,6 +275,9 @@ errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
 	if (retval)
 		return retval;
 
+	if (icount->fullmap)
+		goto successout;
+
 	if (size) {
 		icount->size = size;
 	} else {
@@ -295,6 +317,7 @@ errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
 		icount->count = hint->count;
 	}
 
+successout:
 	*ret = icount;
 	return 0;
 
@@ -433,6 +456,11 @@ static errcode_t set_inode_count(ext2_icount_t icount, ext2_ino_t ino,
 		return 0;
 	}
 #endif
+	if (icount->fullmap) {
+		icount->fullmap[ino] = icount_16_xlate(count);
+		return 0;
+	}
+
 	el = get_icount_el(icount, ino, 1);
 	if (!el)
 		return EXT2_ET_NO_MEMORY;
@@ -463,6 +491,11 @@ static errcode_t get_inode_count(ext2_icount_t icount, ext2_ino_t ino,
 		return 0;
 	}
 #endif
+	if (icount->fullmap) {
+		*count = icount->fullmap[ino];
+		return 0;
+	}
+
 	el = get_icount_el(icount, ino, 0);
 	if (!el) {
 		*count = 0;
@@ -504,14 +537,16 @@ errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, __u16 *ret)
 	if (!ino || (ino > icount->num_inodes))
 		return EXT2_ET_INVALID_ARGUMENT;
 
-	if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
-		*ret = 1;
-		return 0;
-	}
-	if (icount->multiple &&
-	    !ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
-		*ret = 0;
-		return 0;
+	if (!icount->fullmap) {
+		if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
+			*ret = 1;
+			return 0;
+		}
+		if (icount->multiple &&
+			!ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
+			*ret = 0;
+			return 0;
+		}
 	}
 	get_inode_count(icount, ino, &val);
 	*ret = icount_16_xlate(val);
@@ -528,7 +563,10 @@ errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino,
 	if (!ino || (ino > icount->num_inodes))
 		return EXT2_ET_INVALID_ARGUMENT;
 
-	if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
+	if (icount->fullmap) {
+		curr_value = icount_16_xlate(icount->fullmap[ino] + 1);
+		icount->fullmap[ino] = curr_value;
+	} else if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
 		/*
 		 * If the existing count is 1, then we know there is
 		 * no entry in the list.
@@ -585,6 +623,16 @@ errcode_t ext2fs_icount_decrement(ext2_icount_t icount, ext2_ino_t ino,
 
 	EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
 
+	if (icount->fullmap) {
+		if (!icount->fullmap[ino])
+			return EXT2_ET_INVALID_ARGUMENT;
+
+		curr_value = --icount->fullmap[ino];
+		if (ret)
+			*ret = icount_16_xlate(curr_value);
+		return 0;
+	}
+
 	if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
 		ext2fs_unmark_inode_bitmap2(icount->single, ino);
 		if (icount->multiple)
@@ -626,6 +674,9 @@ errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino,
 
 	EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
 
+	if (icount->fullmap)
+		return set_inode_count(icount, ino, count);
+
 	if (count == 1) {
 		ext2fs_mark_inode_bitmap2(icount->single, ino);
 		if (icount->multiple)

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: improve performance of region_allocate()
  2017-08-23 23:21     ` [PATCH] e2fsck: improve performance of region_allocate() Theodore Ts'o
@ 2017-08-24  8:18       ` Jaco Kroon
  0 siblings, 0 replies; 10+ messages in thread
From: Jaco Kroon @ 2017-08-24  8:18 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: Doug Porter, linux-ext4, Omar Sandoval

Hi Ted,

On 24/08/2017 01:21, Theodore Ts'o wrote:
> Jaco, here's the version of your patch I plan to use.  Please take a
> look.
>
> The only real changes are mostly whitespace and formatting.
Variable name change + formatting :).
>
> By the way, it would be nice if you could send me permission to add a
>
> Signed-off-by: Jaco Kroon <jaco@uls.co.za>
Approved.

Kind Regards,
Jaco

>
> See http://elinux.org/Developer_Certificate_Of_Origin for an
> explanation of what this means.
>
> 						- Ted
>
> commit 320d436c006dc2550ebd01084b6e823f0cea8bc2
> Author: Jaco Kroon <jaco@uls.co.za>
> Date:   Wed Aug 23 13:54:25 2017 -0400
>
>      e2fsck: optimize out the use region_t in scan_extent_node()
>      
>      Since extents have a guarantee of being monotonically increasing we
>      merely need to check that block n+1 starts after block n.  This is a
>      simple enough check and we can perform this by calculating the next
>      expected logical block instead of using the region usage tracking data
>      abstraction.
>      
>      Signed-off-by: Theodore Ts'o <tytso@mit.edu>
>
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index 8044beed6..bc26beb99 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -96,7 +96,7 @@ struct process_block_struct {
>   	struct problem_context *pctx;
>   	ext2fs_block_bitmap fs_meta_blocks;
>   	e2fsck_t	ctx;
> -	region_t	region;
> +	blk64_t		next_lblock;
>   	struct extent_tree_info	eti;
>   };
>   
> @@ -2628,9 +2628,18 @@ static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
>   			  (1U << (21 - ctx->fs->super->s_log_block_size))))
>   			problem = PR_1_TOOBIG_DIR;
>   
> -		if (is_leaf && problem == 0 && extent.e_len > 0 &&
> -		    region_allocate(pb->region, extent.e_lblk, extent.e_len))
> -			problem = PR_1_EXTENT_COLLISION;
> +		if (is_leaf && problem == 0 && extent.e_len > 0) {
> +#if 0
> +			printf("extent_region(ino=%u, expect=%llu, "
> +			       "lblk=%llu, len=%u)\n",
> +			       pb->ino, pb->next_lblock,
> +			       extent.e_lblk, extent.e_len);
> +#endif
> +			if (extent.e_lblk < pb->next_lblock)
> +				problem = PR_1_EXTENT_COLLISION;
> +			else if (extent.e_lblk + extent.e_len > pb->next_lblock)
> +				pb->next_lblock = extent.e_lblk + extent.e_len;
> +		}
>   
>   		/*
>   		 * Uninitialized blocks in a directory?  Clear the flag and
> @@ -2963,13 +2972,7 @@ static void check_blocks_extents(e2fsck_t ctx, struct problem_context *pctx,
>   	memset(pb->eti.ext_info, 0, sizeof(pb->eti.ext_info));
>   	pb->eti.ino = pb->ino;
>   
> -	pb->region = region_create(0, info.max_lblk);
> -	if (!pb->region) {
> -		ext2fs_extent_free(ehandle);
> -		fix_problem(ctx, PR_1_EXTENT_ALLOC_REGION_ABORT, pctx);
> -		ctx->flags |= E2F_FLAG_ABORT;
> -		return;
> -	}
> +	pb->next_lblock = 0;
>   
>   	eof_lblk = ((EXT2_I_SIZE(inode) + fs->blocksize - 1) >>
>   		EXT2_BLOCK_SIZE_BITS(fs->super)) - 1;
> @@ -2982,8 +2985,6 @@ static void check_blocks_extents(e2fsck_t ctx, struct problem_context *pctx,
>   				   "check_blocks_extents");
>   		pctx->errcode = 0;
>   	}
> -	region_free(pb->region);
> -	pb->region = NULL;
>   	ext2fs_extent_free(ehandle);
>   
>   	/* Rebuild unless it's a dir and we're rehashing it */
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: add optimization for heavily hard-linked file systems
       [not found]           ` <48660f5d-2e53-f0ff-e0e5-169a1d2d631f@uls.co.za>
@ 2017-08-26 15:33             ` Theodore Ts'o
  2017-09-11 10:19               ` Jaco Kroon
  0 siblings, 1 reply; 10+ messages in thread
From: Theodore Ts'o @ 2017-08-26 15:33 UTC (permalink / raw)
  To: Jaco Kroon; +Cc: Doug Porter, linux-ext4, Omar Sandoval

On Thu, Aug 24, 2017 at 11:08:52AM +0200, Jaco Kroon wrote:
> > +#define E2F_OPT_ICOUNT_FULLMAP	0x20000 /* don't optimize extents */
> description here seems wrong?  Perhaps "use fullmap for inode link count" ?

Oops, fixed.

> > --- a/e2fsck/pass1.c
> > +++ b/e2fsck/pass1.c
> > @@ -711,6 +711,7 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
> >   	errcode_t		retval;
> >   	char			*tdb_dir;
> >   	int			enable;
> > +	int			full_map;
> >   	*ret = 0;
> > @@ -734,6 +735,8 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
> >   	}
> >   	e2fsck_set_bitmap_type(ctx->fs, EXT2FS_BMAP64_RBTREE, icount_name,
> >   			       &save_type);
> > +	if (ctx->options & E2F_OPT_ICOUNT_FULLMAP)
> > +		flags |= EXT2_ICOUNT_OPT_FULLMAP;
> >   	retval = ext2fs_create_icount2(ctx->fs, flags, 0, hint, ret);
> >   	ctx->fs->default_bitmap_type = save_type;
> >   	return retval;
> This isn't used for pass1 (I did originally use full_map either way then
> decided to only use it for pass2) - and we to receive flags coming into this
> function - shouldn't this perhaps rather go into pass2.c (e2fsck_pass2
> function to be more specific)?  This really is cosmetic imho.

The e2fsck_setup_icount() function is used for both pass1 and pass2.

> > diff --git a/e2fsck/unix.c b/e2fsck/unix.c
> > index ff961483b..939862f1a 100644
> > --- a/e2fsck/unix.c
> > +++ b/e2fsck/unix.c
> > @@ -709,9 +709,18 @@ static void parse_extended_opts(e2fsck_t ctx, const char *opts)
> >   		} else if (strcmp(token, "nodiscard") == 0) {
> >   			ctx->options &= ~E2F_OPT_DISCARD;
> >   			continue;
> > +		} else if (strcmp(token, "optimize_extents") == 0) {
> > +			ctx->options &= ~E2F_OPT_NOOPT_EXTENTS;
> > +			continue;
> unrelated, but makes sense :).

Yes, this was a cleanup I was doing as I weant.

> > +		if (!retval) {
> > +			memset(icount->fullmap, 0,
> > +			*sizeof(__u32)*  * fs->super->s_inodes_count); <-------
> this of course should be sizeof(__u16), or sizeof(*count->fullmap) which
> will track the type pointed to.  It might be better practice to store
> "sizeof(*icount->fullmap) * fs->super->s_inodes_count" to a variable and
> just re-use it.

Good point.  Fixed.

> >   		goto errout;
> > +
> Extra line can be removed.
> >   	if (flags & EXT2_ICOUNT_OPT_INCREMENT) {

Removed.

Thanks for taking a look at the patch!

						- Ted

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] e2fsck: add optimization for heavily hard-linked file systems
  2017-08-26 15:33             ` Theodore Ts'o
@ 2017-09-11 10:19               ` Jaco Kroon
  0 siblings, 0 replies; 10+ messages in thread
From: Jaco Kroon @ 2017-09-11 10:19 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: Doug Porter, linux-ext4, Omar Sandoval

Hi Theodore,

I got severely side-tracked after the previous look at this.

With the mentions below, ACK.

Kind Regards,
Jaco


On 26/08/2017 17:33, Theodore Ts'o wrote:
> On Thu, Aug 24, 2017 at 11:08:52AM +0200, Jaco Kroon wrote:
>>> +#define E2F_OPT_ICOUNT_FULLMAP	0x20000 /* don't optimize extents */
>> description here seems wrong?  Perhaps "use fullmap for inode link count" ?
> Oops, fixed.
>
>>> --- a/e2fsck/pass1.c
>>> +++ b/e2fsck/pass1.c
>>> @@ -711,6 +711,7 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
>>>    	errcode_t		retval;
>>>    	char			*tdb_dir;
>>>    	int			enable;
>>> +	int			full_map;
>>>    	*ret = 0;
>>> @@ -734,6 +735,8 @@ extern errcode_t e2fsck_setup_icount(e2fsck_t ctx, const char *icount_name,
>>>    	}
>>>    	e2fsck_set_bitmap_type(ctx->fs, EXT2FS_BMAP64_RBTREE, icount_name,
>>>    			       &save_type);
>>> +	if (ctx->options & E2F_OPT_ICOUNT_FULLMAP)
>>> +		flags |= EXT2_ICOUNT_OPT_FULLMAP;
>>>    	retval = ext2fs_create_icount2(ctx->fs, flags, 0, hint, ret);
>>>    	ctx->fs->default_bitmap_type = save_type;
>>>    	return retval;
>> This isn't used for pass1 (I did originally use full_map either way then
>> decided to only use it for pass2) - and we to receive flags coming into this
>> function - shouldn't this perhaps rather go into pass2.c (e2fsck_pass2
>> function to be more specific)?  This really is cosmetic imho.
> The e2fsck_setup_icount() function is used for both pass1 and pass2.
>
>>> diff --git a/e2fsck/unix.c b/e2fsck/unix.c
>>> index ff961483b..939862f1a 100644
>>> --- a/e2fsck/unix.c
>>> +++ b/e2fsck/unix.c
>>> @@ -709,9 +709,18 @@ static void parse_extended_opts(e2fsck_t ctx, const char *opts)
>>>    		} else if (strcmp(token, "nodiscard") == 0) {
>>>    			ctx->options &= ~E2F_OPT_DISCARD;
>>>    			continue;
>>> +		} else if (strcmp(token, "optimize_extents") == 0) {
>>> +			ctx->options &= ~E2F_OPT_NOOPT_EXTENTS;
>>> +			continue;
>> unrelated, but makes sense :).
> Yes, this was a cleanup I was doing as I weant.
>
>>> +		if (!retval) {
>>> +			memset(icount->fullmap, 0,
>>> +			*sizeof(__u32)*  * fs->super->s_inodes_count); <-------
>> this of course should be sizeof(__u16), or sizeof(*count->fullmap) which
>> will track the type pointed to.  It might be better practice to store
>> "sizeof(*icount->fullmap) * fs->super->s_inodes_count" to a variable and
>> just re-use it.
> Good point.  Fixed.
>
>>>    		goto errout;
>>> +
>> Extra line can be removed.
>>>    	if (flags & EXT2_ICOUNT_OPT_INCREMENT) {
> Removed.
>
> Thanks for taking a look at the patch!
>
> 						- Ted

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-09-11 10:19 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-19  1:16 [PATCH] e2fsck: improve performance of region_allocate() Doug Porter
2017-08-22  2:29 ` Theodore Ts'o
2017-08-22  9:36   ` Jaco Kroon
2017-08-22 12:45     ` Theodore Ts'o
2017-08-22 13:47       ` *** SPAM *** " Jaco Kroon
2017-08-23 23:23         ` [PATCH] e2fsck: add optimization for heavily hard-linked file systems Theodore Ts'o
     [not found]           ` <48660f5d-2e53-f0ff-e0e5-169a1d2d631f@uls.co.za>
2017-08-26 15:33             ` Theodore Ts'o
2017-09-11 10:19               ` Jaco Kroon
2017-08-23 23:21     ` [PATCH] e2fsck: improve performance of region_allocate() Theodore Ts'o
2017-08-24  8:18       ` Jaco Kroon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.