linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
To: Theodore Ts'o <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org,
	Harshad Shirwadkar <harshadshirwadkar@gmail.com>,
	Wang Shilong <wshilong@ddn.com>,
	Andreas Dilger <adilger.kernel@dilger.ca>, Li Xi <lixi@ddn.com>,
	Ritesh Harjani <ritesh.list@gmail.com>
Subject: [RFCv1 39/72] e2fsck: merge dx_dir_info after threads finish
Date: Mon,  7 Nov 2022 17:51:27 +0530	[thread overview]
Message-ID: <ce0d19896608c68e0e2adabc754961f9ffeb8e27.1667822611.git.ritesh.list@gmail.com> (raw)
In-Reply-To: <cover.1667822611.git.ritesh.list@gmail.com>

From: Wang Shilong <wshilong@ddn.com>

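Merge each thread's dx_dir_info array into the global context's
dx_dir_info array after the pass 1 threads finish, keeping the merged
array ordered by inode number.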

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
---
 e2fsck/dx_dirinfo.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
 e2fsck/e2fsck.h     |  1 +
 e2fsck/pass1.c      | 23 ++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/e2fsck/dx_dirinfo.c b/e2fsck/dx_dirinfo.c
index caca3e30..91954572 100644
--- a/e2fsck/dx_dirinfo.c
+++ b/e2fsck/dx_dirinfo.c
@@ -5,6 +5,7 @@
  * under the terms of the GNU Public License.
  */
 
+#include <assert.h>
 #include "config.h"
 #include "e2fsck.h"
 
@@ -79,6 +80,69 @@ void e2fsck_add_dx_dir(e2fsck_t ctx, ext2_ino_t ino, struct ext2_inode *inode,
 				       "dx_block info array");
 }
 
+/*
+ * Merge @thread_ctx's dx_dir_info into @global_ctx's (both assumed sorted by ino)
+ */
+void e2fsck_merge_dx_dir(e2fsck_t global_ctx, e2fsck_t thread_ctx)
+{
+	struct dx_dir_info *src_array = thread_ctx->dx_dir_info;
+	struct dx_dir_info *dest_array = global_ctx->dx_dir_info;
+	size_t size_dx_info = sizeof(struct dx_dir_info);
+	ext2_ino_t size = global_ctx->dx_dir_info_size;
+	ext2_ino_t src_count = thread_ctx->dx_dir_info_count;
+	ext2_ino_t dest_count = global_ctx->dx_dir_info_count;
+	ext2_ino_t total_count = src_count + dest_count;
+	struct dx_dir_info *array;
+	struct dx_dir_info *array_ptr;
+	ext2_ino_t src_index = 0, dest_index = 0;
+
+	if (thread_ctx->dx_dir_info_count == 0)
+		return;	/* nothing to merge; both contexts are left untouched */
+
+	if (size < total_count)
+		size = total_count;	/* new array must hold every entry */
+
+	array = e2fsck_allocate_memory(global_ctx, size * size_dx_info,
+				       "directory map");
+	array_ptr = array;
+	/*
+	 * This could be sped up with binary search plus memcpy, but the code
+	 * would be more complex. And if the groups distributed to each
+	 * thread are strided, this element-by-element merge won't be much
+	 * worse than that optimization.
+	 */
+	while (src_index < src_count || dest_index < dest_count) {
+		if (src_index >= src_count) {	/* src done: copy rest of dest */
+			memcpy(array_ptr, &dest_array[dest_index],
+			       (dest_count - dest_index) * size_dx_info);
+			break;
+		}
+		if (dest_index >= dest_count) {	/* dest done: copy rest of src */
+			memcpy(array_ptr, &src_array[src_index],
+			       (src_count - src_index) * size_dx_info);
+			break;
+		}
+		if (src_array[src_index].ino < dest_array[dest_index].ino) {
+			*array_ptr = src_array[src_index];
+			src_index++;
+		} else {	/* the same ino in both arrays is not expected */
+			assert(src_array[src_index].ino >
+			       dest_array[dest_index].ino);
+			*array_ptr = dest_array[dest_index];
+			dest_index++;
+		}
+		array_ptr++;
+	}
+
+	if (global_ctx->dx_dir_info)
+		ext2fs_free_mem(&global_ctx->dx_dir_info);
+	if (thread_ctx->dx_dir_info)
+		ext2fs_free_mem(&thread_ctx->dx_dir_info);
+	global_ctx->dx_dir_info = array;	/* install the merged array */
+	global_ctx->dx_dir_info_size = size;
+	global_ctx->dx_dir_info_count = total_count;
+}
+
 /*
  * get_dx_dir_info() --- given an inode number, try to find the directory
  * information entry for it.
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 9b0f5067..26c3b8a5 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -578,6 +578,7 @@ extern int e2fsck_dir_info_get_parent(e2fsck_t ctx, ext2_ino_t ino,
 				      ext2_ino_t *parent);
 extern int e2fsck_dir_info_get_dotdot(e2fsck_t ctx, ext2_ino_t ino,
 				      ext2_ino_t *dotdot);
+extern void e2fsck_merge_dx_dir(e2fsck_t global_ctx, e2fsck_t thread_ctx);
 
 /* dx_dirinfo.c */
 extern void e2fsck_add_dx_dir(e2fsck_t ctx, ext2_ino_t ino,
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 8b502307..f998590e 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -2341,6 +2341,22 @@ static void e2fsck_pass1_merge_dir_info(e2fsck_t global_ctx, e2fsck_t thread_ctx
 			      global_ctx->dir_info);
 }
 
+static void e2fsck_pass1_merge_dx_dir(e2fsck_t global_ctx, e2fsck_t thread_ctx)
+{
+	if (thread_ctx->dx_dir_info == NULL)
+		return;	/* thread has no dx_dir_info array to contribute */
+
+	if (global_ctx->dx_dir_info == NULL) {	/* adopt the thread's array as-is */
+		global_ctx->dx_dir_info = thread_ctx->dx_dir_info;
+		global_ctx->dx_dir_info_size = thread_ctx->dx_dir_info_size;
+		global_ctx->dx_dir_info_count = thread_ctx->dx_dir_info_count;
+		thread_ctx->dx_dir_info = NULL;	/* pointer moved to global_ctx */
+		return;
+	}
+
+	e2fsck_merge_dx_dir(global_ctx, thread_ctx);	/* both non-NULL: merge */
+}
+
 static inline errcode_t
 e2fsck_pass1_merge_icount(ext2_icount_t *dest_icount,
 			  ext2_icount_t *src_icount)
@@ -2386,6 +2402,7 @@ static int e2fsck_pass1_thread_join_one(e2fsck_t global_ctx, e2fsck_t thread_ctx
 	ext2_filsys global_fs = global_ctx->fs;
 	ext2fs_inode_bitmap inode_bad_map = global_ctx->inode_bad_map;
 	struct dir_info_db *dir_info = global_ctx->dir_info;
+	struct dx_dir_info *dx_dir_info = global_ctx->dx_dir_info;
 	ext2fs_inode_bitmap inode_used_map = global_ctx->inode_used_map;
 	ext2fs_inode_bitmap inode_dir_map = global_ctx->inode_dir_map;
 	ext2fs_inode_bitmap inode_bb_map = global_ctx->inode_bb_map;
@@ -2415,6 +2432,8 @@ static int e2fsck_pass1_thread_join_one(e2fsck_t global_ctx, e2fsck_t thread_ctx
 	__u32 fs_fragmented = global_ctx->fs_fragmented;
 	__u32 fs_fragmented_dir = global_ctx->fs_fragmented_dir;
 	__u32 large_files = global_ctx->large_files;
+	ext2_ino_t dx_dir_info_size = global_ctx->dx_dir_info_size;
+	ext2_ino_t dx_dir_info_count = global_ctx->dx_dir_info_count;
 
 #ifdef HAVE_SETJMP_H
 	jmp_buf old_jmp;
@@ -2439,6 +2458,10 @@ static int e2fsck_pass1_thread_join_one(e2fsck_t global_ctx, e2fsck_t thread_ctx
 	global_ctx->block_metadata_map = block_metadata_map;
 	global_ctx->dir_info = dir_info;
 	e2fsck_pass1_merge_dir_info(global_ctx, thread_ctx);
+	global_ctx->dx_dir_info = dx_dir_info;
+	global_ctx->dx_dir_info_count = dx_dir_info_count;
+	global_ctx->dx_dir_info_size = dx_dir_info_size;
+	e2fsck_pass1_merge_dx_dir(global_ctx, thread_ctx);
 	global_ctx->inode_count = inode_count;
 	global_ctx->inode_link_info = inode_link_info;
 	global_ctx->fs_directory_count += fs_directory_count;
-- 
2.37.3


  parent reply	other threads:[~2022-11-07 12:26 UTC|newest]

Thread overview: 104+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-07 12:20 [RFCv1 00/72] e2fsprogs: Parallel fsck support Ritesh Harjani (IBM)
2022-11-07 12:20 ` [RFCv1 01/72] e2fsck: Fix unbalanced mutex unlock for BOUNCE_MTX Ritesh Harjani (IBM)
2022-11-17 16:02   ` Theodore Ts'o
2022-11-17 18:45     ` Ritesh Harjani (IBM)
2022-11-18 10:34   ` Andreas Dilger
2022-11-18 11:37     ` Ritesh Harjani (IBM)
2022-11-18 13:20       ` Andreas Dilger
2022-11-19  3:46         ` Ritesh Harjani (IBM)
2023-01-24 16:40   ` Theodore Ts'o
2022-11-07 12:20 ` [RFCv1 02/72] gen_bitmaps: Fix ext2fs_compare_generic_bmap/bitmap logic Ritesh Harjani (IBM)
2022-11-23  5:04   ` Andreas Dilger
2023-01-24 16:59     ` Theodore Ts'o
2022-11-07 12:20 ` [RFCv1 03/72] blkmap64_ba: Add common helper for bits size calculation Ritesh Harjani (IBM)
2022-11-18 10:40   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 04/72] badblocks: Remove unused badblocks_flags Ritesh Harjani (IBM)
2022-11-18 13:26   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 05/72] badblocks: Add badblocks merge logic Ritesh Harjani (IBM)
2022-11-18 13:31   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 06/72] dblist: add dblist " Ritesh Harjani (IBM)
2022-11-18 13:34   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 07/72] libext2fs: Add rbtree bitmap " Ritesh Harjani (IBM)
2022-11-07 12:20 ` [RFCv1 08/72] libext2fs: Add bitmaps merge ops Ritesh Harjani (IBM)
2022-11-18 13:36   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 09/72] libext2fs: Add flush cleanup API Ritesh Harjani (IBM)
2022-11-18 13:39   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 10/72] libext2fs: merge icounts after thread finishes Ritesh Harjani (IBM)
2022-11-18 13:40   ` Andreas Dilger
2022-11-07 12:20 ` [RFCv1 11/72] libext2fs: merge quota context after threads finish Ritesh Harjani (IBM)
2022-11-18 13:42   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 12/72] libext2fs: dupfs: Add fs clone & merge api Ritesh Harjani (IBM)
2022-11-18 19:46   ` Andreas Dilger
2022-11-19  5:02     ` Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 13/72] tst_badblocks: Add unit test to verify badblocks list " Ritesh Harjani (IBM)
2022-12-12 20:35   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 14/72] tst_bitmaps_standalone: Add copy and merge bitmaps test Ritesh Harjani (IBM)
2022-12-12 20:40   ` Andreas Dilger
2022-12-14  5:12     ` Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 15/72] tst_bitmaps_pthread: Add merge bitmaps test using pthreads Ritesh Harjani (IBM)
2022-12-14 21:15   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 16/72] tst_libext2fs_pthread: Add libext2fs merge/clone unit tests Ritesh Harjani (IBM)
2022-12-14 21:17   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 17/72] libext2fs: Add support for ext2fs_test_block_bitmap_range2_valid() Ritesh Harjani (IBM)
2022-12-14 21:21   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 18/72] libext2fs: Add support to get average group count Ritesh Harjani (IBM)
2022-12-14 21:24   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 19/72] libext2fs: Misc fixes for struct_ext2_filsys Ritesh Harjani (IBM)
2022-12-14 21:22   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 20/72] libext2fs: avoid too much memory allocation in case fs_num_threads Ritesh Harjani (IBM)
2022-11-18 13:37   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 21/72] e2fsck: add -m option for multithread Ritesh Harjani (IBM)
2022-12-14 21:32   ` Andreas Dilger
2022-11-07 12:21 ` [RFCv1 22/72] e2fsck: copy context when using multi-thread fsck Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 23/72] e2fsck: create logs for multi-threads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 24/72] e2fsck: configure one pfsck thread Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 25/72] e2fsck: Add e2fsck_pass1_thread_join return value Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 26/72] e2fsck: Use merge/clone apis of libext2fs Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 27/72] e2fsck: Add e2fsck_pass1_merge_bitmap() api Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 28/72] e2fsck: Add asserts in open_channel_fs Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 29/72] e2fsck: add start/end group for thread Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 30/72] e2fsck: split groups to different threads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 31/72] e2fsck: print thread log properly Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 32/72] e2fsck: do not change global variables Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 33/72] e2fsck: optimize the inserting of dir_info_db Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 34/72] e2fsck: merge dir_info after thread finishes Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 35/72] e2fsck: rbtree bitmap for dir Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 36/72] e2fsck: merge icounts after thread finishes Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 37/72] e2fsck: add debug codes for multiple threads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 38/72] e2fsck: merge counts after threads finish Ritesh Harjani (IBM)
2022-11-07 12:21 ` Ritesh Harjani (IBM) [this message]
2022-11-07 12:21 ` [RFCv1 40/72] e2fsck: merge dirs_to_hash when " Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 41/72] e2fsck: merge context flags properly Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 42/72] e2fsck: merge quota context after threads finish Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 43/72] e2fsck: serialize fix operations Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 44/72] e2fsck: move some fixes out of parallel pthreads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 45/72] e2fsck: split and merge invalid bitmaps Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 46/72] e2fsck: merge EA blocks properly Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 47/72] e2fsck: kickoff mutex lock for block found map Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 48/72] e2fsck: allow admin specify number of threads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 49/72] e2fsck: adjust " Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 50/72] e2fsck: fix readahead for pfsck of pass1 Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 51/72] e2fsck: merge options after threads finish Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 52/72] e2fsck: reset lost_and_found " Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 53/72] e2fsck: merge extent depth count " Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 54/72] e2fsck: simplify e2fsck context merging codes Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 55/72] e2fsck: set E2F_FLAG_ALLOC_OK after threads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 56/72] e2fsck: wait fix thread finish before checking Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 57/72] e2fsck: cleanup e2fsck_pass1_thread_join() Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 58/72] e2fsck: make default smallest RA size to 1M Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 59/72] e2fsck: update mmp block in one thread Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 60/72] e2fsck: reset @inodes_to_rebuild if restart Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 61/72] tests: add pfsck test Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 62/72] e2fsck: fix memory leaks with pfsck enabled Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 63/72] e2fsck: misc cleanups for pfsck Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 64/72] e2fsck: propagate number of threads Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 65/72] e2fsck: Annotating fields in e2fsck_struct Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 66/72] e2fsck: merge casefolded dir lists after thread finish Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 67/72] sec: support encrypted files handling in pfsck mode Ritesh Harjani (IBM)
2022-11-07 19:22   ` Eric Biggers
2022-11-07 12:21 ` [RFCv1 68/72] e2fsck: Fix io->align assert check Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 69/72] e2fsck: Fix double free of inodes_to_process Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 70/72] e2fsck: Fix and simplify update_mmp in case of pfsck Ritesh Harjani (IBM)
2022-11-07 12:21 ` [RFCv1 71/72] e2fsck: Make threads call log_out after pthread_join Ritesh Harjani (IBM)
2022-11-07 12:22 ` [RFCv1 72/72] tests/f_multithread: Fix f_multithread related tests Ritesh Harjani (IBM)
     [not found] ` <B4ED1C86-D3EC-4A0A-97B3-CFCB46617E1A@dilger.ca>
2022-11-19  5:39   ` [RFCv1 00/72] e2fsprogs: Parallel fsck support Ritesh Harjani

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ce0d19896608c68e0e2adabc754961f9ffeb8e27.1667822611.git.ritesh.list@gmail.com \
    --to=ritesh.list@gmail.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=harshadshirwadkar@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=lixi@ddn.com \
    --cc=tytso@mit.edu \
    --cc=wshilong@ddn.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).