* Fwd: [PATCH] [RFC] ext2fs: parallel bitmap loading
       [not found] <CA+OwuSj-WjaPbfOSDpg5Mz2tm_W0p40N-L=meiWEDZ6j1ccq=Q@mail.gmail.com>
@ 2020-09-04 21:34 ` Andreas Dilger
  2020-09-16 21:03   ` Theodore Y. Ts'o
  0 siblings, 1 reply; 4+ messages in thread
From: Andreas Dilger @ 2020-09-04 21:34 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Wang Shilong

This is a patch that is part of the parallel e2fsck series that Shilong is working on,
and does not work by itself, but was requested during discussion on the ext4
concall today.


Cheers, Andreas
========================================

From dba9e324999727e6cc2ca158cc01f0053a701db9 Mon Sep 17 00:00:00 2001
From: Wang Shilong <wshilong@ddn.com>
Date: Thu, 3 Sep 2020 10:51:49 +0800
Subject: [PATCH] RFC ext2fs: parallel bitmap loading

In our benchmarking of a PiB-sized filesystem, pass5 takes
10446s to finish, and 99.5% of that time is spent reading bitmaps.

It makes sense to read the bitmaps using multiple threads;
a quick benchmark dropped the time from 10446s to 883s with 64 threads.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
---
 lib/ext2fs/rw_bitmaps.c | 260 ++++++++++++++++++++++++++++++++++------
 1 file changed, 224 insertions(+), 36 deletions(-)

diff --git a/lib/ext2fs/rw_bitmaps.c b/lib/ext2fs/rw_bitmaps.c
index d80c9eb8..323949f5 100644
--- a/lib/ext2fs/rw_bitmaps.c
+++ b/lib/ext2fs/rw_bitmaps.c
@@ -23,6 +23,7 @@
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
+#include <pthread.h>

 #include "ext2_fs.h"
 #include "ext2fs.h"
@@ -205,22 +206,12 @@ static int bitmap_tail_verify(unsigned char *bitmap, int first, int last)
 	return 1;
 }

-static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+static errcode_t read_bitmaps_range_prepare(ext2_filsys fs, int do_inode, int do_block)
 {
-	dgrp_t i;
-	char *block_bitmap = 0, *inode_bitmap = 0;
-	char *buf;
 	errcode_t retval;
 	int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
 	int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
-	int tail_flags = 0;
-	int csum_flag;
-	unsigned int	cnt;
-	blk64_t	blk;
-	blk64_t	blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
-	blk64_t   blk_cnt;
-	ext2_ino_t ino_itr = 1;
-	ext2_ino_t ino_cnt;
+	char *buf;

 	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);

@@ -230,11 +221,10 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)

 	fs->write_bitmaps = ext2fs_write_bitmaps;

-	csum_flag = ext2fs_has_group_desc_csum(fs);
-
 	retval = ext2fs_get_mem(strlen(fs->device_name) + 80, &buf);
 	if (retval)
 		return retval;
+
 	if (do_block) {
 		if (fs->block_map)
 			ext2fs_free_block_bitmap(fs->block_map);
@@ -243,11 +233,8 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		retval = ext2fs_allocate_block_bitmap(fs, buf, &fs->block_map);
 		if (retval)
 			goto cleanup;
-		retval = io_channel_alloc_buf(fs->io, 0, &block_bitmap);
-		if (retval)
-			goto cleanup;
-	} else
-		block_nbytes = 0;
+	}
+
 	if (do_inode) {
 		if (fs->inode_map)
 			ext2fs_free_inode_bitmap(fs->inode_map);
@@ -256,12 +243,60 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		retval = ext2fs_allocate_inode_bitmap(fs, buf, &fs->inode_map);
 		if (retval)
 			goto cleanup;
+	}
+	ext2fs_free_mem(&buf);
+
+	return retval;
+
+cleanup:
+	if (do_block) {
+		ext2fs_free_block_bitmap(fs->block_map);
+		fs->block_map = 0;
+	}
+	if (do_inode) {
+		ext2fs_free_inode_bitmap(fs->inode_map);
+		fs->inode_map = 0;
+	}
+	if (buf)
+		ext2fs_free_mem(&buf);
+	return retval;
+}
+
+static errcode_t read_bitmaps_range_start(ext2_filsys fs, int do_inode, int do_block,
+					  dgrp_t start, dgrp_t end, pthread_mutex_t *mutex)
+{
+	dgrp_t i;
+	char *block_bitmap = 0, *inode_bitmap = 0;
+	char *buf;
+	errcode_t retval;
+	int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
+	int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
+	int tail_flags = 0;
+	int csum_flag;
+	unsigned int	cnt;
+	blk64_t	blk;
+	blk64_t	blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
+	blk64_t   blk_cnt;
+	ext2_ino_t ino_itr = 1;
+	ext2_ino_t ino_cnt;
+
+	csum_flag = ext2fs_has_group_desc_csum(fs);
+
+	if (do_block) {
+		retval = io_channel_alloc_buf(fs->io, 0, &block_bitmap);
+		if (retval)
+			goto cleanup;
+	} else {
+		block_nbytes = 0;
+	}
+
+	if (do_inode) {
 		retval = io_channel_alloc_buf(fs->io, 0, &inode_bitmap);
 		if (retval)
 			goto cleanup;
-	} else
+	} else {
 		inode_nbytes = 0;
-	ext2fs_free_mem(&buf);
+	}

 	if (fs->flags & EXT2_FLAG_IMAGE_FILE) {
 		blk = (ext2fs_le32_to_cpu(fs->image_header->offset_inodemap) / fs->blocksize);
@@ -303,7 +338,9 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		goto success_cleanup;
 	}

-	for (i = 0; i < fs->group_desc_count; i++) {
+	blk_itr += (block_nbytes << 3) * start;
+	ino_itr += (inode_nbytes << 3) * start;
+	for (i = start; i <= end; i++) {
 		if (block_bitmap) {
 			blk = ext2fs_block_bitmap_loc(fs, i);
 			if ((csum_flag &&
@@ -333,8 +370,12 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			} else
 				memset(block_bitmap, 0, block_nbytes);
 			cnt = block_nbytes << 3;
+			if (mutex)
+				pthread_mutex_lock(mutex);
 			retval = ext2fs_set_block_bitmap_range2(fs->block_map,
 					       blk_itr, cnt, block_bitmap);
+			if (mutex)
+				pthread_mutex_unlock(mutex);
 			if (retval)
 				goto cleanup;
 			blk_itr += block_nbytes << 3;
@@ -369,29 +410,28 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			} else
 				memset(inode_bitmap, 0, inode_nbytes);
 			cnt = inode_nbytes << 3;
+			if (mutex)
+				pthread_mutex_lock(mutex);
 			retval = ext2fs_set_inode_bitmap_range2(fs->inode_map,
 					       ino_itr, cnt, inode_bitmap);
+			if (mutex)
+				pthread_mutex_unlock(mutex);
 			if (retval)
 				goto cleanup;
 			ino_itr += inode_nbytes << 3;
 		}
 	}

-	/* Mark group blocks for any BLOCK_UNINIT groups */
-	if (do_block) {
-		retval = mark_uninit_bg_group_blocks(fs);
-		if (retval)
-			goto cleanup;
-	}
-
 success_cleanup:
-	if (inode_bitmap) {
-		ext2fs_free_mem(&inode_bitmap);
-		fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
-	}
-	if (block_bitmap) {
-		ext2fs_free_mem(&block_bitmap);
-		fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+	if (start == 0 && end == fs->group_desc_count - 1) {
+		if (inode_bitmap) {
+			ext2fs_free_mem(&inode_bitmap);
+			fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+		}
+		if (block_bitmap) {
+			ext2fs_free_mem(&block_bitmap);
+			fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+		}
 	}
 	fs->flags |= tail_flags;
 	return 0;
@@ -412,6 +452,154 @@ cleanup:
 	if (buf)
 		ext2fs_free_mem(&buf);
 	return retval;
+
+}
+
+static errcode_t read_bitmaps_range_end(ext2_filsys fs, int do_inode, int do_block)
+{
+	errcode_t retval = 0;
+
+	/* Mark group blocks for any BLOCK_UNINIT groups */
+	if (do_block) {
+		retval = mark_uninit_bg_group_blocks(fs);
+		if (retval)
+			goto cleanup;
+	}
+
+	return retval;
+cleanup:
+	if (do_block) {
+		ext2fs_free_block_bitmap(fs->block_map);
+		fs->block_map = 0;
+	}
+	if (do_inode) {
+		ext2fs_free_inode_bitmap(fs->inode_map);
+		fs->inode_map = 0;
+	}
+	return retval;
+}
+
+static errcode_t read_bitmaps_range(ext2_filsys fs, int do_inode, int do_block,
+				    dgrp_t start, dgrp_t end)
+{
+	errcode_t retval;
+
+	retval = read_bitmaps_range_prepare(fs, do_inode, do_block);
+	if (retval)
+		return retval;
+
+	retval = read_bitmaps_range_start(fs, do_inode, do_block, start, end, NULL);
+	if (retval)
+		return retval;
+
+	return read_bitmaps_range_end(fs, do_inode, do_block);
+}
+
+struct read_bitmaps_thread_info {
+	ext2_filsys	rbt_fs;
+	int 		rbt_do_inode;
+	int		rbt_do_block;
+	dgrp_t		rbt_grp_start;
+	dgrp_t		rbt_grp_end;
+	errcode_t	rbt_retval;
+	pthread_mutex_t *rbt_mutex;
+};
+
+static void* read_bitmaps_thread(void *data)
+{
+	struct read_bitmaps_thread_info *rbt = data;
+
+	rbt->rbt_retval = read_bitmaps_range_start(rbt->rbt_fs,
+				rbt->rbt_do_inode, rbt->rbt_do_block,
+				rbt->rbt_grp_start, rbt->rbt_grp_end,
+				rbt->rbt_mutex);
+	return NULL;
+}
+
+static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+{
+	pthread_attr_t	attr;
+	int num_threads = fs->fs_num_threads;
+	pthread_t *thread_ids = NULL;
+	struct read_bitmaps_thread_info *thread_infos = NULL;
+	pthread_mutex_t rbt_mutex = PTHREAD_MUTEX_INITIALIZER;
+	errcode_t retval;
+	errcode_t rc;
+	dgrp_t average_group;
+	int i;
+
+	if (num_threads <= 1 || (fs->flags & EXT2_FLAG_IMAGE_FILE))
+		return read_bitmaps_range(fs, do_inode, do_block, 0, fs->group_desc_count - 1);
+
+	retval = pthread_attr_init(&attr);
+	if (retval)
+		return retval;
+
+	thread_ids = calloc(sizeof(pthread_t), num_threads);
+	if (!thread_ids)
+		return -ENOMEM;
+
+	thread_infos = calloc(sizeof(struct read_bitmaps_thread_info),
+				num_threads);
+	if (!thread_infos)
+		goto out;
+
+	average_group = fs->group_desc_count / num_threads;
+	if (average_group == 0)
+		average_group = 1;
+
+	retval = read_bitmaps_range_prepare(fs, do_inode, do_block);
+	if (retval)
+		goto out;
+
+	fprintf(stdout, "Multiple threads triggered to read bitmaps\n");
+	for (i = 0; i < num_threads; i++) {
+		thread_infos[i].rbt_fs = fs;
+		thread_infos[i].rbt_do_inode = do_inode;
+		thread_infos[i].rbt_do_block = do_block;
+		thread_infos[i].rbt_mutex = &rbt_mutex;
+		if (i == 0)
+			thread_infos[i].rbt_grp_start = 0;
+		else
+			thread_infos[i].rbt_grp_start = average_group * i + 1;
+
+		if (i == num_threads - 1)
+			thread_infos[i].rbt_grp_end = fs->group_desc_count - 1;
+		else
+			thread_infos[i].rbt_grp_end = average_group * (i + 1);
+		retval = pthread_create(&thread_ids[i], &attr,
+					&read_bitmaps_thread, &thread_infos[i]);
+		if (retval)
+			break;
+	}
+	for (i = 0; i < num_threads; i++) {
+		if (!thread_ids[i])
+			break;
+		rc = pthread_join(thread_ids[i], NULL);
+		if (rc && !retval)
+			retval = rc;
+		rc = thread_infos[i].rbt_retval;
+		if (rc && !retval)
+			retval = rc;
+	}
+out:
+	rc = pthread_attr_destroy(&attr);
+	if (rc && !retval)
+		retval = rc;
+	free(thread_infos);
+	free(thread_ids);
+
+	if (!retval)
+		retval = read_bitmaps_range_end(fs, do_inode, do_block);
+
+	if (!retval) {
+		if (do_inode)
+			fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+		if (do_block)
+			fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+	}
+
+	return retval;
 }

 errcode_t ext2fs_read_inode_bitmap(ext2_filsys fs)
--
2.25.4

Cheers, Andreas

* Re: Fwd: [PATCH] [RFC] ext2fs: parallel bitmap loading
  2020-09-04 21:34 ` Fwd: [PATCH] [RFC] ext2fs: parallel bitmap loading Andreas Dilger
@ 2020-09-16 21:03   ` Theodore Y. Ts'o
  2020-09-17  1:34     ` Andreas Dilger
  0 siblings, 1 reply; 4+ messages in thread
From: Theodore Y. Ts'o @ 2020-09-16 21:03 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Ext4 Developers List, Wang Shilong, saranyamohan, harshads

On Fri, Sep 04, 2020 at 03:34:26PM -0600, Andreas Dilger wrote:
> This is a patch that is part of the parallel e2fsck series that Shilong is working on,
> and does not work by itself, but was requested during discussion on the ext4
> concall today.

Andreas, thanks for sending this patch.  (Also available at [1].)

[1] https://lore.kernel.org/linux-ext4/132401FE-6D25-41B3-99D1-50E7BC746237@dilger.ca/

I took a look at it, and there are a number of issues with it.  First of
all, there seems to be an assumption that (a) the number of threads is
less than the number of block groups, and (b) the number of threads
can evenly divide the number of block groups.  So for example, if the
number of block groups is prime, or if you are trying to use, say, 8 or
16 threads, and the number of block groups is odd, the code in
question will not do the right thing.
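
To illustrate, a split that avoids both assumptions would give each
thread either floor(groups/threads) or one more group, so odd or prime
group counts still divide cleanly.  A minimal sketch (not code from the
patch, just the arithmetic):

    /*
     * Sketch: partition "groups" block groups across "threads" workers.
     * The first (groups % threads) workers take one extra group, so any
     * thread/group ratio yields contiguous, in-range, non-empty spans.
     */
    #include <stdio.h>

    static void partition_groups(unsigned int groups, unsigned int threads)
    {
            unsigned int base = groups / threads;
            unsigned int extra = groups % threads;
            unsigned int start = 0, t;

            for (t = 0; t < threads; t++) {
                    unsigned int len = base + (t < extra ? 1 : 0);

                    if (len == 0)   /* more threads than groups */
                            break;
                    printf("thread %u: groups %u..%u\n",
                           t, start, start + len - 1);
                    start += len;
            }
    }

    int main(void)
    {
            partition_groups(17, 8);  /* prime group count, 8 threads */
            return 0;
    }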

(a) meant that attempting to run the e2fsprogs regression test suite
caused most of the test cases to fail with e2fsck crashing due to
buffer overruns.  I fixed this by changing the number of threads to be
16, or if 16 was greater than the number of block groups, to be the
number of block groups, just for debugging purposes.  However, there
were still a few regression test failures.

I also then tried to use a file system that we had been using for
testing fragmentation issues.  The file system was created on a 10GB
virtual disk by running these commands:

   DEV=/dev/sdc
   mke2fs -t ext4 $DEV 10G
   mount $DEV /mnt
   pushd /mnt
   for t in $(seq 1 6144) ; do
       for i in $(seq 1 25) ; do
           fallocate tb$t-8mb-$i -l 8M
       done
       for i in $(seq 1 2) ; do
           fallocate tb$t-400mb-$i -l 400M
       done
   done
   popd
   umount /mnt

With the patch applied, all of the threads failed with error code 22
(EINVAL), except for one which failed with a bad block group checksum
error.  I haven't had a chance to dig into it further, but I was hoping
that Shilong and/or Saranya might be able to take a closer look at that.

But the other thing that we might want to consider is to add
demand-loading of the block (or inode) bitmap.  We got a complaint
that "e2fsck -E journal_only" was super-slow whereas running the
journal by mounting and unmounting the file system was much faster.
The reason, of course, was that the kernel was only reading those
bitmap blocks that needed to be modified by the orphaned inode
processing, whereas with e2fsprogs, we have to read in all of the
bitmap blocks whether this is necessary or not.

So another idea that we've talked about is teaching libext2fs to be
able to demand load the bitmap, and then when we write out the block
bitmap, we only need to write out those blocks that were loaded.  This
would also speed up running debugfs to examine the file system, as
well as running fuse2fs.  Fortunately, we have abstractions in front
of all of the bitmap accessor functions, and the code paths that would
need to be changed to add demand-loading of bitmaps should be mostly
exclusive of the changes needed for parallel bitmap loading.  So if
Shilong has time to look at making the parallel bitmap loader more
robust, perhaps Saranya could work on the demand-loading idea.
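
As a rough sketch of that idea (every name below is hypothetical, not
the existing libext2fs API): fault in one bitmap block per group on
first access, and remember which groups were loaded so the write-out
only touches those:

    #include <stdlib.h>
    #include <string.h>

    #define GROUPS          128
    #define BLOCK_SIZE      4096

    struct lazy_bitmap {
            unsigned char *group[GROUPS];   /* NULL until first access */
            unsigned char loaded[GROUPS];   /* groups to write back later */
    };

    /* Stand-in for the real block read (io_channel_read_blk64() in
     * libext2fs); here it just returns an all-zero bitmap block. */
    static void read_bitmap_block(unsigned int grp, unsigned char *buf)
    {
            memset(buf, 0, BLOCK_SIZE);
    }

    static unsigned char *get_group(struct lazy_bitmap *lb, unsigned int grp)
    {
            if (!lb->group[grp]) {
                    lb->group[grp] = malloc(BLOCK_SIZE);
                    if (!lb->group[grp])
                            return NULL;
                    read_bitmap_block(grp, lb->group[grp]);
                    lb->loaded[grp] = 1;
            }
            return lb->group[grp];
    }

    /* Bit test: find the owning group, then the bit within its block. */
    static int test_bit(struct lazy_bitmap *lb, unsigned long bit)
    {
            unsigned long per_group = (unsigned long)BLOCK_SIZE * 8;
            unsigned char *map = get_group(lb, bit / per_group);
            unsigned long off = bit % per_group;

            return map ? (map[off >> 3] >> (off & 7)) & 1 : -1;
    }

    int main(void)
    {
            static struct lazy_bitmap lb;   /* zero-init: nothing loaded */

            return test_bit(&lb, 12345);    /* faults in group 0 only */
    }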

Or if Shilong doesn't have time to polish these parallel bitmap
loading changes, we could have Saranya look at cleaning them up --- since
regardless of whether we implement demand-loading or not, parallel
bitmap reading is going to be useful for some use cases (e.g., a full
fsck, dumpe2fs, or e2image).

What do folks think?

						- Ted


* Re: [PATCH] [RFC] ext2fs: parallel bitmap loading
  2020-09-16 21:03   ` Theodore Y. Ts'o
@ 2020-09-17  1:34     ` Andreas Dilger
  2020-09-17  6:50       ` Wang Shilong
  0 siblings, 1 reply; 4+ messages in thread
From: Andreas Dilger @ 2020-09-17  1:34 UTC (permalink / raw)
  To: Theodore Y. Ts'o
  Cc: Ext4 Developers List, Wang Shilong, saranyamohan, harshads

On Sep 16, 2020, at 3:03 PM, Theodore Y. Ts'o <tytso@mit.edu> wrote:
> 
> On Fri, Sep 04, 2020 at 03:34:26PM -0600, Andreas Dilger wrote:
>> This is a patch that is part of the parallel e2fsck series that Shilong
>> is working on, and does not work by itself, but was requested during
>> discussion on the ext4 concall today.
> 
> Andreas, thanks for sending this patch.  (Also available at [1].)
> 
> [1] https://lore.kernel.org/linux-ext4/132401FE-6D25-41B3-99D1-50E7BC746237@dilger.ca/
> 
> I took a look at it, and there are a number of issues with it.  First of
> all, there seems to be an assumption that (a) the number of threads is
> less than the number of block groups, and (b) the number of threads
> can evenly divide the number of block groups.  So for example, if the
> number of block groups is prime, or if you are trying to use, say, 8 or
> 16 threads, and the number of block groups is odd, the code in
> question will not do the right thing.

Yes, the thread count is checked earlier in the parallel e2fsck patch
series to be <= number of block groups.  However, I wasn't aware of any
requirement for groups = N * threads.  It may be coincidental that we
have never tested that case.

In any case, the patch was never really intended to be used by itself,
only for review and discussion of the general approach.

> (a) meant that attempting to run the e2fsprogs regression test suite
> caused most of the test cases to fail with e2fsck crashing due to
> buffer overruns.  I fixed this by changing the number of threads to be
> 16, or if 16 was greater than the number of block groups, to be the
> number of block groups, just for debugging purposes.  However, there
> were still a few regression test failures.
> 
> I also then tried to use a file system that we had been using for
> testing fragmentation issues.  The file system was created on a 10GB
> virtual disk by running these commands:
> 
>   DEV=/dev/sdc
>   mke2fs -t ext4 $DEV 10G
>   mount $DEV /mnt
>   pushd /mnt
>   for t in $(seq 1 6144) ; do
>       for i in $(seq 1 25) ; do
>           fallocate tb$t-8mb-$i -l 8M
>       done
>       for i in $(seq 1 2) ; do
>           fallocate tb$t-400mb-$i -l 400M
>       done
>   done
>   popd
>   umount /mnt
> 
> With the patch applied, all of the threads failed with error code 22
> (EINVAL), except for one which failed with a bad block group checksum
> error.  I haven't had a chance to dig into it further, but I was hoping
> that Shilong and/or Saranya might be able to take a closer look at that.

There may very well be other issues with the patch that make it not
useful as-is in isolation.  I'd have to let Shilong comment on that.

> But the other thing that we might want to consider is to add
> demand-loading of the block (or inode) bitmap.  We got a complaint
> that "e2fsck -E journal_only" was super-slow whereas running the
> journal by mounting and unmounting the file system was much faster.
> The reason, of course, was that the kernel was only reading those
> bitmap blocks that needed to be modified by the orphaned inode
> processing, whereas with e2fsprogs, we have to read in all of the
> bitmap blocks whether this is necessary or not.

Forking threads to do on-demand loading may have a high overhead, so
it would be interesting to investigate a libext2fs IO engine that uses
libaio.  That would allow O_DIRECT reading of filesystem metadata
without double caching, as well as avoid blocking threads.  Alternately,
there is already a "readahead" method exported that could be used to
avoid changing the code too much, using posix_fadvise(WILLNEED), but I
have no idea on how that would perform.
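
For reference, posix_fadvise(POSIX_FADV_WILLNEED) is non-blocking: the
kernel starts readahead and returns immediately, so a loader could hint
each bitmap region just before reading it.  A fragment (the offset and
length would come from the group descriptors, glossed over here):

    /* Hint the kernel to prefetch [offset, offset+length) of fd.
     * Returns 0 or an error number, per posix_fadvise(2). */
    #define _POSIX_C_SOURCE 200112L
    #include <fcntl.h>

    static int prefetch_region(int fd, off_t offset, off_t length)
    {
            return posix_fadvise(fd, offset, length, POSIX_FADV_WILLNEED);
    }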

> So another idea that we've talked about is teaching libext2fs to be
> able to demand load the bitmap, and then when we write out the block
> bitmap, we only need to write out those blocks that were loaded.  This
> would also speed up running debugfs to examine the file system, as
> well as running fuse2fs.  Fortunately, we have abstractions in front
> of all of the bitmap accessor functions, and the code paths that would
> need to be changed to add demand-loading of bitmaps should be mostly
> exclusive of the changes needed for parallel bitmap loading.  So if
> Shilong has time to look at making the parallel bitmap loader more
> robust, perhaps Saranya could work on the demand-loading idea.
> 
> Or if Shilong doesn't have time to polish these parallel bitmap
> loading changes, we could have Saranya look at cleaning them up --- since
> regardless of whether we implement demand-loading or not, parallel
> bitmap reading is going to be useful for some use cases (e.g., a full
> fsck, dumpe2fs, or e2image).

I don't think Shilong will have time to work on major code changes for
the next few weeks at least, due to internal deadlines, after which we
can finish cleaning up and submitting the pfsck patch series upstream.
If you are interested in the whole 59-patch series, it is available via:

git pull https://review.whamcloud.com/tools/e2fsprogs refs/changes/14/39914/1

or viewable online via Gerrit at:

https://review.whamcloud.com/39914

Getting some high-level review/feedback of that patch series would avoid
spending time reworking/rebasing it, only to find that it isn't in the
form you would prefer or that it needs major architectural changes.

Note that this is currently based on top of the Lustre e2fsprogs branch.
While these patches shouldn't cause problems with non-Lustre filesystems,
there are other patches in the series that are not necessarily ready
for submission (e.g. dirdata, Lustre xattr decoding, inode badness, etc).

Cheers, Andreas

* Re: [PATCH] [RFC] ext2fs: parallel bitmap loading
  2020-09-17  1:34     ` Andreas Dilger
@ 2020-09-17  6:50       ` Wang Shilong
  0 siblings, 0 replies; 4+ messages in thread
From: Wang Shilong @ 2020-09-17  6:50 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Theodore Y. Ts'o, Ext4 Developers List, saranyamohan, harshads

Hi,

On Thu, Sep 17, 2020 at 9:34 AM Andreas Dilger <adilger@dilger.ca> wrote:
>
> On Sep 16, 2020, at 3:03 PM, Theodore Y. Ts'o <tytso@mit.edu> wrote:
> >
> > On Fri, Sep 04, 2020 at 03:34:26PM -0600, Andreas Dilger wrote:
> >> This is a patch that is part of the parallel e2fsck series that Shilong
> >> is working on, and does not work by itself, but was requested during
> >> discussion on the ext4 concall today.
> >
> > Andreas, thanks for sending this patch.  (Also available at [1].)
> >
> > [1] https://lore.kernel.org/linux-ext4/132401FE-6D25-41B3-99D1-50E7BC746237@dilger.ca/
> >
> > I took a look at it, and there are a number of issues with it.  First of
> > all, there seems to be an assumption that (a) the number of threads is
> > less than the number of block groups, and (b) the number of threads
> > can evenly divide the number of block groups.  So for example, if the
> > number of block groups is prime, or if you are trying to use, say, 8 or
> > 16 threads, and the number of block groups is odd, the code in
> > question will not do the right thing.
>
> Yes, the thread count is checked earlier in the parallel e2fsck patch
> series to be <= number of block groups.  However, I wasn't aware of any
> requirement for groups = N * threads.  It may be coincidental that we
> have never tested that case.
>
> In any case, the patch was never really intended to be used by itself,
> only for review and discussion of the general approach.
>
> > (a) meant that attempting to run the e2fsprogs regression test suite
> > caused most of the test cases to fail with e2fsck crashing due to
> > buffer overruns.  I fixed this by changing the number of threads to be
> > 16, or if 16 was greater than the number of block groups, to be the
> > number of block groups, just for debugging purposes.  However, there
> > were still a few regression test failures.
> >
> > I also then tried to use a file system that we had been using for
> > testing fragmentation issues.  The file system was created on a 10GB
> > virtual disk by running these commands:
> >
> >   DEV=/dev/sdc
> >   mke2fs -t ext4 $DEV 10G
> >   mount $DEV /mnt
> >   pushd /mnt
> >   for t in $(seq 1 6144) ; do
> >       for i in $(seq 1 25) ; do
> >           fallocate tb$t-8mb-$i -l 8M
> >       done
> >       for i in $(seq 1 2) ; do
> >           fallocate tb$t-400mb-$i -l 400M
> >       done
> >   done
> >   popd
> >   umount /mnt
> >

I tested the attached v2 patch (based on the master branch) locally
with 32 threads, and it passed the test.

[root@server e2fsprogs]# ./e2fsck/e2fsck -f /dev/sda4
e2fsck 1.46-WIP (20-Mar-2020)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
Multiple threads triggered to read bitmaps
/dev/sda4: 77963/3145728 files (0.0% non-contiguous), 12559729/12563825 blocks


> > With the patch applied, all of the threads failed with error code 22
> > (EINVAL), except for one which failed with a bad block group checksum
> > error.  I haven't had a chance to dig into it further, but I was hoping
> > that Shilong and/or Saranya might be able to take a closer look at that.
>
> There may very well be other issues with the patch that make it not
> useful as-is in isolation.  I'd have to let Shilong comment on that.
>
> > But the other thing that we might want to consider is to add
> > demand-loading of the block (or inode) bitmap.  We got a complaint
> > that "e2fsck -E journal_only" was super-slow whereas running the
> > journal by mounting and unmounting the file system was much faster.
> > The reason, of course, was that the kernel was only reading those
> > bitmap blocks that needed to be modified by the orphaned inode
> > processing, whereas with e2fsprogs, we have to read in all of the
> > bitmap blocks whether this is necessary or not.
>
> Forking threads to do on-demand loading may have a high overhead, so
> it would be interesting to investigate a libext2fs IO engine that uses
> libaio.  That would allow O_DIRECT reading of filesystem metadata
> without double caching, as well as avoid blocking threads.  Alternately,
> there is already a "readahead" method exported that could be used to
> avoid changing the code too much, using posix_fadvise(WILLNEED), but I
> have no idea on how that would perform.
>
> > So another idea that we've talked about is teaching libext2fs to be
> > able to demand load the bitmap, and then when we write out the block
> > bitmap, we only need to write out those blocks that were loaded.  This
> > would also speed up running debugfs to examine the file system, as
> > well as running fuse2fs.  Fortunately, we have abstractions in front
> > of all of the bitmap accessor functions, and the code paths that would
> > need to be changed to add demand-loading of bitmaps should be mostly
> > exclusive of the changes needed for parallel bitmap loading.  So if
> > Shilong has time to look at making the parallel bitmap loader more
> > robust, perhaps Saranya could work on the demand-loading idea.
> >
> > Or if Shilong doesn't have time to polish these parallel bitmap
> > loading changes, we could have Saranya look at cleaning them up --- since
> > regardless of whether we implement demand-loading or not, parallel
> > bitmap reading is going to be useful for some use cases (e.g., a full
> > fsck, dumpe2fs, or e2image).
>
> I don't think Shilong will have time to work on major code changes for
> the next few weeks at least, due to internal deadlines, after which we
> can finish cleaning up and submitting the pfsck patch series upstream.
> If you are interested in the whole 59-patch series, it is available via:
>
> git pull https://review.whamcloud.com/tools/e2fsprogs refs/changes/14/39914/1
>
> or viewable online via Gerrit at:
>
> https://review.whamcloud.com/39914
>
> Getting some high-level review/feedback of that patch series would avoid
> spending time reworking/rebasing it, only to find that it isn't in the
> form you would prefer or that it needs major architectural changes.
>
> Note that this is currently based on top of the Lustre e2fsprogs branch.
> While these patches shouldn't cause problems with non-Lustre filesystems,
> there are other patches in the series that are not necessarily ready
> for submission (e.g. dirdata, Lustre xattr decoding, inode badness, etc).
>
> Cheers, Andreas

[-- Attachment #2: v2-0001-LU-8465-ext2fs-parallel-bitmap-loading.patch --]
[-- Type: application/octet-stream, Size: 12413 bytes --]

From 1a3370f37bb1060b8149049651c08bb7f14688a6 Mon Sep 17 00:00:00 2001
From: Wang Shilong <wshilong@ddn.com>
Date: Thu, 3 Sep 2020 10:51:49 +0800
Subject: [PATCH v2] LU-8465 ext2fs: parallel bitmap loading

In our benchmarking of a PiB-sized filesystem, pass5 takes
10446s to finish, and 99.5% of that time is spent reading bitmaps.

It makes sense to read the bitmaps using multiple threads;
a quick benchmark dropped the time from 10446s to 626s with 64 threads.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Change-Id: I8d7389413a09bf262d0ae657cb485e8862385d0c
---
 lib/ext2fs/ext2fs.h     |  28 +++-
 lib/ext2fs/rw_bitmaps.c | 281 ++++++++++++++++++++++++++++++++++------
 2 files changed, 269 insertions(+), 40 deletions(-)

diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 69c8a3ff..e8aef736 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -255,10 +255,11 @@ struct struct_ext2_filsys {
 	int				cluster_ratio_bits;
 	__u16				default_bitmap_type;
 	__u16				pad;
+	__u32				fs_num_threads;
 	/*
 	 * Reserved for future expansion
 	 */
-	__u32				reserved[5];
+	__u32				reserved[4];
 
 	/*
 	 * Reserved for the use of the calling application.
@@ -2106,6 +2107,31 @@ ext2fs_const_inode(const struct ext2_inode_large * large_inode)
 	return (const struct ext2_inode *) large_inode;
 }
 
+static dgrp_t ext2fs_get_avg_group(ext2_filsys fs)
+{
+	dgrp_t			 average_group;
+	unsigned		flexbg_size;
+
+	if (fs->fs_num_threads <= 1)
+		return fs->group_desc_count;
+
+	average_group = fs->group_desc_count / fs->fs_num_threads;
+	if (average_group <= 1)
+		return 1;
+
+	if (ext2fs_has_feature_flex_bg(fs->super)) {
+		int times = 1;
+
+		flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+		if (average_group % flexbg_size) {
+			times = average_group / flexbg_size;
+			average_group = times * flexbg_size;
+		}
+	}
+
+	return average_group;
+}
+
 #undef _INLINE_
 #endif
 
diff --git a/lib/ext2fs/rw_bitmaps.c b/lib/ext2fs/rw_bitmaps.c
index d80c9eb8..f5635c4d 100644
--- a/lib/ext2fs/rw_bitmaps.c
+++ b/lib/ext2fs/rw_bitmaps.c
@@ -23,6 +23,7 @@
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
+#include <pthread.h>
 
 #include "ext2_fs.h"
 #include "ext2fs.h"
@@ -205,22 +206,12 @@ static int bitmap_tail_verify(unsigned char *bitmap, int first, int last)
 	return 1;
 }
 
-static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+static errcode_t read_bitmaps_range_prepare(ext2_filsys fs, int do_inode, int do_block)
 {
-	dgrp_t i;
-	char *block_bitmap = 0, *inode_bitmap = 0;
-	char *buf;
 	errcode_t retval;
 	int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
 	int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
-	int tail_flags = 0;
-	int csum_flag;
-	unsigned int	cnt;
-	blk64_t	blk;
-	blk64_t	blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
-	blk64_t   blk_cnt;
-	ext2_ino_t ino_itr = 1;
-	ext2_ino_t ino_cnt;
+	char *buf;
 
 	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
 
@@ -230,11 +221,10 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 
 	fs->write_bitmaps = ext2fs_write_bitmaps;
 
-	csum_flag = ext2fs_has_group_desc_csum(fs);
-
 	retval = ext2fs_get_mem(strlen(fs->device_name) + 80, &buf);
 	if (retval)
 		return retval;
+
 	if (do_block) {
 		if (fs->block_map)
 			ext2fs_free_block_bitmap(fs->block_map);
@@ -243,11 +233,8 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		retval = ext2fs_allocate_block_bitmap(fs, buf, &fs->block_map);
 		if (retval)
 			goto cleanup;
-		retval = io_channel_alloc_buf(fs->io, 0, &block_bitmap);
-		if (retval)
-			goto cleanup;
-	} else
-		block_nbytes = 0;
+	}
+
 	if (do_inode) {
 		if (fs->inode_map)
 			ext2fs_free_inode_bitmap(fs->inode_map);
@@ -256,13 +243,69 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		retval = ext2fs_allocate_inode_bitmap(fs, buf, &fs->inode_map);
 		if (retval)
 			goto cleanup;
-		retval = io_channel_alloc_buf(fs->io, 0, &inode_bitmap);
+	}
+	ext2fs_free_mem(&buf);
+
+	return retval;
+
+cleanup:
+	if (do_block) {
+		ext2fs_free_block_bitmap(fs->block_map);
+		fs->block_map = 0;
+	}
+	if (do_inode) {
+		ext2fs_free_inode_bitmap(fs->inode_map);
+		fs->inode_map = 0;
+	}
+	if (buf)
+		ext2fs_free_mem(&buf);
+	return retval;
+}
+
+static errcode_t read_bitmaps_range_start(ext2_filsys fs, int do_inode, int do_block,
+					  dgrp_t start, dgrp_t end, pthread_mutex_t *mutex,
+					  io_channel io)
+{
+	dgrp_t i;
+	char *block_bitmap = 0, *inode_bitmap = 0;
+	char *buf;
+	errcode_t retval;
+	int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
+	int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
+	int tail_flags = 0;
+	int csum_flag;
+	unsigned int	cnt;
+	blk64_t	blk;
+	blk64_t	blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
+	blk64_t   blk_cnt;
+	ext2_ino_t ino_itr = 1;
+	ext2_ino_t ino_cnt;
+	io_channel this_io;
+
+	if (!io)
+		this_io = fs->io;
+	else
+		this_io = io;
+
+	csum_flag = ext2fs_has_group_desc_csum(fs);
+
+	if (do_block) {
+		retval = io_channel_alloc_buf(this_io, 0, &block_bitmap);
 		if (retval)
 			goto cleanup;
-	} else
+	} else {
+		block_nbytes = 0;
+	}
+
+	if (do_inode) {
+		retval = io_channel_alloc_buf(this_io, 0, &inode_bitmap);
+		if (retval)
+			goto cleanup;
+	} else {
 		inode_nbytes = 0;
-	ext2fs_free_mem(&buf);
+	}
 
+	/* io should be null */
 	if (fs->flags & EXT2_FLAG_IMAGE_FILE) {
 		blk = (ext2fs_le32_to_cpu(fs->image_header->offset_inodemap) / fs->blocksize);
 		ino_cnt = fs->super->s_inodes_count;
@@ -303,7 +346,9 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		goto success_cleanup;
 	}
 
-	for (i = 0; i < fs->group_desc_count; i++) {
+	blk_itr += ((blk64_t)start * (block_nbytes << 3));
+	ino_itr += ((blk64_t)start * (inode_nbytes << 3));
+	for (i = start; i <= end; i++) {
 		if (block_bitmap) {
 			blk = ext2fs_block_bitmap_loc(fs, i);
 			if ((csum_flag &&
@@ -312,7 +357,7 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			    (blk >= ext2fs_blocks_count(fs->super)))
 				blk = 0;
 			if (blk) {
-				retval = io_channel_read_blk64(fs->io, blk,
+				retval = io_channel_read_blk64(this_io, blk,
 							       1, block_bitmap);
 				if (retval) {
 					retval = EXT2_ET_BLOCK_BITMAP_READ;
@@ -333,8 +378,12 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			} else
 				memset(block_bitmap, 0, block_nbytes);
 			cnt = block_nbytes << 3;
+			if (mutex)
+				pthread_mutex_lock(mutex);
 			retval = ext2fs_set_block_bitmap_range2(fs->block_map,
 					       blk_itr, cnt, block_bitmap);
+			if (mutex)
+				pthread_mutex_unlock(mutex);
 			if (retval)
 				goto cleanup;
 			blk_itr += block_nbytes << 3;
@@ -347,7 +396,7 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			    (blk >= ext2fs_blocks_count(fs->super)))
 				blk = 0;
 			if (blk) {
-				retval = io_channel_read_blk64(fs->io, blk,
+				retval = io_channel_read_blk64(this_io, blk,
 							       1, inode_bitmap);
 				if (retval) {
 					retval = EXT2_ET_INODE_BITMAP_READ;
@@ -369,29 +418,28 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			} else
 				memset(inode_bitmap, 0, inode_nbytes);
 			cnt = inode_nbytes << 3;
+			if (mutex)
+				pthread_mutex_lock(mutex);
 			retval = ext2fs_set_inode_bitmap_range2(fs->inode_map,
 					       ino_itr, cnt, inode_bitmap);
+			if (mutex)
+				pthread_mutex_unlock(mutex);
 			if (retval)
 				goto cleanup;
 			ino_itr += inode_nbytes << 3;
 		}
 	}
 
-	/* Mark group blocks for any BLOCK_UNINIT groups */
-	if (do_block) {
-		retval = mark_uninit_bg_group_blocks(fs);
-		if (retval)
-			goto cleanup;
-	}
-
 success_cleanup:
-	if (inode_bitmap) {
-		ext2fs_free_mem(&inode_bitmap);
-		fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
-	}
-	if (block_bitmap) {
-		ext2fs_free_mem(&block_bitmap);
-		fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+	if (start == 0 && end == fs->group_desc_count - 1) {
+		if (inode_bitmap) {
+			ext2fs_free_mem(&inode_bitmap);
+			fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+		}
+		if (block_bitmap) {
+			ext2fs_free_mem(&block_bitmap);
+			fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+		}
 	}
 	fs->flags |= tail_flags;
 	return 0;
@@ -412,6 +460,161 @@ cleanup:
 	if (buf)
 		ext2fs_free_mem(&buf);
 	return retval;
+
+}
+
+static errcode_t read_bitmaps_range_end(ext2_filsys fs, int do_inode, int do_block)
+{
+	errcode_t retval = 0;
+
+	/* Mark group blocks for any BLOCK_UNINIT groups */
+	if (do_block) {
+		retval = mark_uninit_bg_group_blocks(fs);
+		if (retval)
+			goto cleanup;
+	}
+
+	return retval;
+cleanup:
+	if (do_block) {
+		ext2fs_free_block_bitmap(fs->block_map);
+		fs->block_map = 0;
+	}
+	if (do_inode) {
+		ext2fs_free_inode_bitmap(fs->inode_map);
+		fs->inode_map = 0;
+	}
+	return retval;
+}
+
+static errcode_t read_bitmaps_range(ext2_filsys fs, int do_inode, int do_block,
+				    dgrp_t start, dgrp_t end)
+{
+	errcode_t retval;
+
+	retval = read_bitmaps_range_prepare(fs, do_inode, do_block);
+	if (retval)
+		return retval;
+
+	retval = read_bitmaps_range_start(fs, do_inode, do_block, start, end, NULL, NULL);
+	if (retval)
+		return retval;
+
+	return read_bitmaps_range_end(fs, do_inode, do_block);
+}
+
+struct read_bitmaps_thread_info {
+	ext2_filsys	rbt_fs;
+	int 		rbt_do_inode;
+	int		rbt_do_block;
+	dgrp_t		rbt_grp_start;
+	dgrp_t		rbt_grp_end;
+	errcode_t	rbt_retval;
+	pthread_mutex_t *rbt_mutex;
+	io_channel      rbt_io;
+};
+
+static void* read_bitmaps_thread(void *data)
+{
+	struct read_bitmaps_thread_info *rbt = data;
+
+	rbt->rbt_retval = read_bitmaps_range_start(rbt->rbt_fs,
+				rbt->rbt_do_inode, rbt->rbt_do_block,
+				rbt->rbt_grp_start, rbt->rbt_grp_end,
+				rbt->rbt_mutex, rbt->rbt_io);
+	return NULL;
+}
+
+static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+{
+	pthread_attr_t	attr;
+	int num_threads = fs->fs_num_threads;
+	pthread_t *thread_ids = NULL;
+	struct read_bitmaps_thread_info *thread_infos = NULL;
+	pthread_mutex_t rbt_mutex = PTHREAD_MUTEX_INITIALIZER;
+	errcode_t retval;
+	errcode_t rc;
+	dgrp_t average_group;
+	int i;
+	io_manager manager = unix_io_manager;
+
+	if (num_threads <= 1 || (fs->flags & EXT2_FLAG_IMAGE_FILE))
+		return read_bitmaps_range(fs, do_inode, do_block, 0, fs->group_desc_count - 1);
+
+	retval = pthread_attr_init(&attr);
+	if (retval)
+		return retval;
+
+	thread_ids = calloc(sizeof(pthread_t), num_threads);
+	if (!thread_ids)
+		return -ENOMEM;
+
+	thread_infos = calloc(sizeof(struct read_bitmaps_thread_info),
+				num_threads);
+	if (!thread_infos)
+		goto out;
+
+	average_group = ext2fs_get_avg_group(fs);
+	retval = read_bitmaps_range_prepare(fs, do_inode, do_block);
+	if (retval)
+		goto out;
+
+	fprintf(stdout, "Multiple threads triggered to read bitmaps\n");
+	for (i = 0; i < num_threads; i++) {
+		thread_infos[i].rbt_fs = fs;
+		thread_infos[i].rbt_do_inode = do_inode;
+		thread_infos[i].rbt_do_block = do_block;
+		thread_infos[i].rbt_mutex = &rbt_mutex;
+		if (i == 0)
+			thread_infos[i].rbt_grp_start = 0;
+		else
+			thread_infos[i].rbt_grp_start = average_group * i + 1;
+
+		if (i == num_threads - 1)
+			thread_infos[i].rbt_grp_end = fs->group_desc_count - 1;
+		else
+			thread_infos[i].rbt_grp_end = average_group * (i + 1);
+		retval = manager->open(fs->device_name, IO_FLAG_RW,
+					&thread_infos[i].rbt_io);
+		if (retval)
+			break;
+		io_channel_set_blksize(thread_infos[i].rbt_io, fs->io->block_size);
+		retval = pthread_create(&thread_ids[i], &attr,
+					&read_bitmaps_thread, &thread_infos[i]);
+		if (retval) {
+			io_channel_close(thread_infos[i].rbt_io);
+			break;
+		}
+	}
+	for (i = 0; i < num_threads; i++) {
+		if (!thread_ids[i])
+			break;
+		rc = pthread_join(thread_ids[i], NULL);
+		if (rc && !retval)
+			retval = rc;
+		rc = thread_infos[i].rbt_retval;
+		if (rc && !retval)
+			retval = rc;
+		io_channel_close(thread_infos[i].rbt_io);
+	}
+out:
+	rc = pthread_attr_destroy(&attr);
+	if (rc && !retval)
+		retval = rc;
+	free(thread_infos);
+	free(thread_ids);
+
+	if (!retval)
+		retval = read_bitmaps_range_end(fs, do_inode, do_block);
+
+	if (!retval) {
+		if (do_inode)
+			fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+		if (do_block)
+			fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+	}
+
+	return retval;
 }
 
 errcode_t ext2fs_read_inode_bitmap(ext2_filsys fs)
-- 
2.25.4

