* [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
@ 2019-10-18  1:08 Philippe Liard
  2019-10-18 16:32 ` Christoph Hellwig
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Philippe Liard @ 2019-10-18  1:08 UTC (permalink / raw)
  To: phillip; +Cc: linux-kernel, groeck, Philippe Liard

The ll_rw_block() function has been deprecated in favor of BIO, which
appears to come with large performance improvements.

This patch decreases boot time by close to 40% when using squashfs for
the root file-system. This is observed at least in the context of
starting an Android VM on Chrome OS using crosvm
(https://chromium.googlesource.com/chromiumos/platform/crosvm). The
patch was tested on 4.19 as well as master.

This patch is largely based on Adrien Schildknecht's patch that was
originally sent as https://lkml.org/lkml/2017/9/22/814, though with some
significant changes and simplifications, while also taking Phillip
Lougher's feedback into account, in particular around preserving support
for FILE_CACHE.

Signed-off-by: Philippe Liard <pliard@google.com>
---
 fs/squashfs/block.c | 377 ++++++++++++++++++++++++++++----------------
 1 file changed, 244 insertions(+), 133 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index f098b9f1c396..5ec7528b9d2f 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -26,12 +26,14 @@
  * datablocks and metadata blocks.
  */
 
+#include <linux/blkdev.h>
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/bio.h>
+#include <linux/pagemap.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -39,45 +41,207 @@
 #include "decompressor.h"
 #include "page_actor.h"
 
+struct squashfs_bio_request {
+	struct buffer_head **bh;
+	int bh_start;
+	int bh_len;
+};
+
 /*
- * Read the metadata block length, this is stored in the first two
- * bytes of the metadata block.
+ * Returns the amount of bytes copied to the page actor or an error as
+ * a negative number.
  */
-static struct buffer_head *get_block_length(struct super_block *sb,
-			u64 *cur_index, int *offset, int *length)
+static int squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
+				struct squashfs_page_actor *actor,
+				int blk_offset, int req_length, int blk_size)
 {
-	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	struct buffer_head *bh;
+	int bytes_to_copy, copied_bytes = 0;
+	int actor_offset = 0, bh_offset = 0;
+	const int input_size = nr_buffers * blk_size;
+	const int output_capacity = actor->pages * PAGE_SIZE;
+	void *actor_addr = squashfs_first_page(actor);
+
+	if (blk_offset + req_length > input_size ||
+	    req_length > output_capacity)
+		return -EIO;
+
+	while (copied_bytes < req_length) {
+		bytes_to_copy = min_t(int, blk_size - blk_offset,
+				      PAGE_SIZE - actor_offset);
+		bytes_to_copy = min_t(int, bytes_to_copy,
+				      req_length - copied_bytes);
+		memcpy(actor_addr + actor_offset,
+		       bh[bh_offset]->b_data + blk_offset, bytes_to_copy);
+
+		actor_offset += bytes_to_copy;
+		copied_bytes += bytes_to_copy;
+		blk_offset += bytes_to_copy;
+
+		if (actor_offset >= PAGE_SIZE) {
+			actor_offset = 0;
+			actor_addr = squashfs_next_page(actor);
+		}
+		if (blk_offset >= blk_size) {
+			blk_offset = 0;
+			++bh_offset;
+		}
+	}
+	squashfs_finish_page(actor);
+	return copied_bytes;
+}
+
+static void squashfs_bio_end_io(struct bio *bio)
+{
+	struct squashfs_bio_request *bio_req = bio->bi_private;
+	blk_status_t error = bio->bi_status;
+	int i;
+
+	bio_put(bio);
+
+	for (i = bio_req->bh_start; i < bio_req->bh_start + bio_req->bh_len;
+	     ++i) {
+		if (error)
+			clear_buffer_uptodate(bio_req->bh[i]);
+		else
+			set_buffer_uptodate(bio_req->bh[i]);
+		unlock_buffer(bio_req->bh[i]);
+	}
+	kfree(bio_req);
+}
+
+static void put_bh_array(struct buffer_head **bh, int start, int len)
+{
+	int i;
+
+	for (i = start; i < start + len; ++i)
+		put_bh(bh[i]);
+}
 
-	bh = sb_bread(sb, *cur_index);
-	if (bh == NULL)
+static struct buffer_head **
+create_buffer_head_array(struct super_block *sb, int nr_buffers, u64 block)
+{
+	int i;
+	struct buffer_head **bh;
+
+	bh = kmalloc_array(nr_buffers, sizeof(*bh), GFP_NOIO);
+	if (!bh)
 		return NULL;
 
-	if (msblk->devblksize - *offset == 1) {
-		*length = (unsigned char) bh->b_data[*offset];
-		put_bh(bh);
-		bh = sb_bread(sb, ++(*cur_index));
-		if (bh == NULL)
+	for (i = 0; i < nr_buffers; ++i) {
+		bh[i] = sb_getblk(sb, block + i);
+		if (!bh[i]) {
+			put_bh_array(bh, 0, i);
+			kfree(bh);
 			return NULL;
-		*length |= (unsigned char) bh->b_data[0] << 8;
-		*offset = 1;
-	} else {
-		*length = (unsigned char) bh->b_data[*offset] |
-			(unsigned char) bh->b_data[*offset + 1] << 8;
-		*offset += 2;
-
-		if (*offset == msblk->devblksize) {
-			put_bh(bh);
-			bh = sb_bread(sb, ++(*cur_index));
-			if (bh == NULL)
-				return NULL;
-			*offset = 0;
 		}
 	}
-
 	return bh;
 }
 
+static void free_bh_array(struct buffer_head **bh, int nr_buffers)
+{
+	if (bh) {
+		put_bh_array(bh, 0, nr_buffers);
+		kfree(bh);
+	}
+}
+
+/*
+ * Returns 0 on success and fills bh_ptr, nr_buffers and block_offset. An error
+ * is otherwise returned as a negative number. Note that the caller must free
+ * *bh_ptr on success.
+ */
+static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
+			     struct buffer_head ***bh_ptr, int *nr_buffers,
+			     int *block_offset)
+{
+	struct bio *bio = NULL;
+	struct buffer_head *bh, **bh_array;
+	struct squashfs_bio_request *bio_req = NULL;
+	int i, prev_block = 0;
+
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	const u64 read_start = round_down(index, msblk->devblksize);
+	const sector_t block = read_start >> msblk->devblksize_log2;
+
+	const u64 read_end = round_up(index + length, msblk->devblksize);
+	const sector_t block_end = read_end >> msblk->devblksize_log2;
+
+	const int blksz = msblk->devblksize;
+	const int bio_max_pages = min_t(int, block_end - block, BIO_MAX_PAGES);
+	int offset = read_start - round_down(index, PAGE_SIZE);
+	int res;
+
+	*block_offset = index & ((1 << msblk->devblksize_log2) - 1);
+	*nr_buffers = block_end - block;
+	bh_array = create_buffer_head_array(sb, *nr_buffers, block);
+	*bh_ptr = bh_array;
+	if (!bh_array)
+		return -ENOMEM;
+
+	/* Create and submit the BIOs */
+	for (i = 0; i < *nr_buffers; ++i, offset += blksz) {
+		bh = bh_array[i];
+		lock_buffer(bh);
+		if (buffer_uptodate(bh)) {
+			unlock_buffer(bh);
+			continue;
+		}
+		offset %= PAGE_SIZE;
+
+		/* Append the buffer to the current BIO if it is contiguous */
+		if (bio && bio_req && prev_block + 1 == i) {
+			if (bio_add_page(bio, bh->b_page, blksz, offset)) {
+				bio_req->bh_len++;
+				prev_block = i;
+				continue;
+			}
+		}
+
+		/* Otherwise, submit the current BIO and create a new one */
+		if (bio)
+			submit_bio(bio);
+
+		bio_req = kzalloc(sizeof(struct squashfs_bio_request),
+				  GFP_NOIO);
+		bio = bio_req ? bio_alloc(GFP_NOIO, bio_max_pages) : NULL;
+		if (!bio) {
+			kfree(bio_req);
+			unlock_buffer(bh);
+			res = -ENOMEM;
+			goto cleanup;
+		}
+
+		bio_set_dev(bio, sb->s_bdev);
+		bio->bi_iter.bi_sector =
+			(block + i) * (msblk->devblksize >> SECTOR_SHIFT);
+		bio->bi_private = bio_req;
+		bio->bi_end_io = squashfs_bio_end_io;
+		bio->bi_opf = READ;
+
+		bio_req->bh = bh_array;
+		bio_req->bh_start = i;
+		bio_req->bh_len = 1;
+		bio_add_page(bio, bh->b_page, blksz, offset);
+		prev_block = i;
+	}
+	if (bio)
+		submit_bio(bio);
+
+	res = 0;
+
+cleanup:
+	for (i = 0; i < *nr_buffers; ++i) {
+		wait_on_buffer(bh_array[i]);
+		if (!buffer_uptodate(bh_array[i]) && res == 0)
+			res = -EIO;
+	}
+	if (res) {
+		free_bh_array(bh_array, *nr_buffers);
+		*bh_ptr = NULL;
+	}
+	return res;
+}
 
 /*
  * Read and decompress a metadata block or datablock.  Length is non-zero
@@ -89,129 +253,76 @@ static struct buffer_head *get_block_length(struct super_block *sb,
  * algorithms).
  */
 int squashfs_read_data(struct super_block *sb, u64 index, int length,
-		u64 *next_index, struct squashfs_page_actor *output)
+		       u64 *next_index, struct squashfs_page_actor *output)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	struct buffer_head **bh;
-	int offset = index & ((1 << msblk->devblksize_log2) - 1);
-	u64 cur_index = index >> msblk->devblksize_log2;
-	int bytes, compressed, b = 0, k = 0, avail, i;
-
-	bh = kcalloc(((output->length + msblk->devblksize - 1)
-		>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
-	if (bh == NULL)
-		return -ENOMEM;
+	int res;
+	struct buffer_head **bh = NULL;
+	int nr_buffers;
+	int compressed;
+	int offset;
 
-	if (length) {
-		/*
-		 * Datablock.
-		 */
-		bytes = -offset;
-		compressed = SQUASHFS_COMPRESSED_BLOCK(length);
-		length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
-		if (next_index)
-			*next_index = index + length;
-
-		TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
-			index, compressed ? "" : "un", length, output->length);
-
-		if (length < 0 || length > output->length ||
-				(index + length) > msblk->bytes_used)
-			goto read_failure;
-
-		for (b = 0; bytes < length; b++, cur_index++) {
-			bh[b] = sb_getblk(sb, cur_index);
-			if (bh[b] == NULL)
-				goto block_release;
-			bytes += msblk->devblksize;
-		}
-		ll_rw_block(REQ_OP_READ, 0, b, bh);
-	} else {
+	if (length == 0) {
 		/*
 		 * Metadata block.
 		 */
-		if ((index + 2) > msblk->bytes_used)
-			goto read_failure;
+		length = 2;
+		if (index + length > msblk->bytes_used) {
+			res = -EIO;
+			goto out;
+		}
+		res = squashfs_bio_read(sb, index, length, &bh, &nr_buffers,
+					&offset);
+		if (res)
+			goto out;
 
-		bh[0] = get_block_length(sb, &cur_index, &offset, &length);
-		if (bh[0] == NULL)
-			goto read_failure;
-		b = 1;
+		/* Extract the length of the metadata block */
+		length = (u8) bh[0]->b_data[offset];
+		length |= offset == msblk->devblksize - 1
+			? (u8) bh[1]->b_data[0] << 8
+			: (u8) bh[0]->b_data[offset + 1] << 8;
 
-		bytes = msblk->devblksize - offset;
 		compressed = SQUASHFS_COMPRESSED(length);
 		length = SQUASHFS_COMPRESSED_SIZE(length);
-		if (next_index)
-			*next_index = index + length + 2;
 
-		TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
-				compressed ? "" : "un", length);
-
-		if (length < 0 || length > output->length ||
-					(index + length) > msblk->bytes_used)
-			goto block_release;
-
-		for (; bytes < length; b++) {
-			bh[b] = sb_getblk(sb, ++cur_index);
-			if (bh[b] == NULL)
-				goto block_release;
-			bytes += msblk->devblksize;
-		}
-		ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1);
+		free_bh_array(bh, nr_buffers);
+		bh = NULL;
+		index += 2;
+	} else {
+		/*
+		 * Data block.
+		 */
+		compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+		length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
 	}
+	if (next_index)
+		*next_index = index + length;
 
-	for (i = 0; i < b; i++) {
-		wait_on_buffer(bh[i]);
-		if (!buffer_uptodate(bh[i]))
-			goto block_release;
-	}
+	res = squashfs_bio_read(sb, index, length, &bh, &nr_buffers, &offset);
+	if (res)
+		goto out;
 
 	if (compressed) {
-		if (!msblk->stream)
-			goto read_failure;
-		length = squashfs_decompress(msblk, bh, b, offset, length,
-			output);
-		if (length < 0)
-			goto read_failure;
-	} else {
-		/*
-		 * Block is uncompressed.
-		 */
-		int in, pg_offset = 0;
-		void *data = squashfs_first_page(output);
-
-		for (bytes = length; k < b; k++) {
-			in = min(bytes, msblk->devblksize - offset);
-			bytes -= in;
-			while (in) {
-				if (pg_offset == PAGE_SIZE) {
-					data = squashfs_next_page(output);
-					pg_offset = 0;
-				}
-				avail = min_t(int, in, PAGE_SIZE -
-						pg_offset);
-				memcpy(data + pg_offset, bh[k]->b_data + offset,
-						avail);
-				in -= avail;
-				pg_offset += avail;
-				offset += avail;
-			}
-			offset = 0;
-			put_bh(bh[k]);
+		if (!msblk->stream) {
+			res = -EIO;
+			goto out;
 		}
-		squashfs_finish_page(output);
+		/* Note that this calls put_bh() */
+		res = squashfs_decompress(msblk, bh, nr_buffers, offset, length,
+					  output);
+		kfree(bh);
+		bh = NULL;
+	} else {
+		res = squashfs_bh_to_actor(bh, nr_buffers, output, offset,
+					   length, msblk->devblksize);
 	}
+out:
+	TRACE("compressed=%d index=%lld length=%d next_index=%lld result=%d\n",
+	      compressed, index, length, next_index ? *next_index : -1, res);
 
-	kfree(bh);
-	return length;
-
-block_release:
-	for (; k < b; k++)
-		put_bh(bh[k]);
+	free_bh_array(bh, nr_buffers);
 
-read_failure:
-	ERROR("squashfs_read_data failed to read block 0x%llx\n",
-					(unsigned long long) index);
-	kfree(bh);
-	return -EIO;
+	if (res < 0)
+		ERROR("Failed to read block 0x%llx\n", index);
+	return res;
 }
-- 
2.23.0.866.gb869b98d4c-goog



* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-18  1:08 [PATCH] squashfs: Migrate from ll_rw_block usage to BIO Philippe Liard
@ 2019-10-18 16:32 ` Christoph Hellwig
  2019-10-24  1:23 ` Philippe Liard
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2019-10-18 16:32 UTC (permalink / raw)
  To: Philippe Liard; +Cc: phillip, linux-kernel, groeck

I don't see why you still need buffer_heads at all.  Basically
if you replace each of your allocated buffer heads with a
simple page allocation the code will be much simpler (this version
adds more than 100 lines of code!) and probably still a bit faster
as you don't need the squashfs_bio_request allocation either.


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-18  1:08 [PATCH] squashfs: Migrate from ll_rw_block usage to BIO Philippe Liard
  2019-10-18 16:32 ` Christoph Hellwig
@ 2019-10-24  1:23 ` Philippe Liard
  2019-10-24  5:41   ` Gao Xiang
  2019-10-25  0:45 ` Philippe Liard
  2019-10-29  4:10 ` Philippe Liard
  3 siblings, 1 reply; 11+ messages in thread
From: Philippe Liard @ 2019-10-24  1:23 UTC (permalink / raw)
  To: phillip; +Cc: linux-kernel, groeck, pliard

Thanks Christoph for taking a look. I like the idea of simplifying this if
possible. I think I understand your suggestion in principle but I'm not
seeing a way to apply it here. Would it be possible for you to be a little
more specific? Let me try to explain this below.

My admittedly limited understanding is that using BIO indirectly requires
buffer_head or an alternative including some synchronization mechanism at
least.
It's true that the bio_{alloc,add_page,submit}() functions don't require
passing a buffer_head. However because bio_submit() is asynchronous AFAICT
the client needs to use a synchronization mechanism to wait for and notify
the completion of the request which buffer heads provide. This is achieved
respectively by wait_on_buffer() and {set,clear}_buffer_uptodate().

Another dependency on buffer heads is the fact that squashfs_read_data()
calls into other squashfs functions operating on buffer heads outside this
file. For example squashfs_decompress() operates on a buffer_head array.

Given that bio_submit() is asynchronous I'm also not seeing how the
squashfs_bio_request allocation can be removed? There can be multiple BIO
requests in flight each needing to carry some context used on completion
of the request.


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-24  1:23 ` Philippe Liard
@ 2019-10-24  5:41   ` Gao Xiang
  0 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-10-24  5:41 UTC (permalink / raw)
  To: Philippe Liard; +Cc: phillip, linux-kernel, groeck

On Thu, Oct 24, 2019 at 10:23:54AM +0900, Philippe Liard wrote:
> Thanks Christoph for taking a look. I like the idea of simplifying this if
> possible. I think I understand your suggestion in principle but I'm not
> seeing a way to apply it here. Would it be possible for you to be a little
> more specific? Let me try to explain this below.
> 
> My admittedly limited understanding is that using BIO indirectly requires
> buffer_head or an alternative including some synchronization mechanism at
> least.
> It's true that the bio_{alloc,add_page,submit}() functions don't require
> passing a buffer_head. However because bio_submit() is asynchronous AFAICT
> the client needs to use a synchronization mechanism to wait for and notify
> the completion of the request which buffer heads provide. This is achieved
> respectively by wait_on_buffer() and {set,clear}_buffer_uptodate().
> 
> Another dependency on buffer heads is the fact that squashfs_read_data()
> calls into other squashfs functions operating on buffer heads outside this
> file. For example squashfs_decompress() operates on a buffer_head array.
> 
> Given that bio_submit() is asynchronous I'm also not seeing how the
> squashfs_bio_request allocation can be removed? There can be multiple BIO
> requests in flight each needing to carry some context used on completion
> of the request.

Personally speaking, just for Android-related use cases, I'd suggest the
latest EROFS if you care about overall system performance more than
compression ratio, even with https://lkml.org/lkml/2017/9/22/814 applied
(you can benchmark it); we put a lot of effort into this 3 years ago.

And it's not only about performance but also noticeable memory overhead
(a lot of extra memory allocations) and heavy page cache thrashing in
low-memory scenarios (which are very common [1]).

[1] https://linuxplumbersconf.org/event/4/contributions/404/attachments/326/550/Handling_memory_pressure_on_Android.pdf

Thanks,
Gao Xiang




* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-18  1:08 [PATCH] squashfs: Migrate from ll_rw_block usage to BIO Philippe Liard
  2019-10-18 16:32 ` Christoph Hellwig
  2019-10-24  1:23 ` Philippe Liard
@ 2019-10-25  0:45 ` Philippe Liard
  2019-10-25  2:53   ` Gao Xiang
  2019-10-29  4:10 ` Philippe Liard
  3 siblings, 1 reply; 11+ messages in thread
From: Philippe Liard @ 2019-10-25  0:45 UTC (permalink / raw)
  To: phillip; +Cc: linux-kernel, groeck, pliard

> Personally speaking, just for Android-related use cases, I'd suggest the
> latest EROFS if you care about overall system performance more than
> compression ratio, even with https://lkml.org/lkml/2017/9/22/814 applied
> (you can benchmark it); we put a lot of effort into this 3 years ago.
>
> And it's not only about performance but also noticeable memory overhead
> (a lot of extra memory allocations) and heavy page cache thrashing in
> low-memory scenarios (which are very common [1]).

Thanks for the suggestion. EROFS is on our radar and we will
(re)consider it once it goes out of staging. But we will most likely
stay on squashfs until this happens.


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-25  0:45 ` Philippe Liard
@ 2019-10-25  2:53   ` Gao Xiang
       [not found]     ` <CABXOdTeQTapfvKqGrqZME8JACeJhaHram_ZWk7ZZX2VWvYORaw@mail.gmail.com>
  0 siblings, 1 reply; 11+ messages in thread
From: Gao Xiang @ 2019-10-25  2:53 UTC (permalink / raw)
  To: Philippe Liard; +Cc: phillip, linux-kernel, groeck, linux-erofs, Chao Yu

On Fri, Oct 25, 2019 at 09:45:31AM +0900, Philippe Liard wrote:
> > Personally speaking, just for Android-related use cases, I'd suggest the
> > latest EROFS if you care about overall system performance more than
> > compression ratio, even with https://lkml.org/lkml/2017/9/22/814 applied
> > (you can benchmark it); we put a lot of effort into this 3 years ago.
> >
> > And it's not only about performance but also noticeable memory overhead
> > (a lot of extra memory allocations) and heavy page cache thrashing in
> > low-memory scenarios (which are very common [1]).
> 
> Thanks for the suggestion. EROFS is on our radar and we will
> (re)consider it once it goes out of staging. But we will most likely
> stay on squashfs until this happens.

EROFS is already out of staging in mainline right now:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/erofs/

If you agree, I'd suggest you try it right now since it's already
widely deployed (200+ million devices on the market) on our Android
smartphones, and it is fully open source with an open community. I
don't think this would be a regrettable attempt, and we can answer
any questions.

https://lore.kernel.org/r/20191024033259.GA2513@hsiangkao-HP-ZHAN-66-Pro-G1

In my personal opinion, just for Android use cases,
I think it is worth taking some time.

Thanks,
Gao Xiang



* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
       [not found]     ` <CABXOdTeQTapfvKqGrqZME8JACeJhaHram_ZWk7ZZX2VWvYORaw@mail.gmail.com>
@ 2019-10-25  3:12       ` Gao Xiang
  0 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-10-25  3:12 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Philippe Liard, phillip, linux-kernel, Guenter Roeck,
	linux-erofs, Chao Yu

On Thu, Oct 24, 2019 at 08:02:14PM -0700, Guenter Roeck wrote:
> On Thu, Oct 24, 2019 at 7:51 PM Gao Xiang <gaoxiang25@huawei.com> wrote:
> 
> > On Fri, Oct 25, 2019 at 09:45:31AM +0900, Philippe Liard wrote:
> > > > Personally speaking, just for Android-related use cases, I'd suggest the
> > > > latest EROFS if you care about overall system performance more than
> > > > compression ratio, even with https://lkml.org/lkml/2017/9/22/814 applied
> > > > (you can benchmark it); we put a lot of effort into this 3 years ago.
> > > >
> > > > And it's not only about performance but also noticeable memory overhead
> > > > (a lot of extra memory allocations) and heavy page cache thrashing in
> > > > low-memory scenarios (which are very common [1]).
> > >
> > > Thanks for the suggestion. EROFS is on our radar and we will
> > > (re)consider it once it goes out of staging. But we will most likely
> > > stay on squashfs until this happens.
> >
> > EROFS is already out of staging in mainline right now:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/erofs/
> >
> > If you agree, I'd suggest you try it right now since it's already
> > widely deployed (200+ million devices on the market) on our Android
> > smartphones, and it is fully open source with an open community. I
> > don't think this would be a regrettable attempt, and we can answer
> > any questions.
> >
> > https://lore.kernel.org/r/20191024033259.GA2513@hsiangkao-HP-ZHAN-66-Pro-G1
> >
> > In my personal opinion, just for Android use cases,
> > I think it is worth taking some time.
> >
> All well said. The question, though, is whether that is a reason to reject
> squashfs performance improvements. I argue that it is not. The decision to
> switch to erofs or not is completely orthogonal to squashfs performance
> improvements, and one doesn't preclude the other.

Note that I have no objection to this patch, and I'm happy to see any
improvements for other compression filesystems. We keep boosting our
overall performance as well, but I thought I could give some personal
suggestions for this specific scenario since we already worked on other
solutions before. Just FYI.

Thanks,
Gao Xiang

> 
> Guenter


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-18  1:08 [PATCH] squashfs: Migrate from ll_rw_block usage to BIO Philippe Liard
                   ` (2 preceding siblings ...)
  2019-10-25  0:45 ` Philippe Liard
@ 2019-10-29  4:10 ` Philippe Liard
  2019-10-29  7:49   ` Christoph Hellwig
  3 siblings, 1 reply; 11+ messages in thread
From: Philippe Liard @ 2019-10-29  4:10 UTC (permalink / raw)
  To: phillip, hch; +Cc: linux-kernel, groeck, pliard

> > I don't see why you still need buffer_heads at all.  Basically if
> > you replace each of your allocated buffer heads with a simple page
> > allocation the code will be much simpler (this version adds more
> > than 100 lines of code!) and probably still a bit faster as you
> > don't need the squashfs_bio_request allocation either.
>
> Thanks Christoph for taking a look. I like the idea of simplifying
> this if possible. I think I understand your suggestion in principle
> but I'm not seeing a way to apply it here. Would it be possible for
> you to be a little more specific? Let me try to explain this below.
> 
> My admittedly limited understanding is that using BIO indirectly
> requires buffer_head or an alternative including some
> synchronization mechanism at least.
> It's true that the bio_{alloc,add_page,submit}() functions don't
> require passing a buffer_head. However because bio_submit() is
> asynchronous AFAICT the client needs to use a synchronization
> mechanism to wait for and notify the completion of the request which
> buffer heads provide. This is achieved respectively by
> wait_on_buffer() and {set,clear}_buffer_uptodate().
> 
> Another dependency on buffer heads is the fact that
> squashfs_read_data() calls into other squashfs functions operating
> on buffer heads outside this file. For example squashfs_decompress()
> operates on a buffer_head array.
> 
> Given that bio_submit() is asynchronous I'm also not seeing how the
> squashfs_bio_request allocation can be removed? There can be
> multiple BIO requests in flight each needing to carry some context
> used on completion of the request.

Christoph, do you still think this could be simplified as you
suggested?


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-29  4:10 ` Philippe Liard
@ 2019-10-29  7:49   ` Christoph Hellwig
  2019-10-30  1:19     ` Philippe Liard
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-10-29  7:49 UTC (permalink / raw)
  To: Philippe Liard; +Cc: phillip, hch, linux-kernel, groeck

FYI, the mail you quoted never made it to me.

On Tue, Oct 29, 2019 at 01:10:13PM +0900, Philippe Liard wrote:
> > My admittedly limited understanding is that using BIO indirectly
> > requires buffer_head or an alternative including some
> > synchronization mechanism at least.

What access do you need to synchronize?  If you read data into the
page cache the page lock provides all synchronization needed.  If
you just read into decompression buffers there probably is no need
for synchronization at all as each buffer is only accessed by one
thread at a time.

> > It's true that the bio_{alloc,add_page,submit}() functions don't
> > require passing a buffer_head. However because bio_submit() is
> > asynchronous AFAICT the client needs to use a synchronization
> > mechanism to wait for and notify the completion of the request which
> > buffer heads provide. This is achieved respectively by
> > wait_on_buffer() and {set,clear}_buffer_uptodate().

submit_bio_wait() is synchronous and takes care of that for you.
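
For a single bio the whole thing can then be as simple as the following
rough, untested sketch (the helper name and the page-array contract are
made up here for illustration):

/*
 * Sketch only: read nr_pages pages starting at 'sector' into freshly
 * allocated pages and wait synchronously.  Assumes nr_pages does not
 * exceed BIO_MAX_PAGES.
 */
static int squashfs_read_pages_sync(struct super_block *sb, sector_t sector,
				    struct page **pages, int nr_pages)
{
	struct bio *bio;
	int i, error = -ENOMEM;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, sb->s_bdev);
	bio->bi_opf = REQ_OP_READ;
	bio->bi_iter.bi_sector = sector;

	for (i = 0; i < nr_pages; i++) {
		pages[i] = alloc_page(GFP_NOIO);
		if (!pages[i] || !bio_add_page(bio, pages[i], PAGE_SIZE, 0))
			goto err;
	}

	/* Blocks until the I/O completes; no buffer_heads, no end_io. */
	error = submit_bio_wait(bio);
	bio_put(bio);
	return error;

err:
	while (i >= 0) {
		if (pages[i])
			__free_page(pages[i]);
		pages[i--] = NULL;
	}
	bio_put(bio);
	return error;
}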

> > Another dependency on buffer heads is the fact that
> > squashfs_read_data() calls into other squashfs functions operating
> > on buffer heads outside this file. For example squashfs_decompress()
> > operates on a buffer_head array.

All the decompressors do is access the buffer data and then eventually
do a put_bh().  So as a prep patch you can just pass them bio_vecs and
keep all the buffer head handling in block.c.  Initially that will be
a little less efficient as it requires two allocations, but as soon
as you kill off the buffer heads it actually becomes much better.
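
The glue on the block.c side could then look roughly like the sketch
below (the helper name is made up here); the decompressor entry points
would take the bio_vec array instead of the buffer_head array:

/*
 * Sketch: describe the data already held in the buffer_heads as an
 * array of bio_vecs so the decompressors never see a buffer_head.
 * This is the second, temporary allocation mentioned above.
 */
static struct bio_vec *squashfs_bh_to_bvec(struct buffer_head **bh,
					   int nr_buffers, int blksz)
{
	struct bio_vec *bvec;
	int i;

	bvec = kmalloc_array(nr_buffers, sizeof(*bvec), GFP_NOIO);
	if (!bvec)
		return NULL;

	for (i = 0; i < nr_buffers; i++) {
		bvec[i].bv_page = bh[i]->b_page;
		bvec[i].bv_len = blksz;
		bvec[i].bv_offset = bh_offset(bh[i]);
	}
	return bvec;
}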

> > Given that bio_submit() is asynchronous I'm also not seeing how the
> > squashfs_bio_request allocation can be removed? There can be
> > multiple BIO requests in flight each needing to carry some context
> > used on completion of the request.
> 
> Christoph, do you still think this could be simplified as you
> suggested?

Yes.


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-29  7:49   ` Christoph Hellwig
@ 2019-10-30  1:19     ` Philippe Liard
  2019-10-30 14:01       ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Philippe Liard @ 2019-10-30  1:19 UTC (permalink / raw)
  To: phillip, hch; +Cc: linux-kernel, groeck, pliard

> FYI, the mail you quoted never made it to me..
Sorry about that. That was my first time replying on the LKML and I
must have made a mistake when I invoked git send-email.

> On Tue, Oct 29, 2019 at 01:10:13PM +0900, Philippe Liard wrote:
> > > My admittedly limited understanding is that using BIO indirectly
> > > requires buffer_head or an alternative including some
> > > synchronization mechanism at least.
> 
> What access do you need to synchronize?  If you read data into the
> page cache the page lock provides all synchronization needed.  If
> you just read into decrompression buffers there probably is no need
> for synchronization at all as each buffer is only accessed by one
> thread at a time.
My main concern here was waiting for the BIO request to complete but
submit_bio_wait() that you pointed out below should address that.

> > > It's true that the bio_{alloc,add_page,submit}() functions don't
> > > require passing a buffer_head. However because bio_submit() is
> > > asynchronous AFAICT the client needs to use a synchronization
> > > mechanism to wait for and notify the completion of the request
> > > which buffer heads provide. This is achieved respectively by
> > > wait_on_buffer() and {set,clear}_buffer_uptodate().
> 
> submit_bio_wait() is synchronous and takes care of that for you.
Thanks, I should have noticed that.

> > > Another dependency on buffer heads is the fact that
> > > squashfs_read_data() calls into other squashfs functions operating
> > > on buffer heads outside this file. For example
> > > squashfs_decompress() operates on a buffer_head array.
> 
> All the decompressors do is access the buffer data and then eventually
> do a put_bh().  So as a prep patch you can just pass them bio_vecs and
> keep all the buffer head handling in block.c.  Initially that will be
> a little less efficient as it requires two allocations, but as soon
> as you kill off the buffer heads it actually becomes much better.
I will try that, possibly all as a single patch if it looks simple
enough, so that there is no need for an intermediate conversion from
buffer heads to bio_vecs. Let me know, though, if you feel strongly
about having this as two patches.

> 
> > > Given that bio_submit() is asynchronous I'm also not seeing how
> > > the squashfs_bio_request allocation can be removed? There can be
> > > multiple BIO requests in flight each needing to carry some context
> > > used on completion of the request.
> >
> > Christoph, do you still think this could be simplified as you
> > suggested?
> 
> Yes.
Thanks for explaining all of this. I will make the changes you
suggested and will report back with a new patch.


* Re: [PATCH] squashfs: Migrate from ll_rw_block usage to BIO
  2019-10-30  1:19     ` Philippe Liard
@ 2019-10-30 14:01       ` Christoph Hellwig
  0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2019-10-30 14:01 UTC (permalink / raw)
  To: Philippe Liard; +Cc: phillip, hch, linux-kernel, groeck

On Wed, Oct 30, 2019 at 10:19:54AM +0900, Philippe Liard wrote:
> > What access do you need to synchronize?  If you read data into the
> > page cache the page lock provides all synchronization needed.  If
> > you just read into decompression buffers there probably is no need
> > for synchronization at all as each buffer is only accessed by one
> > thread at a time.
> My main concern here was waiting for the BIO request to complete but
> submit_bio_wait() that you pointed out below should address that.

Note that if you are doing multiple bios for a single request, using
submit_bio_wait() might not be optimal.  In that case you probably want
a refcount and only complete when all of them are done; looking at
submit_bio_wait() should give you an idea of how that works.
Alternatively, look at other users, e.g. __blkdev_direct_IO() in
fs/block_dev.c, which already supports multiple bios.
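
The pattern is roughly the following (a sketch with made-up names,
modelled on what __blkdev_direct_IO() does):

/* Shared context: one reference per bio plus one for the submitter. */
struct squashfs_read_request {
	atomic_t		pending;
	struct completion	done;
	blk_status_t		status;		/* any bio error wins */
};

static void squashfs_multi_bio_end_io(struct bio *bio)
{
	struct squashfs_read_request *req = bio->bi_private;

	if (bio->bi_status)
		req->status = bio->bi_status;
	bio_put(bio);

	/* The last reference dropped wakes up the submitter. */
	if (atomic_dec_and_test(&req->pending))
		complete(&req->done);
}

The submitter initialises 'pending' to the number of bios plus one, sets
bi_private and bi_end_io on each bio before calling submit_bio(), drops
its own reference once everything has been submitted, and then does
wait_for_completion(&req->done) and checks req->status.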
