All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/7] Compress path cleanups
@ 2019-03-12 15:20 Nikolay Borisov
  2019-03-12 15:20 ` [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async Nikolay Borisov
                   ` (7 more replies)
  0 siblings, 8 replies; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

Here is the v4 of the compress path cleanups, this version incorporates
feedback from v3, namely:

* Use struct_size when calculating the size of the struct to be allocated in
cow_file_range_async

* Reinstated the comment about ihold in cow_file_range_async

* Renamed "struct async_chunk *" variables from async_cow to async_chunk

* Gave patch 1 a more explicit title. 

* added RB tags from Johannes to patch 3 and 4

Nikolay Borisov (7):
  btrfs: Preallocate chunks in cow_file_range_async
  btrfs: Rename async_cow to async_chunk
  btrfs: Remove fs_info from struct async_chunk
  btrfs: Make compress_file_range take only struct async_chunk
  btrfs: Replace clear_extent_bit with unlock_extent
  btrfs: Set iotree only once in submit_compressed_extents
  btrfs: Factor out common extent locking code in
    submit_compressed_extents

 fs/btrfs/inode.c | 177 ++++++++++++++++++++++++++---------------------
 1 file changed, 100 insertions(+), 77 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:35   ` Johannes Thumshirn
  2019-03-27 17:23   ` David Sterba
  2019-03-12 15:20 ` [PATCH 2/7] btrfs: Rename async_cow to async_chunk Nikolay Borisov
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

This commit changes the implementation of cow_file_range_async in order
to get rid of the BUG_ON in the middle of the loop. Additionally it
reworks the inner loop in the hopes of making it more understandable.

The idea is to make async_cow be a top-level structure, shared
amongst all chunks being sent for compression. This allows us to perform
one memory allocation at the beginning and gracefully fail the IO if
there isn't enough memory. Now, each chunk is going to be described
by an async_chunk struct. It's the responsibility of the final chunk
to actually free the memory.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/inode.c | 105 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 71 insertions(+), 34 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 05bbfd02ea49..2c13915c6f71 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -366,7 +366,7 @@ struct async_extent {
 	struct list_head list;
 };
 
-struct async_cow {
+struct async_chunk {
 	struct inode *inode;
 	struct btrfs_fs_info *fs_info;
 	struct page *locked_page;
@@ -375,9 +375,15 @@ struct async_cow {
 	unsigned int write_flags;
 	struct list_head extents;
 	struct btrfs_work work;
+	atomic_t *pending;
+};
+
+struct async_cow {
+	atomic_t num_chunks; /* Number of chunks in flight */
+	struct async_chunk chunks[];
 };
 
-static noinline int add_async_extent(struct async_cow *cow,
+static noinline int add_async_extent(struct async_chunk *cow,
 				     u64 start, u64 ram_size,
 				     u64 compressed_size,
 				     struct page **pages,
@@ -447,7 +453,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
 static noinline void compress_file_range(struct inode *inode,
 					struct page *locked_page,
 					u64 start, u64 end,
-					struct async_cow *async_cow,
+					struct async_chunk *async_cow,
 					int *num_added)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -713,7 +719,7 @@ static void free_async_extent_pages(struct async_extent *async_extent)
  * queued.  We walk all the async extents created by compress_file_range
  * and send them down to the disk.
  */
-static noinline void submit_compressed_extents(struct async_cow *async_cow)
+static noinline void submit_compressed_extents(struct async_chunk *async_cow)
 {
 	struct inode *inode = async_cow->inode;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -1132,9 +1138,9 @@ static noinline int cow_file_range(struct inode *inode,
  */
 static noinline void async_cow_start(struct btrfs_work *work)
 {
-	struct async_cow *async_cow;
+	struct async_chunk *async_cow;
 	int num_added = 0;
-	async_cow = container_of(work, struct async_cow, work);
+	async_cow = container_of(work, struct async_chunk, work);
 
 	compress_file_range(async_cow->inode, async_cow->locked_page,
 			    async_cow->start, async_cow->end, async_cow,
@@ -1151,10 +1157,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
 static noinline void async_cow_submit(struct btrfs_work *work)
 {
 	struct btrfs_fs_info *fs_info;
-	struct async_cow *async_cow;
+	struct async_chunk *async_cow;
 	unsigned long nr_pages;
 
-	async_cow = container_of(work, struct async_cow, work);
+	async_cow = container_of(work, struct async_chunk, work);
 
 	fs_info = async_cow->fs_info;
 	nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
@@ -1177,11 +1183,16 @@ static noinline void async_cow_submit(struct btrfs_work *work)
 
 static noinline void async_cow_free(struct btrfs_work *work)
 {
-	struct async_cow *async_cow;
-	async_cow = container_of(work, struct async_cow, work);
+	struct async_chunk *async_cow;
+	async_cow = container_of(work, struct async_chunk, work);
 	if (async_cow->inode)
 		btrfs_add_delayed_iput(async_cow->inode);
-	kfree(async_cow);
+	/*
+	 * Since the pointer to 'pending' is at the beginning of the array of
+	 * async_cow's, freeing it ensures the whole array has been freed.
+	 */
+	if (atomic_dec_and_test(async_cow->pending))
+		kfree(async_cow->pending);
 }
 
 static int cow_file_range_async(struct inode *inode, struct page *locked_page,
@@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 				unsigned int write_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct async_cow *async_cow;
+	struct async_cow *ctx;
+	struct async_chunk *async_chunk;
 	unsigned long nr_pages;
 	u64 cur_end;
+	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
+	int i;
+	bool should_compress;
 
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
 			 1, 0, NULL);
-	while (start < end) {
-		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
-		BUG_ON(!async_cow); /* -ENOMEM */
+
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
+		num_chunks = 1;
+		should_compress = false;
+	} else {
+		should_compress = true;
+	}
+
+	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);
+	if (!ctx) {
+		unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
+			EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+			EXTENT_DO_ACCOUNTING;
+		unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+			PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
+			PAGE_SET_ERROR;
+		extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
+					     clear_bits, page_ops);
+		return -ENOMEM;
+	}
+
+	async_chunk = ctx->chunks;
+	atomic_set(&ctx->num_chunks, num_chunks);
+
+	for (i = 0; i < num_chunks; i++) {
+
+		if (should_compress)
+			cur_end = min(end, start + SZ_512K - 1);
+		else
+			cur_end = end;
+
 		/*
 		 * igrab is called higher up in the call chain, take only the
 		 * lightweight reference for the callback lifetime
 		 */
 		ihold(inode);
-		async_cow->inode = inode;
-		async_cow->fs_info = fs_info;
-		async_cow->locked_page = locked_page;
-		async_cow->start = start;
-		async_cow->write_flags = write_flags;
-
-		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
-		    !btrfs_test_opt(fs_info, FORCE_COMPRESS))
-			cur_end = end;
-		else
-			cur_end = min(end, start + SZ_512K - 1);
-
-		async_cow->end = cur_end;
-		INIT_LIST_HEAD(&async_cow->extents);
-
-		btrfs_init_work(&async_cow->work,
+		async_chunk[i].pending= &ctx->num_chunks;
+		async_chunk[i].inode = inode;
+		async_chunk[i].start = start;
+		async_chunk[i].end = cur_end;
+		async_chunk[i].fs_info = fs_info;
+		async_chunk[i].locked_page = locked_page;
+		async_chunk[i].write_flags = write_flags;
+		INIT_LIST_HEAD(&async_chunk[i].extents);
+
+		btrfs_init_work(&async_chunk[i].work,
 				btrfs_delalloc_helper,
 				async_cow_start, async_cow_submit,
 				async_cow_free);
 
-		nr_pages = (cur_end - start + PAGE_SIZE) >>
-			PAGE_SHIFT;
+		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
 		atomic_add(nr_pages, &fs_info->async_delalloc_pages);
 
-		btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
+		btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
 
 		*nr_written += nr_pages;
 		start = cur_end + 1;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/7] btrfs: Rename async_cow to async_chunk
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
  2019-03-12 15:20 ` [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:34   ` Johannes Thumshirn
  2019-03-12 15:20 ` [PATCH 3/7] btrfs: Remove fs_info from struct async_chunk Nikolay Borisov
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

Now that we have an explicit async_chunk struct rename references to
variables of this type to async_chunk. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/inode.c | 60 ++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c13915c6f71..be3777d03c80 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -453,7 +453,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
 static noinline void compress_file_range(struct inode *inode,
 					struct page *locked_page,
 					u64 start, u64 end,
-					struct async_chunk *async_cow,
+					struct async_chunk *async_chunk,
 					int *num_added)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -636,7 +636,7 @@ static noinline void compress_file_range(struct inode *inode,
 			 * allocation on disk for these compressed pages, and
 			 * will submit them to the elevator.
 			 */
-			add_async_extent(async_cow, start, total_in,
+			add_async_extent(async_chunk, start, total_in,
 					total_compressed, pages, nr_pages,
 					compress_type);
 
@@ -683,7 +683,7 @@ static noinline void compress_file_range(struct inode *inode,
 
 	if (redirty)
 		extent_range_redirty_for_io(inode, start, end);
-	add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+	add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
 			 BTRFS_COMPRESS_NONE);
 	*num_added += 1;
 
@@ -719,9 +719,9 @@ static void free_async_extent_pages(struct async_extent *async_extent)
  * queued.  We walk all the async extents created by compress_file_range
  * and send them down to the disk.
  */
-static noinline void submit_compressed_extents(struct async_chunk *async_cow)
+static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 {
-	struct inode *inode = async_cow->inode;
+	struct inode *inode = async_chunk->inode;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct async_extent *async_extent;
 	u64 alloc_hint = 0;
@@ -732,8 +732,8 @@ static noinline void submit_compressed_extents(struct async_chunk *async_cow)
 	int ret = 0;
 
 again:
-	while (!list_empty(&async_cow->extents)) {
-		async_extent = list_entry(async_cow->extents.next,
+	while (!list_empty(&async_chunk->extents)) {
+		async_extent = list_entry(async_chunk->extents.next,
 					  struct async_extent, list);
 		list_del(&async_extent->list);
 
@@ -750,7 +750,7 @@ static noinline void submit_compressed_extents(struct async_chunk *async_cow)
 					 async_extent->ram_size - 1);
 
 			/* allocate blocks */
-			ret = cow_file_range(inode, async_cow->locked_page,
+			ret = cow_file_range(inode, async_chunk->locked_page,
 					     async_extent->start,
 					     async_extent->start +
 					     async_extent->ram_size - 1,
@@ -774,7 +774,7 @@ static noinline void submit_compressed_extents(struct async_chunk *async_cow)
 						  async_extent->ram_size - 1,
 						  WB_SYNC_ALL);
 			else if (ret)
-				unlock_page(async_cow->locked_page);
+				unlock_page(async_chunk->locked_page);
 			kfree(async_extent);
 			cond_resched();
 			continue;
@@ -861,7 +861,7 @@ static noinline void submit_compressed_extents(struct async_chunk *async_cow)
 				    ins.objectid,
 				    ins.offset, async_extent->pages,
 				    async_extent->nr_pages,
-				    async_cow->write_flags)) {
+				    async_chunk->write_flags)) {
 			struct page *p = async_extent->pages[0];
 			const u64 start = async_extent->start;
 			const u64 end = start + async_extent->ram_size - 1;
@@ -1138,16 +1138,16 @@ static noinline int cow_file_range(struct inode *inode,
  */
 static noinline void async_cow_start(struct btrfs_work *work)
 {
-	struct async_chunk *async_cow;
+	struct async_chunk *async_chunk;
 	int num_added = 0;
-	async_cow = container_of(work, struct async_chunk, work);
+	async_chunk = container_of(work, struct async_chunk, work);
 
-	compress_file_range(async_cow->inode, async_cow->locked_page,
-			    async_cow->start, async_cow->end, async_cow,
+	compress_file_range(async_chunk->inode, async_chunk->locked_page,
+			    async_chunk->start, async_chunk->end, async_chunk,
 			    &num_added);
 	if (num_added == 0) {
-		btrfs_add_delayed_iput(async_cow->inode);
-		async_cow->inode = NULL;
+		btrfs_add_delayed_iput(async_chunk->inode);
+		async_chunk->inode = NULL;
 	}
 }
 
@@ -1157,13 +1157,13 @@ static noinline void async_cow_start(struct btrfs_work *work)
 static noinline void async_cow_submit(struct btrfs_work *work)
 {
 	struct btrfs_fs_info *fs_info;
-	struct async_chunk *async_cow;
+	struct async_chunk *async_chunk;
 	unsigned long nr_pages;
 
-	async_cow = container_of(work, struct async_chunk, work);
+	async_chunk = container_of(work, struct async_chunk, work);
 
-	fs_info = async_cow->fs_info;
-	nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+	fs_info = async_chunk->fs_info;
+	nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
 		PAGE_SHIFT;
 
 	/* atomic_sub_return implies a barrier */
@@ -1172,27 +1172,27 @@ static noinline void async_cow_submit(struct btrfs_work *work)
 		cond_wake_up_nomb(&fs_info->async_submit_wait);
 
 	/*
-	 * ->inode could be NULL if async_cow_start has failed to compress,
+	 * ->inode could be NULL if async_chunk_start has failed to compress,
 	 * in which case we don't have anything to submit, yet we need to
 	 * always adjust ->async_delalloc_pages as its paired with the init
 	 * happening in cow_file_range_async
 	 */
-	if (async_cow->inode)
-		submit_compressed_extents(async_cow);
+	if (async_chunk->inode)
+		submit_compressed_extents(async_chunk);
 }
 
 static noinline void async_cow_free(struct btrfs_work *work)
 {
-	struct async_chunk *async_cow;
-	async_cow = container_of(work, struct async_chunk, work);
-	if (async_cow->inode)
-		btrfs_add_delayed_iput(async_cow->inode);
+	struct async_chunk *async_chunk;
+	async_chunk = container_of(work, struct async_chunk, work);
+	if (async_chunk->inode)
+		btrfs_add_delayed_iput(async_chunk->inode);
 	/*
 	 * Since the pointer to 'pending' is at the beginning of the array of
-	 * async_cow's, freeing it ensures the whole array has been freed.
+	 * async_chunk's, freeing it ensures the whole array has been freed.
 	 */
-	if (atomic_dec_and_test(async_cow->pending))
-		kfree(async_cow->pending);
+	if (atomic_dec_and_test(async_chunk->pending))
+		kfree(async_chunk->pending);
 }
 
 static int cow_file_range_async(struct inode *inode, struct page *locked_page,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/7] btrfs: Remove fs_info from struct async_chunk
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
  2019-03-12 15:20 ` [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async Nikolay Borisov
  2019-03-12 15:20 ` [PATCH 2/7] btrfs: Rename async_cow to async_chunk Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:20 ` [PATCH 4/7] btrfs: Make compress_file_range take only " Nikolay Borisov
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

The associated btrfs_work already contains a reference to the fs_info so
use that instead of passing it via async_chunk. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 fs/btrfs/inode.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index be3777d03c80..2a9d24bc8b53 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -368,7 +368,6 @@ struct async_extent {
 
 struct async_chunk {
 	struct inode *inode;
-	struct btrfs_fs_info *fs_info;
 	struct page *locked_page;
 	u64 start;
 	u64 end;
@@ -1156,13 +1155,11 @@ static noinline void async_cow_start(struct btrfs_work *work)
  */
 static noinline void async_cow_submit(struct btrfs_work *work)
 {
-	struct btrfs_fs_info *fs_info;
-	struct async_chunk *async_chunk;
+	struct async_chunk *async_chunk = container_of(work, struct async_chunk,
+						     work);
+	struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
 	unsigned long nr_pages;
 
-	async_chunk = container_of(work, struct async_chunk, work);
-
-	fs_info = async_chunk->fs_info;
 	nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
 		PAGE_SHIFT;
 
@@ -1252,7 +1249,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 		async_chunk[i].inode = inode;
 		async_chunk[i].start = start;
 		async_chunk[i].end = cur_end;
-		async_chunk[i].fs_info = fs_info;
 		async_chunk[i].locked_page = locked_page;
 		async_chunk[i].write_flags = write_flags;
 		INIT_LIST_HEAD(&async_chunk[i].extents);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 4/7] btrfs: Make compress_file_range take only struct async_chunk
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
                   ` (2 preceding siblings ...)
  2019-03-12 15:20 ` [PATCH 3/7] btrfs: Remove fs_info from struct async_chunk Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:20 ` [PATCH 5/7] btrfs: Replace clear_extent_bit with unlock_extent Nikolay Borisov
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

All context this function needs is held within struct async_chunk.
Currently we not only pass the struct but also every individual member.
This is redundant, simplify it by only passing struct async_chunk and
leaving it to compress_file_range to extract the values it requires.
No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 fs/btrfs/inode.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a9d24bc8b53..df008aa195b4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -449,14 +449,14 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
  * are written in the same order that the flusher thread sent them
  * down.
  */
-static noinline void compress_file_range(struct inode *inode,
-					struct page *locked_page,
-					u64 start, u64 end,
-					struct async_chunk *async_chunk,
-					int *num_added)
+static noinline void compress_file_range(struct async_chunk *async_chunk,
+					 int *num_added)
 {
+	struct inode *inode = async_chunk->inode;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	u64 blocksize = fs_info->sectorsize;
+	u64 start = async_chunk->start;
+	u64 end = async_chunk->end;
 	u64 actual_end;
 	int ret = 0;
 	struct page **pages = NULL;
@@ -675,9 +675,9 @@ static noinline void compress_file_range(struct inode *inode,
 	 * to our extent and set things up for the async work queue to run
 	 * cow_file_range to do the normal delalloc dance.
 	 */
-	if (page_offset(locked_page) >= start &&
-	    page_offset(locked_page) <= end)
-		__set_page_dirty_nobuffers(locked_page);
+	if (page_offset(async_chunk->locked_page) >= start &&
+	    page_offset(async_chunk->locked_page) <= end)
+		__set_page_dirty_nobuffers(async_chunk->locked_page);
 		/* unlocked later on in the async handlers */
 
 	if (redirty)
@@ -1141,9 +1141,7 @@ static noinline void async_cow_start(struct btrfs_work *work)
 	int num_added = 0;
 	async_chunk = container_of(work, struct async_chunk, work);
 
-	compress_file_range(async_chunk->inode, async_chunk->locked_page,
-			    async_chunk->start, async_chunk->end, async_chunk,
-			    &num_added);
+	compress_file_range(async_chunk, &num_added);
 	if (num_added == 0) {
 		btrfs_add_delayed_iput(async_chunk->inode);
 		async_chunk->inode = NULL;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 5/7] btrfs: Replace clear_extent_bit with unlock_extent
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
                   ` (3 preceding siblings ...)
  2019-03-12 15:20 ` [PATCH 4/7] btrfs: Make compress_file_range take only " Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:27   ` Johannes Thumshirn
  2019-03-12 15:20 ` [PATCH 6/7] btrfs: Set iotree only once in submit_compressed_extents Nikolay Borisov
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index df008aa195b4..0123f7067c8a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1204,8 +1204,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 	int i;
 	bool should_compress;
 
-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
-			 1, 0, NULL);
+	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
 
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
 	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 6/7] btrfs: Set iotree only once in submit_compressed_extents
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
                   ` (4 preceding siblings ...)
  2019-03-12 15:20 ` [PATCH 5/7] btrfs: Replace clear_extent_bit with unlock_extent Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:30   ` Johannes Thumshirn
  2019-03-12 15:20 ` [PATCH 7/7] btrfs: Factor out common extent locking code " Nikolay Borisov
  2019-03-13 15:36 ` [PATCH v4 0/7] Compress path cleanups David Sterba
  7 siblings, 1 reply; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

The inode never changes so it's sufficient to dereference it and get
the iotree only once, before the execution of the main loop. No
functional changes, only the size of the function is decreased:

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-44 (-44)
Function                                     old     new   delta
submit_compressed_extents                   1240    1196     -44
Total: Before=88476, After=88432, chg -0.05%

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/inode.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0123f7067c8a..f5bd6fb840a5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -727,7 +727,7 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 	struct btrfs_key ins;
 	struct extent_map *em;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct extent_io_tree *io_tree;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	int ret = 0;
 
 again:
@@ -735,9 +735,6 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 		async_extent = list_entry(async_chunk->extents.next,
 					  struct async_extent, list);
 		list_del(&async_extent->list);
-
-		io_tree = &BTRFS_I(inode)->io_tree;
-
 retry:
 		/* did the compression code fall back to uncompressed IO? */
 		if (!async_extent->pages) {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 7/7] btrfs: Factor out common extent locking code in submit_compressed_extents
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
                   ` (5 preceding siblings ...)
  2019-03-12 15:20 ` [PATCH 6/7] btrfs: Set iotree only once in submit_compressed_extents Nikolay Borisov
@ 2019-03-12 15:20 ` Nikolay Borisov
  2019-03-12 15:31   ` Johannes Thumshirn
  2019-03-13 15:36 ` [PATCH v4 0/7] Compress path cleanups David Sterba
  7 siblings, 1 reply; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-12 15:20 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Nikolay Borisov

Irrespective of whether the compress code fell back to uncompressed or
a compressed extent has to be submitted, the extent range is always
locked. So factor out the common lock_extent call at the beginning of
the loop. No functional changes just removes one duplicate lock_extent
call.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/inode.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f5bd6fb840a5..9fd7cd453a54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -736,15 +736,14 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 					  struct async_extent, list);
 		list_del(&async_extent->list);
 retry:
+
+		lock_extent(io_tree, async_extent->start,
+			    async_extent->start + async_extent->ram_size - 1);
 		/* did the compression code fall back to uncompressed IO? */
 		if (!async_extent->pages) {
 			int page_started = 0;
 			unsigned long nr_written = 0;
 
-			lock_extent(io_tree, async_extent->start,
-					 async_extent->start +
-					 async_extent->ram_size - 1);
-
 			/* allocate blocks */
 			ret = cow_file_range(inode, async_chunk->locked_page,
 					     async_extent->start,
@@ -776,9 +775,6 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 			continue;
 		}
 
-		lock_extent(io_tree, async_extent->start,
-			    async_extent->start + async_extent->ram_size - 1);
-
 		ret = btrfs_reserve_extent(root, async_extent->ram_size,
 					   async_extent->compressed_size,
 					   async_extent->compressed_size,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/7] btrfs: Replace clear_extent_bit with unlock_extent
  2019-03-12 15:20 ` [PATCH 5/7] btrfs: Replace clear_extent_bit with unlock_extent Nikolay Borisov
@ 2019-03-12 15:27   ` Johannes Thumshirn
  0 siblings, 0 replies; 20+ messages in thread
From: Johannes Thumshirn @ 2019-03-12 15:27 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 6/7] btrfs: Set iotree only once in submit_compressed_extents
  2019-03-12 15:20 ` [PATCH 6/7] btrfs: Set iotree only once in submit_compressed_extents Nikolay Borisov
@ 2019-03-12 15:30   ` Johannes Thumshirn
  0 siblings, 0 replies; 20+ messages in thread
From: Johannes Thumshirn @ 2019-03-12 15:30 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 7/7] btrfs: Factor out common extent locking code in submit_compressed_extents
  2019-03-12 15:20 ` [PATCH 7/7] btrfs: Factor out common extent locking code " Nikolay Borisov
@ 2019-03-12 15:31   ` Johannes Thumshirn
  0 siblings, 0 replies; 20+ messages in thread
From: Johannes Thumshirn @ 2019-03-12 15:31 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/7] btrfs: Rename async_cow to async_chunk
  2019-03-12 15:20 ` [PATCH 2/7] btrfs: Rename async_cow to async_chunk Nikolay Borisov
@ 2019-03-12 15:34   ` Johannes Thumshirn
  0 siblings, 0 replies; 20+ messages in thread
From: Johannes Thumshirn @ 2019-03-12 15:34 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-12 15:20 ` [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async Nikolay Borisov
@ 2019-03-12 15:35   ` Johannes Thumshirn
  2019-03-27 17:23   ` David Sterba
  1 sibling, 0 replies; 20+ messages in thread
From: Johannes Thumshirn @ 2019-03-12 15:35 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v4 0/7] Compress path cleanups
  2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
                   ` (6 preceding siblings ...)
  2019-03-12 15:20 ` [PATCH 7/7] btrfs: Factor out common extent locking code " Nikolay Borisov
@ 2019-03-13 15:36 ` David Sterba
  7 siblings, 0 replies; 20+ messages in thread
From: David Sterba @ 2019-03-13 15:36 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: linux-btrfs

On Tue, Mar 12, 2019 at 05:20:23PM +0200, Nikolay Borisov wrote:
> Here is the v4 of the compress path cleanups, this version incorporates 
> feedback from v3, namely: 
> 
> * Use struct_size when calculating the size of the struct to allocated in 
> cow_file_range_async
> 
> * Reinstated the comment about ihold in cow_file_range_async
> 
> * Renamed "struct async_chunk *" variables from async_cow to async_chunk
> 
> * Gave patch 1 a more explicit title. 
> 
> * added RB tags from Johannes to patch 3 and 4
> 
> Nikolay Borisov (7):
>   btrfs: Preallocate chunks in cow_file_range_async
>   btrfs: Rename async_cow to async_chunk
>   btrfs: Remove fs_info from struct async_chunk
>   btrfs: Make compress_file_range take only struct async_chunk
>   btrfs: Replace clear_extent_bit with unlock_extent
>   btrfs: Set iotree only once in submit_compressed_extents
>   btrfs: Factor out common extent locking code in
>     submit_compressed_extents

Overall looks good, I'll add it to misc-5.2 after some testing, thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-12 15:20 ` [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async Nikolay Borisov
  2019-03-12 15:35   ` Johannes Thumshirn
@ 2019-03-27 17:23   ` David Sterba
  2019-03-28 12:49     ` Nikolay Borisov
  1 sibling, 1 reply; 20+ messages in thread
From: David Sterba @ 2019-03-27 17:23 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: linux-btrfs

On Tue, Mar 12, 2019 at 05:20:24PM +0200, Nikolay Borisov wrote:
> @@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
>  				unsigned int write_flags)
>  {
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> -	struct async_cow *async_cow;
> +	struct async_cow *ctx;
> +	struct async_chunk *async_chunk;
>  	unsigned long nr_pages;
>  	u64 cur_end;
> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
> +	int i;
> +	bool should_compress;
>  
>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
>  			 1, 0, NULL);
> -	while (start < end) {
> -		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
> -		BUG_ON(!async_cow); /* -ENOMEM */
> +
> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
> +	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
> +		num_chunks = 1;
> +		should_compress = false;
> +	} else {
> +		should_compress = true;
> +	}
> +
> +	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);

This leads to OOM due to high order allocation. And this is worse than
the previous state, where there are many small allocations that could
potentially fail (but most likely will not due to GFP_NOFS and size <
PAGE_SIZE).

So this needs to be reworked to avoid the costly allocations or reverted
to the previous state.

btrfs/138               [19:44:05][ 4034.368157] run fstests btrfs/138 at 2019-03-25 19:44:05
[ 4034.559716] BTRFS: device fsid 9300f07a-78f4-4ac6-8376-1a902ef26830 devid 1 transid 5 /dev/vdb
[ 4034.573670] BTRFS info (device vdb): disk space caching is enabled
[ 4034.575068] BTRFS info (device vdb): has skinny extents
[ 4034.576258] BTRFS info (device vdb): flagging fs with big metadata feature
[ 4034.580226] BTRFS info (device vdb): checking UUID tree
[ 4066.104734] BTRFS info (device vdb): disk space caching is enabled
[ 4066.108558] BTRFS info (device vdb): has skinny extents
[ 4066.186856] BTRFS info (device vdb): setting 8 feature flag
[ 4074.017307] BTRFS info (device vdb): disk space caching is enabled
[ 4074.019646] BTRFS info (device vdb): has skinny extents
[ 4074.065117] BTRFS info (device vdb): setting 16 feature flag
[ 4075.787401] kworker/u8:12: page allocation failure: order:4, mode:0x604040(GFP_NOFS|__GFP_COMP), nodemask=(null)
[ 4075.789581] CPU: 0 PID: 31258 Comm: kworker/u8:12 Not tainted 5.0.0-rc8-default+ #524
[ 4075.791235] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c89-prebuilt.qemu.org 04/01/2014
[ 4075.793334] Workqueue: writeback wb_workfn (flush-btrfs-718)
[ 4075.794455] Call Trace:
[ 4075.795029]  dump_stack+0x67/0x90
[ 4075.795756]  warn_alloc.cold.131+0x73/0xf3
[ 4075.796601]  __alloc_pages_slowpath+0xa0e/0xb50
[ 4075.797595]  ? __wake_up_common_lock+0x89/0xc0
[ 4075.798558]  __alloc_pages_nodemask+0x2bd/0x310
[ 4075.799537]  kmalloc_order+0x14/0x60
[ 4075.800382]  kmalloc_order_trace+0x1d/0x120
[ 4075.801341]  btrfs_run_delalloc_range+0x3e6/0x4b0 [btrfs]
[ 4075.802344]  writepage_delalloc+0xf8/0x150 [btrfs]
[ 4075.802991]  __extent_writepage+0x113/0x420 [btrfs]
[ 4075.803640]  extent_write_cache_pages+0x2a6/0x400 [btrfs]
[ 4075.804340]  extent_writepages+0x52/0xa0 [btrfs]
[ 4075.804951]  do_writepages+0x3e/0xe0
[ 4075.805480]  ? writeback_sb_inodes+0x133/0x550
[ 4075.806406]  __writeback_single_inode+0x54/0x640
[ 4075.807315]  writeback_sb_inodes+0x204/0x550
[ 4075.808112]  __writeback_inodes_wb+0x5d/0xb0
[ 4075.808692]  wb_writeback+0x337/0x4a0
[ 4075.809207]  wb_workfn+0x3a7/0x590
[ 4075.809849]  process_one_work+0x246/0x610
[ 4075.810665]  worker_thread+0x3c/0x390
[ 4075.811415]  ? rescuer_thread+0x360/0x360
[ 4075.812293]  kthread+0x116/0x130
[ 4075.812965]  ? kthread_create_on_node+0x60/0x60
[ 4075.813870]  ret_from_fork+0x24/0x30
[ 4075.814664] Mem-Info:
[ 4075.815167] active_anon:2942 inactive_anon:15105 isolated_anon:0
[ 4075.815167]  active_file:2749 inactive_file:454876 isolated_file:0
[ 4075.815167]  unevictable:0 dirty:68316 writeback:0 unstable:0
[ 4075.815167]  slab_reclaimable:5500 slab_unreclaimable:6458
[ 4075.815167]  mapped:940 shmem:15483 pagetables:51 bounce:0
[ 4075.815167]  free:7068 free_pcp:297 free_cma:0
[ 4075.823236] Node 0 active_anon:11768kB inactive_anon:60420kB active_file:10996kB inactive_file:1827676kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:3760kB dirty:277360kB writeback:0kB shmem:61932kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
[ 4075.828200] Node 0 DMA free:7860kB min:44kB low:56kB high:68kB active_anon:0kB inactive_anon:4kB active_file:0kB inactive_file:8012kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
[ 4075.834484] lowmem_reserve[]: 0 1955 1955 1955
[ 4075.835419] Node 0 DMA32 free:11292kB min:5632kB low:7632kB high:9632kB active_anon:11768kB inactive_anon:60416kB active_file:10996kB inactive_file:1820532kB unevictable:0kB writepending:281184kB present:2080568kB managed:2009324kB mlocked:0kB kernel_stack:1984kB pagetables:204kB bounce:0kB free_pcp:132kB local_pcp:0kB free_cma:0k 
[ 4075.841848] lowmem_reserve[]: 0 0 0 0
[ 4075.842677] Node 0 DMA: 1*4kB (U) 2*8kB (U) 4*16kB (UME) 5*32kB (UME) 1*64kB (E) 3*128kB (UME) 2*256kB (UE) 1*512kB (E) 2*1024kB (UE) 2*2048kB (ME) 0*4096kB = 7860kB
[ 4075.844961] Node 0 DMA32: 234*4kB (UME) 238*8kB (UME) 426*16kB (UM) 43*32kB (UM) 28*64kB (UM) 11*128kB (UM) 0*256kB 0*512kB 0*1024kB 1*2048kB (H) 0*4096kB = 16280kB
[ 4075.847915] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
[ 4075.849266] 474599 total pagecache pages
[ 4075.850058] 0 pages in swap cache
[ 4075.850808] Swap cache stats: add 0, delete 0, find 0/0
[ 4075.851990] Free swap  = 0kB
[ 4075.852811] Total swap = 0kB
[ 4075.853635] 524140 pages RAM
[ 4075.854351] 0 pages HighMem/MovableOnly
[ 4075.855048] 17832 pages reserved

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-27 17:23   ` David Sterba
@ 2019-03-28 12:49     ` Nikolay Borisov
  2019-03-28 12:52       ` Nikolay Borisov
  2019-03-28 14:11       ` David Sterba
  0 siblings, 2 replies; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-28 12:49 UTC (permalink / raw)
  To: dsterba, linux-btrfs



On 27.03.19 г. 19:23 ч., David Sterba wrote:
> On Tue, Mar 12, 2019 at 05:20:24PM +0200, Nikolay Borisov wrote:
>> @@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
>>  				unsigned int write_flags)
>>  {
>>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>> -	struct async_cow *async_cow;
>> +	struct async_cow *ctx;
>> +	struct async_chunk *async_chunk;
>>  	unsigned long nr_pages;
>>  	u64 cur_end;
>> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
>> +	int i;
>> +	bool should_compress;
>>  
>>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
>>  			 1, 0, NULL);
>> -	while (start < end) {
>> -		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
>> -		BUG_ON(!async_cow); /* -ENOMEM */
>> +
>> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
>> +	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
>> +		num_chunks = 1;
>> +		should_compress = false;
>> +	} else {
>> +		should_compress = true;
>> +	}
>> +
>> +	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);
> 
> This leads to OOM due to high order allocation. And this is worse than
> the previous state, where there are many small allocations that could
> potentially fail (but most likely will not due to GFP_NOFS and size <
> PAGE_SIZE).
> 
> So this needs to be reworked to avoid the costly allocations or reverted
> to the previous state.

Right, makes sense. In order to have simplified submission logic I'm
thinking of reworking the allocation to have a loop that allocates a
single item for every chunk, or alternatively switching to kvmalloc. I
think the fact that vmalloced memory might not be contiguous is not
critical for the metadata structures in this case?

> 
> btrfs/138               [19:44:05][ 4034.368157] run fstests btrfs/138 at 2019-03-25 19:44:05
> [ 4034.559716] BTRFS: device fsid 9300f07a-78f4-4ac6-8376-1a902ef26830 devid 1 transid 5 /dev/vdb
> [ 4034.573670] BTRFS info (device vdb): disk space caching is enabled
> [ 4034.575068] BTRFS info (device vdb): has skinny extents
> [ 4034.576258] BTRFS info (device vdb): flagging fs with big metadata feature
> [ 4034.580226] BTRFS info (device vdb): checking UUID tree
> [ 4066.104734] BTRFS info (device vdb): disk space caching is enabled
> [ 4066.108558] BTRFS info (device vdb): has skinny extents
> [ 4066.186856] BTRFS info (device vdb): setting 8 feature flag
> [ 4074.017307] BTRFS info (device vdb): disk space caching is enabled
> [ 4074.019646] BTRFS info (device vdb): has skinny extents
> [ 4074.065117] BTRFS info (device vdb): setting 16 feature flag
> [ 4075.787401] kworker/u8:12: page allocation failure: order:4, mode:0x604040(GFP_NOFS|__GFP_COMP), nodemask=(null)
> [ 4075.789581] CPU: 0 PID: 31258 Comm: kworker/u8:12 Not tainted 5.0.0-rc8-default+ #524
> [ 4075.791235] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c89-prebuilt.qemu.org 04/01/2014
> [ 4075.793334] Workqueue: writeback wb_workfn (flush-btrfs-718)
> [ 4075.794455] Call Trace:
> [ 4075.795029]  dump_stack+0x67/0x90
> [ 4075.795756]  warn_alloc.cold.131+0x73/0xf3
> [ 4075.796601]  __alloc_pages_slowpath+0xa0e/0xb50
> [ 4075.797595]  ? __wake_up_common_lock+0x89/0xc0
> [ 4075.798558]  __alloc_pages_nodemask+0x2bd/0x310
> [ 4075.799537]  kmalloc_order+0x14/0x60
> [ 4075.800382]  kmalloc_order_trace+0x1d/0x120
> [ 4075.801341]  btrfs_run_delalloc_range+0x3e6/0x4b0 [btrfs]
> [ 4075.802344]  writepage_delalloc+0xf8/0x150 [btrfs]
> [ 4075.802991]  __extent_writepage+0x113/0x420 [btrfs]
> [ 4075.803640]  extent_write_cache_pages+0x2a6/0x400 [btrfs]
> [ 4075.804340]  extent_writepages+0x52/0xa0 [btrfs]
> [ 4075.804951]  do_writepages+0x3e/0xe0
> [ 4075.805480]  ? writeback_sb_inodes+0x133/0x550
> [ 4075.806406]  __writeback_single_inode+0x54/0x640
> [ 4075.807315]  writeback_sb_inodes+0x204/0x550
> [ 4075.808112]  __writeback_inodes_wb+0x5d/0xb0
> [ 4075.808692]  wb_writeback+0x337/0x4a0
> [ 4075.809207]  wb_workfn+0x3a7/0x590
> [ 4075.809849]  process_one_work+0x246/0x610
> [ 4075.810665]  worker_thread+0x3c/0x390
> [ 4075.811415]  ? rescuer_thread+0x360/0x360
> [ 4075.812293]  kthread+0x116/0x130
> [ 4075.812965]  ? kthread_create_on_node+0x60/0x60
> [ 4075.813870]  ret_from_fork+0x24/0x30
> [ 4075.814664] Mem-Info:
> [ 4075.815167] active_anon:2942 inactive_anon:15105 isolated_anon:0
> [ 4075.815167]  active_file:2749 inactive_file:454876 isolated_file:0
> [ 4075.815167]  unevictable:0 dirty:68316 writeback:0 unstable:0
> [ 4075.815167]  slab_reclaimable:5500 slab_unreclaimable:6458
> [ 4075.815167]  mapped:940 shmem:15483 pagetables:51 bounce:0
> [ 4075.815167]  free:7068 free_pcp:297 free_cma:0
> [ 4075.823236] Node 0 active_anon:11768kB inactive_anon:60420kB active_file:10996kB inactive_file:1827676kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:3760kB dirty:277360kB writeback:0kB shmem:61932kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
> [ 4075.828200] Node 0 DMA free:7860kB min:44kB low:56kB high:68kB active_anon:0kB inactive_anon:4kB active_file:0kB inactive_file:8012kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
> [ 4075.834484] lowmem_reserve[]: 0 1955 1955 1955
> [ 4075.835419] Node 0 DMA32 free:11292kB min:5632kB low:7632kB high:9632kB active_anon:11768kB inactive_anon:60416kB active_file:10996kB inactive_file:1820532kB unevictable:0kB writepending:281184kB present:2080568kB managed:2009324kB mlocked:0kB kernel_stack:1984kB pagetables:204kB bounce:0kB free_pcp:132kB local_pcp:0kB free_cma:0k 
> [ 4075.841848] lowmem_reserve[]: 0 0 0 0
> [ 4075.842677] Node 0 DMA: 1*4kB (U) 2*8kB (U) 4*16kB (UME) 5*32kB (UME) 1*64kB (E) 3*128kB (UME) 2*256kB (UE) 1*512kB (E) 2*1024kB (UE) 2*2048kB (ME) 0*4096kB = 7860kB
> [ 4075.844961] Node 0 DMA32: 234*4kB (UME) 238*8kB (UME) 426*16kB (UM) 43*32kB (UM) 28*64kB (UM) 11*128kB (UM) 0*256kB 0*512kB 0*1024kB 1*2048kB (H) 0*4096kB = 16280kB
> [ 4075.847915] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
> [ 4075.849266] 474599 total pagecache pages
> [ 4075.850058] 0 pages in swap cache
> [ 4075.850808] Swap cache stats: add 0, delete 0, find 0/0
> [ 4075.851990] Free swap  = 0kB
> [ 4075.852811] Total swap = 0kB
> [ 4075.853635] 524140 pages RAM
> [ 4075.854351] 0 pages HighMem/MovableOnly
> [ 4075.855048] 17832 pages reserved
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-28 12:49     ` Nikolay Borisov
@ 2019-03-28 12:52       ` Nikolay Borisov
  2019-03-28 14:11       ` David Sterba
  1 sibling, 0 replies; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-28 12:52 UTC (permalink / raw)
  To: dsterba, linux-btrfs



On 28.03.19 г. 14:49 ч., Nikolay Borisov wrote:
> 
> 
> On 27.03.19 г. 19:23 ч., David Sterba wrote:
>> On Tue, Mar 12, 2019 at 05:20:24PM +0200, Nikolay Borisov wrote:
>>> @@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
>>>  				unsigned int write_flags)
>>>  {
>>>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>>> -	struct async_cow *async_cow;
>>> +	struct async_cow *ctx;
>>> +	struct async_chunk *async_chunk;
>>>  	unsigned long nr_pages;
>>>  	u64 cur_end;
>>> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
>>> +	int i;
>>> +	bool should_compress;
>>>  
>>>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
>>>  			 1, 0, NULL);
>>> -	while (start < end) {
>>> -		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
>>> -		BUG_ON(!async_cow); /* -ENOMEM */
>>> +
>>> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
>>> +	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
>>> +		num_chunks = 1;
>>> +		should_compress = false;
>>> +	} else {
>>> +		should_compress = true;
>>> +	}
>>> +
>>> +	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);
>>
>> This leads to OOM due to high order allocation. And this is worse than
>> the previous state, where there are many small allocations that could
>> potentially fail (but most likely will not due to GFP_NOFS and size <
>> PAGE_SIZE).
>>
>> So this needs to be reworked to avoid the costly allocations or reverted
>> to the previous state.
> 
> Right, makes sense. In order to have a simplified submission logic I
> think to rework the allocation to have a loop that allocates a single
> item for every chunk or alternatively switch to using kvmalloc? I think
> the fact that vmalloced memory might not be contiguous is not critical
> for the metadata structures in this case?

Just had a quick read through gfp_mask-from-fs-io.rst:

vmalloc doesn't support GFP_NOFS semantic because there are hardcoded
GFP_KERNEL allocations deep inside the allocator which are quite
non-trivial to fix up. That means that calling ``vmalloc`` with
GFP_NOFS/GFP_NOIO is almost always a bug. The good news is that the
NOFS/NOIO semantic can be achieved by the scope API.


> 
>>
>> btrfs/138               [19:44:05][ 4034.368157] run fstests btrfs/138 at 2019-03-25 19:44:05
>> [ 4034.559716] BTRFS: device fsid 9300f07a-78f4-4ac6-8376-1a902ef26830 devid 1 transid 5 /dev/vdb
>> [ 4034.573670] BTRFS info (device vdb): disk space caching is enabled
>> [ 4034.575068] BTRFS info (device vdb): has skinny extents
>> [ 4034.576258] BTRFS info (device vdb): flagging fs with big metadata feature
>> [ 4034.580226] BTRFS info (device vdb): checking UUID tree
>> [ 4066.104734] BTRFS info (device vdb): disk space caching is enabled
>> [ 4066.108558] BTRFS info (device vdb): has skinny extents
>> [ 4066.186856] BTRFS info (device vdb): setting 8 feature flag
>> [ 4074.017307] BTRFS info (device vdb): disk space caching is enabled
>> [ 4074.019646] BTRFS info (device vdb): has skinny extents
>> [ 4074.065117] BTRFS info (device vdb): setting 16 feature flag
>> [ 4075.787401] kworker/u8:12: page allocation failure: order:4, mode:0x604040(GFP_NOFS|__GFP_COMP), nodemask=(null)
>> [ 4075.789581] CPU: 0 PID: 31258 Comm: kworker/u8:12 Not tainted 5.0.0-rc8-default+ #524
>> [ 4075.791235] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c89-prebuilt.qemu.org 04/01/2014
>> [ 4075.793334] Workqueue: writeback wb_workfn (flush-btrfs-718)
>> [ 4075.794455] Call Trace:
>> [ 4075.795029]  dump_stack+0x67/0x90
>> [ 4075.795756]  warn_alloc.cold.131+0x73/0xf3
>> [ 4075.796601]  __alloc_pages_slowpath+0xa0e/0xb50
>> [ 4075.797595]  ? __wake_up_common_lock+0x89/0xc0
>> [ 4075.798558]  __alloc_pages_nodemask+0x2bd/0x310
>> [ 4075.799537]  kmalloc_order+0x14/0x60
>> [ 4075.800382]  kmalloc_order_trace+0x1d/0x120
>> [ 4075.801341]  btrfs_run_delalloc_range+0x3e6/0x4b0 [btrfs]
>> [ 4075.802344]  writepage_delalloc+0xf8/0x150 [btrfs]
>> [ 4075.802991]  __extent_writepage+0x113/0x420 [btrfs]
>> [ 4075.803640]  extent_write_cache_pages+0x2a6/0x400 [btrfs]
>> [ 4075.804340]  extent_writepages+0x52/0xa0 [btrfs]
>> [ 4075.804951]  do_writepages+0x3e/0xe0
>> [ 4075.805480]  ? writeback_sb_inodes+0x133/0x550
>> [ 4075.806406]  __writeback_single_inode+0x54/0x640
>> [ 4075.807315]  writeback_sb_inodes+0x204/0x550
>> [ 4075.808112]  __writeback_inodes_wb+0x5d/0xb0
>> [ 4075.808692]  wb_writeback+0x337/0x4a0
>> [ 4075.809207]  wb_workfn+0x3a7/0x590
>> [ 4075.809849]  process_one_work+0x246/0x610
>> [ 4075.810665]  worker_thread+0x3c/0x390
>> [ 4075.811415]  ? rescuer_thread+0x360/0x360
>> [ 4075.812293]  kthread+0x116/0x130
>> [ 4075.812965]  ? kthread_create_on_node+0x60/0x60
>> [ 4075.813870]  ret_from_fork+0x24/0x30
>> [ 4075.814664] Mem-Info:
>> [ 4075.815167] active_anon:2942 inactive_anon:15105 isolated_anon:0
>> [ 4075.815167]  active_file:2749 inactive_file:454876 isolated_file:0
>> [ 4075.815167]  unevictable:0 dirty:68316 writeback:0 unstable:0
>> [ 4075.815167]  slab_reclaimable:5500 slab_unreclaimable:6458
>> [ 4075.815167]  mapped:940 shmem:15483 pagetables:51 bounce:0
>> [ 4075.815167]  free:7068 free_pcp:297 free_cma:0
>> [ 4075.823236] Node 0 active_anon:11768kB inactive_anon:60420kB active_file:10996kB inactive_file:1827676kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:3760kB dirty:277360kB writeback:0kB shmem:61932kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
>> [ 4075.828200] Node 0 DMA free:7860kB min:44kB low:56kB high:68kB active_anon:0kB inactive_anon:4kB active_file:0kB inactive_file:8012kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
>> [ 4075.834484] lowmem_reserve[]: 0 1955 1955 1955
>> [ 4075.835419] Node 0 DMA32 free:11292kB min:5632kB low:7632kB high:9632kB active_anon:11768kB inactive_anon:60416kB active_file:10996kB inactive_file:1820532kB unevictable:0kB writepending:281184kB present:2080568kB managed:2009324kB mlocked:0kB kernel_stack:1984kB pagetables:204kB bounce:0kB free_pcp:132kB local_pcp:0kB free_cma:0k 
>> [ 4075.841848] lowmem_reserve[]: 0 0 0 0
>> [ 4075.842677] Node 0 DMA: 1*4kB (U) 2*8kB (U) 4*16kB (UME) 5*32kB (UME) 1*64kB (E) 3*128kB (UME) 2*256kB (UE) 1*512kB (E) 2*1024kB (UE) 2*2048kB (ME) 0*4096kB = 7860kB
>> [ 4075.844961] Node 0 DMA32: 234*4kB (UME) 238*8kB (UME) 426*16kB (UM) 43*32kB (UM) 28*64kB (UM) 11*128kB (UM) 0*256kB 0*512kB 0*1024kB 1*2048kB (H) 0*4096kB = 16280kB
>> [ 4075.847915] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
>> [ 4075.849266] 474599 total pagecache pages
>> [ 4075.850058] 0 pages in swap cache
>> [ 4075.850808] Swap cache stats: add 0, delete 0, find 0/0
>> [ 4075.851990] Free swap  = 0kB
>> [ 4075.852811] Total swap = 0kB
>> [ 4075.853635] 524140 pages RAM
>> [ 4075.854351] 0 pages HighMem/MovableOnly
>> [ 4075.855048] 17832 pages reserved
>>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-28 12:49     ` Nikolay Borisov
  2019-03-28 12:52       ` Nikolay Borisov
@ 2019-03-28 14:11       ` David Sterba
  2019-03-28 15:10         ` Nikolay Borisov
  1 sibling, 1 reply; 20+ messages in thread
From: David Sterba @ 2019-03-28 14:11 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: dsterba, linux-btrfs

On Thu, Mar 28, 2019 at 02:49:30PM +0200, Nikolay Borisov wrote:
> 
> 
> On 27.03.19 г. 19:23 ч., David Sterba wrote:
> > On Tue, Mar 12, 2019 at 05:20:24PM +0200, Nikolay Borisov wrote:
> >> @@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
> >>  				unsigned int write_flags)
> >>  {
> >>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> >> -	struct async_cow *async_cow;
> >> +	struct async_cow *ctx;
> >> +	struct async_chunk *async_chunk;
> >>  	unsigned long nr_pages;
> >>  	u64 cur_end;
> >> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
> >> +	int i;
> >> +	bool should_compress;
> >>  
> >>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
> >>  			 1, 0, NULL);
> >> -	while (start < end) {
> >> -		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
> >> -		BUG_ON(!async_cow); /* -ENOMEM */
> >> +
> >> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
> >> +	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
> >> +		num_chunks = 1;
> >> +		should_compress = false;
> >> +	} else {
> >> +		should_compress = true;
> >> +	}
> >> +
> >> +	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);
> > 
> > This leads to OOM due to high order allocation. And this is worse than
> > the previous state, where there are many small allocations that could
> > potentially fail (but most likely will not due to GFP_NOFS and size <
> > PAGE_SIZE).
> > 
> > So this needs to be reworked to avoid the costly allocations or reverted
> > to the previous state.
> 
> Right, makes sense. In order to have a simplified submission logic I
> think to rework the allocation to have a loop that allocates a single
> item for every chunk or alternatively switch to using kvmalloc? I think
> the fact that vmalloced memory might not be contiguous is not critical
> for the metadata structures in this case?

kvmalloc would work here, though there is some overhead involved
compared to bare kmalloc (the mappings). As the call happens on writeout
path, I'd be cautious about unnecessary overhead.

If looping over the range works, then we can allocate the largest size
that does not require kvmalloc (PAGE_ALLOC_COSTLY_ORDER) and then reuse
it for each iteration.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-28 14:11       ` David Sterba
@ 2019-03-28 15:10         ` Nikolay Borisov
  2019-03-28 15:45           ` David Sterba
  0 siblings, 1 reply; 20+ messages in thread
From: Nikolay Borisov @ 2019-03-28 15:10 UTC (permalink / raw)
  To: dsterba, linux-btrfs



On 28.03.19 г. 16:11 ч., David Sterba wrote:
> On Thu, Mar 28, 2019 at 02:49:30PM +0200, Nikolay Borisov wrote:
>>
>>
>> On 27.03.19 г. 19:23 ч., David Sterba wrote:
>>> On Tue, Mar 12, 2019 at 05:20:24PM +0200, Nikolay Borisov wrote:
>>>> @@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
>>>>  				unsigned int write_flags)
>>>>  {
>>>>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>>>> -	struct async_cow *async_cow;
>>>> +	struct async_cow *ctx;
>>>> +	struct async_chunk *async_chunk;
>>>>  	unsigned long nr_pages;
>>>>  	u64 cur_end;
>>>> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
>>>> +	int i;
>>>> +	bool should_compress;
>>>>  
>>>>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
>>>>  			 1, 0, NULL);
>>>> -	while (start < end) {
>>>> -		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
>>>> -		BUG_ON(!async_cow); /* -ENOMEM */
>>>> +
>>>> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
>>>> +	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
>>>> +		num_chunks = 1;
>>>> +		should_compress = false;
>>>> +	} else {
>>>> +		should_compress = true;
>>>> +	}
>>>> +
>>>> +	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);
>>>
>>> This leads to OOM due to high order allocation. And this is worse than
>>> the previous state, where there are many small allocations that could
>>> potentially fail (but most likely will not due to GFP_NOFS and size <
>>> PAGE_SIZE).
>>>
>>> So this needs to be reworked to avoid the costly allocations or reverted
>>> to the previous state.
>>
>> Right, makes sense. In order to have a simplified submission logic I
>> think to rework the allocation to have a loop that allocates a single
>> item for every chunk or alternatively switch to using kvmalloc? I think
>> the fact that vmalloced memory might not be contiguous is not critical
>> for the metadata structures in this case?
> 
> kvmalloc would work here, though there is some overhead involved
> compared to bare kmalloc (the mappings). As the call happens on writeout
> path, I'd be cautious about unnecessary overhead.
> 
> If looping over the range works, then we can allocate the largest size
> that does not require kvmalloc (PAGE_ALLOC_COSTLY_ORDER) and then reuse
> it for each iteration.

I'm afraid I don't understand your hybrid suggestion. I.e. something along 
the lines of: 

if (struct_size(ctx, chunks, num_chunks) < ((2 << PAGE_ALLOC_COSTLY_ORDER) * PAGE_SIZE))  {
     use kmalloc
} else { 

????
}

> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async
  2019-03-28 15:10         ` Nikolay Borisov
@ 2019-03-28 15:45           ` David Sterba
  0 siblings, 0 replies; 20+ messages in thread
From: David Sterba @ 2019-03-28 15:45 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: dsterba, linux-btrfs

On Thu, Mar 28, 2019 at 05:10:39PM +0200, Nikolay Borisov wrote:
> 
> 
> On 28.03.19 г. 16:11 ч., David Sterba wrote:
> > On Thu, Mar 28, 2019 at 02:49:30PM +0200, Nikolay Borisov wrote:
> >>
> >>
> >> On 27.03.19 г. 19:23 ч., David Sterba wrote:
> >>> On Tue, Mar 12, 2019 at 05:20:24PM +0200, Nikolay Borisov wrote:
> >>>> @@ -1190,45 +1201,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
> >>>>  				unsigned int write_flags)
> >>>>  {
> >>>>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> >>>> -	struct async_cow *async_cow;
> >>>> +	struct async_cow *ctx;
> >>>> +	struct async_chunk *async_chunk;
> >>>>  	unsigned long nr_pages;
> >>>>  	u64 cur_end;
> >>>> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
> >>>> +	int i;
> >>>> +	bool should_compress;
> >>>>  
> >>>>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
> >>>>  			 1, 0, NULL);
> >>>> -	while (start < end) {
> >>>> -		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
> >>>> -		BUG_ON(!async_cow); /* -ENOMEM */
> >>>> +
> >>>> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
> >>>> +	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
> >>>> +		num_chunks = 1;
> >>>> +		should_compress = false;
> >>>> +	} else {
> >>>> +		should_compress = true;
> >>>> +	}
> >>>> +
> >>>> +	ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);
> >>>
> >>> This leads to OOM due to high order allocation. And this is worse than
> >>> the previous state, where there are many small allocation that could
> >>> potentially fail (but most likely will not due to GFP_NOFS and size <
> >>> PAGE_SIZE).
> >>>
> >>> So this needs to be reworked to avoid the costly allocations or reverted
> >>> to the previous state.
> >>
> >> Right, makes sense. In order to have a simplified submission logic I
> >> think to rework the allocation to have a loop that allocates a single
> >> item for every chunk or alternatively switch to using kvmalloc? I think
> >> the fact that vmalloced memory might not be contiguous is not critical
> >> for the metadata structures in this case?
> > 
> > kvmalloc would work here, though there is some overhead involved
> > compared to bare kmalloc (the mappings). As the call happens on writeout
> > path, I'd be cautious about unnecessary overhead.
> > 
> > If looping over the range works, then we can allocate the largest size
> > that does not require kvmalloc (PAGE_ALLOC_COSTLY_ORDER) and then reuse
> > it for each iteration.
> 
> I'm afraid I don't understand your hybrid suggestion, i.e. something along 
> the lines of: 

The idea was to split the range into several chunks, allocate a buffer and
then reuse the memory, but this does not work as each submission requires
its own memory.  So let's do kvmalloc for now.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2019-03-28 15:44 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-12 15:20 [PATCH v4 0/7] Compress path cleanups Nikolay Borisov
2019-03-12 15:20 ` [PATCH 1/7] btrfs: Preallocate chunks in cow_file_range_async Nikolay Borisov
2019-03-12 15:35   ` Johannes Thumshirn
2019-03-27 17:23   ` David Sterba
2019-03-28 12:49     ` Nikolay Borisov
2019-03-28 12:52       ` Nikolay Borisov
2019-03-28 14:11       ` David Sterba
2019-03-28 15:10         ` Nikolay Borisov
2019-03-28 15:45           ` David Sterba
2019-03-12 15:20 ` [PATCH 2/7] btrfs: Rename async_cow to async_chunk Nikolay Borisov
2019-03-12 15:34   ` Johannes Thumshirn
2019-03-12 15:20 ` [PATCH 3/7] btrfs: Remove fs_info from struct async_chunk Nikolay Borisov
2019-03-12 15:20 ` [PATCH 4/7] btrfs: Make compress_file_range take only " Nikolay Borisov
2019-03-12 15:20 ` [PATCH 5/7] btrfs: Replace clear_extent_bit with unlock_extent Nikolay Borisov
2019-03-12 15:27   ` Johannes Thumshirn
2019-03-12 15:20 ` [PATCH 6/7] btrfs: Set iotree only once in submit_compressed_extents Nikolay Borisov
2019-03-12 15:30   ` Johannes Thumshirn
2019-03-12 15:20 ` [PATCH 7/7] btrfs: Factor out common extent locking code " Nikolay Borisov
2019-03-12 15:31   ` Johannes Thumshirn
2019-03-13 15:36 ` [PATCH v4 0/7] Compress path cleanups David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.