All of lore.kernel.org
 help / color / mirror / Atom feed
From: Naohiro Aota <naohiro.aota@wdc.com>
To: linux-btrfs@vger.kernel.org, dsterba@suse.com
Cc: hare@suse.com, linux-fsdevel@vger.kernel.org,
	Jens Axboe <axboe@kernel.dk>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Naohiro Aota <naohiro.aota@wdc.com>
Subject: [PATCH v11 15/40] btrfs: redirty released extent buffers in ZONED mode
Date: Tue, 22 Dec 2020 12:49:08 +0900	[thread overview]
Message-ID: <530bf9339d499c4f2209baeca7769a1c32a245bc.1608608848.git.naohiro.aota@wdc.com> (raw)
In-Reply-To: <06add214bc16ef08214de1594ecdfcc4cdcdbd78.1608608848.git.naohiro.aota@wdc.com>

Tree-manipulating operations such as merging nodes often release
once-allocated tree nodes. Btrfs cleans such nodes so that the pages in
the node are not uselessly written out. On ZONED volumes, however, this
optimization blocks subsequent IOs, because cancelling the write-out of
the freed blocks breaks the sequential write order expected by the
device.

This patch introduces a list of clean and unwritten extent buffers that
have been released in a transaction. Btrfs redirties these buffers so that
btree_write_cache_pages() can send proper bios to the devices.

In addition, it clears the entire content of the extent buffer so as not
to confuse raw block scanners, e.g. btrfsck. Because the content is
cleared, csum_dirty_buffer() would complain about a bytenr mismatch, so
skip the check and the checksum using the newly introduced buffer flag
EXTENT_BUFFER_NO_CHECK.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/disk-io.c     |  8 ++++++++
 fs/btrfs/extent-tree.c | 12 +++++++++++-
 fs/btrfs/extent_io.c   |  4 ++++
 fs/btrfs/extent_io.h   |  2 ++
 fs/btrfs/transaction.c | 10 ++++++++++
 fs/btrfs/transaction.h |  3 +++
 fs/btrfs/tree-log.c    |  6 ++++++
 fs/btrfs/zoned.c       | 37 +++++++++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  7 +++++++
 9 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 192e366f8afc..e9b6c6a21681 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -459,6 +459,12 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec
 		return 0;
 
 	found_start = btrfs_header_bytenr(eb);
+
+	if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
+		WARN_ON(found_start != 0);
+		return 0;
+	}
+
 	/*
 	 * Please do not consolidate these warnings into a single if.
 	 * It is useful to know what went wrong.
@@ -4697,6 +4703,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 				     EXTENT_DIRTY);
 	btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
 
+	btrfs_free_redirty_list(cur_trans);
+
 	cur_trans->state =TRANS_STATE_COMPLETED;
 	wake_up(&cur_trans->commit_wait);
 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 88e103451aca..c3e955bbd2ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3374,8 +3374,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
 			ret = check_ref_cleanup(trans, buf->start);
-			if (!ret)
+			if (!ret) {
+				btrfs_redirty_list_add(trans->transaction, buf);
 				goto out;
+			}
 		}
 
 		pin = 0;
@@ -3387,6 +3389,13 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 
+		if (btrfs_is_zoned(fs_info)) {
+			btrfs_redirty_list_add(trans->transaction, buf);
+			pin_down_extent(trans, cache, buf->start, buf->len, 1);
+			btrfs_put_block_group(cache);
+			goto out;
+		}
+
 		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
 		btrfs_add_free_space(cache, buf->start, buf->len);
@@ -4726,6 +4735,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	__btrfs_tree_lock(buf, nest);
 	btrfs_clean_tree_block(buf);
 	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
+	clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
 
 	set_extent_buffer_uptodate(buf);
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6e3b72e63e42..129d571a5c1a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -24,6 +24,7 @@
 #include "rcu-string.h"
 #include "backref.h"
 #include "disk-io.h"
+#include "zoned.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -5048,6 +5049,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
 
 	btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
 			     &fs_info->allocated_ebs);
+	INIT_LIST_HEAD(&eb->release_list);
 
 	spin_lock_init(&eb->refs_lock);
 	atomic_set(&eb->refs, 1);
@@ -5825,6 +5827,8 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 	char *src = (char *)srcv;
 	unsigned long i = get_eb_page_index(start);
 
+	WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
+
 	if (check_eb_range(eb, start, len))
 		return;
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 19221095c635..5a81268c4d8c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -31,6 +31,7 @@ enum {
 	EXTENT_BUFFER_IN_TREE,
 	/* write IO error */
 	EXTENT_BUFFER_WRITE_ERR,
+	EXTENT_BUFFER_NO_CHECK,
 };
 
 /* these are flags for __process_pages_contig */
@@ -93,6 +94,7 @@ struct extent_buffer {
 	struct rw_semaphore lock;
 
 	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
+	struct list_head release_list;
 #ifdef CONFIG_BTRFS_DEBUG
 	struct list_head leak_list;
 #endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4ffe66164fa3..ce480fe78531 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -21,6 +21,7 @@
 #include "qgroup.h"
 #include "block-group.h"
 #include "space-info.h"
+#include "zoned.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
@@ -375,6 +376,8 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
 	spin_lock_init(&cur_trans->dirty_bgs_lock);
 	INIT_LIST_HEAD(&cur_trans->deleted_bgs);
 	spin_lock_init(&cur_trans->dropped_roots_lock);
+	INIT_LIST_HEAD(&cur_trans->releasing_ebs);
+	spin_lock_init(&cur_trans->releasing_ebs_lock);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
 			IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
@@ -2344,6 +2347,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		goto scrub_continue;
 	}
 
+	/*
+	 * At this point, we should have written all the tree blocks
+	 * allocated in this transaction. So it's now safe to free the
+	 * redirtied extent buffers.
+	 */
+	btrfs_free_redirty_list(cur_trans);
+
 	ret = write_all_supers(fs_info, 0);
 	/*
 	 * the super is written, we can safely allow the tree-loggers
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 31ca81bad822..660b4e1f1181 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -92,6 +92,9 @@ struct btrfs_transaction {
 	 */
 	atomic_t pending_ordered;
 	wait_queue_head_t pending_wait;
+
+	spinlock_t releasing_ebs_lock;
+	struct list_head releasing_ebs;
 };
 
 #define __TRANS_FREEZABLE	(1U << 0)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8ee0700a980f..930e752686b4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -19,6 +19,7 @@
 #include "qgroup.h"
 #include "block-group.h"
 #include "space-info.h"
+#include "zoned.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -2752,6 +2753,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 						free_extent_buffer(next);
 						return ret;
 					}
+					btrfs_redirty_list_add(
+						trans->transaction, next);
 				} else {
 					if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
 						clear_extent_buffer_dirty(next);
@@ -3296,6 +3299,9 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 	clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
 			  EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
 	extent_io_tree_release(&log->log_csum_range);
+
+	if (trans && log->node)
+		btrfs_redirty_list_add(trans->transaction, log->node);
 	btrfs_put_root(log);
 }
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 02373a7433b8..73e083a86213 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -10,6 +10,7 @@
 #include "rcu-string.h"
 #include "disk-io.h"
 #include "block-group.h"
+#include "transaction.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -1151,3 +1152,39 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
 	 */
 	btrfs_free_excluded_extents(cache);
 }
+
+void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+			    struct extent_buffer *eb)
+{
+	struct btrfs_fs_info *fs_info = eb->fs_info;
+
+	if (!btrfs_is_zoned(fs_info) ||
+	    btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) ||
+	    !list_empty(&eb->release_list))
+		return;
+
+	set_extent_buffer_dirty(eb);
+	set_extent_bits_nowait(&trans->dirty_pages, eb->start,
+			       eb->start + eb->len - 1, EXTENT_DIRTY);
+	memzero_extent_buffer(eb, 0, eb->len);
+	set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
+
+	spin_lock(&trans->releasing_ebs_lock);
+	list_add_tail(&eb->release_list, &trans->releasing_ebs);
+	spin_unlock(&trans->releasing_ebs_lock);
+	atomic_inc(&eb->refs);
+}
+
+void btrfs_free_redirty_list(struct btrfs_transaction *trans)
+{
+	spin_lock(&trans->releasing_ebs_lock);
+	while (!list_empty(&trans->releasing_ebs)) {
+		struct extent_buffer *eb;
+
+		eb = list_first_entry(&trans->releasing_ebs,
+				      struct extent_buffer, release_list);
+		list_del_init(&eb->release_list);
+		free_extent_buffer(eb);
+	}
+	spin_unlock(&trans->releasing_ebs_lock);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 0cc0b27e9437..b2ce16de0c22 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -43,6 +43,9 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
 int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size);
 int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
 void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
+void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+			    struct extent_buffer *eb);
+void btrfs_free_redirty_list(struct btrfs_transaction *trans);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -127,6 +130,10 @@ static inline int btrfs_load_block_group_zone_info(
 
 static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
 
+static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+					  struct extent_buffer *eb) { }
+static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
-- 
2.27.0


  parent reply	other threads:[~2020-12-22  3:53 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-22  3:48 [PATCH v11 00/40] btrfs: zoned block device support Naohiro Aota
2020-12-22  3:48 ` [PATCH v11 01/40] block: add bio_add_zone_append_page Naohiro Aota
2020-12-22  3:48   ` [PATCH v11 02/40] iomap: support REQ_OP_ZONE_APPEND Naohiro Aota
2021-01-04 22:30     ` Darrick J. Wong
2021-01-13  9:34       ` Johannes Thumshirn
2020-12-22  3:48   ` [PATCH v11 03/40] btrfs: defer loading zone info after opening trees Naohiro Aota
2021-01-11 19:16     ` Josef Bacik
2020-12-22  3:48   ` [PATCH v11 04/40] btrfs: change superblock location on conventional zone Naohiro Aota
2021-01-11 19:47     ` Josef Bacik
2021-01-14 15:10       ` Naohiro Aota
2020-12-22  3:48   ` [PATCH v11 05/40] btrfs: release path before calling into btrfs_load_block_group_zone_info Naohiro Aota
2021-01-11 20:01     ` Josef Bacik
2021-01-12  8:05       ` Johannes Thumshirn
2020-12-22  3:48   ` [PATCH v11 06/40] btrfs: do not load fs_info->zoned from incompat flag Naohiro Aota
2021-01-11 20:08     ` Josef Bacik
2021-01-12  8:00       ` Johannes Thumshirn
2020-12-22  3:49   ` [PATCH v11 07/40] btrfs: disallow fitrim in ZONED mode Naohiro Aota
2021-01-11 20:12     ` Josef Bacik
2021-01-12 10:19       ` Johannes Thumshirn
2020-12-22  3:49   ` [PATCH v11 08/40] btrfs: emulated zoned mode on non-zoned devices Naohiro Aota
2021-01-11 20:54     ` Josef Bacik
2021-01-13 17:58     ` David Sterba
2021-01-13 18:23       ` Johannes Thumshirn
2020-12-22  3:49   ` [PATCH v11 09/40] btrfs: implement zoned chunk allocator Naohiro Aota
2021-01-11 21:24     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 10/40] btrfs: verify device extent is aligned to zone Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 11/40] btrfs: load zone's allocation offset Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 12/40] btrfs: calculate allocation offset for conventional zones Naohiro Aota
2021-01-12 15:12     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 13/40] btrfs: track unusable bytes for zones Naohiro Aota
2021-01-12 15:45     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 14/40] btrfs: do sequential extent allocation in ZONED mode Naohiro Aota
2020-12-22  3:49   ` Naohiro Aota [this message]
2021-01-12 15:51     ` [PATCH v11 15/40] btrfs: redirty released extent buffers " Josef Bacik
2020-12-22  3:49   ` [PATCH v11 16/40] btrfs: advance allocation pointer after tree log node Naohiro Aota
2021-01-12 15:52     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 17/40] btrfs: enable to mount ZONED incompat flag Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 18/40] btrfs: reset zones of unused block groups Naohiro Aota
2021-01-12 15:54     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 19/40] btrfs: extract page adding function Naohiro Aota
2021-01-11 16:22     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 20/40] btrfs: use bio_add_zone_append_page for zoned btrfs Naohiro Aota
2021-01-12 15:55     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 21/40] btrfs: handle REQ_OP_ZONE_APPEND as writing Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 22/40] btrfs: split ordered extent when bio is sent Naohiro Aota
2021-01-12 15:59     ` Josef Bacik
2021-01-13 10:05       ` Johannes Thumshirn
2021-01-15  7:08     ` Su Yue
2020-12-22  3:49   ` [PATCH v11 23/40] btrfs: extend btrfs_rmap_block for specifying a device Naohiro Aota
2021-01-12 16:00     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 24/40] btrfs: cache if block-group is on a sequential zone Naohiro Aota
2021-01-12 16:01     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 25/40] btrfs: use ZONE_APPEND write for ZONED btrfs Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 26/40] btrfs: enable zone append writing for direct IO Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 27/40] btrfs: introduce dedicated data write path for ZONED mode Naohiro Aota
2021-01-12 19:24     ` Josef Bacik
2021-01-13 10:41       ` Naohiro Aota
2021-01-12 19:28     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 28/40] btrfs: serialize meta IOs on " Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 29/40] btrfs: wait existing extents before truncating Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 30/40] btrfs: avoid async metadata checksum on ZONED mode Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 31/40] btrfs: mark block groups to copy for device-replace Naohiro Aota
2021-01-12 19:30     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 32/40] btrfs: implement cloning for ZONED device-replace Naohiro Aota
2021-01-12 19:36     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 33/40] btrfs: implement copying " Naohiro Aota
2021-01-12 19:37     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 34/40] btrfs: support dev-replace in ZONED mode Naohiro Aota
2021-01-12 19:37     ` Josef Bacik
2021-01-12 19:40     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 35/40] btrfs: enable relocation " Naohiro Aota
2021-01-12 19:43     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 36/40] btrfs: relocate block group to repair IO failure in ZONED Naohiro Aota
2020-12-22  3:49   ` [PATCH v11 37/40] btrfs: split alloc_log_tree() Naohiro Aota
2021-01-12 19:44     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 38/40] btrfs: extend zoned allocator to use dedicated tree-log block group Naohiro Aota
2021-01-12 19:48     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 39/40] btrfs: serialize log transaction on ZONED mode Naohiro Aota
2021-01-12 19:50     ` Josef Bacik
2020-12-22  3:49   ` [PATCH v11 40/40] btrfs: reorder log node allocation Naohiro Aota
2020-12-22 13:35   ` [PATCH v11 01/40] block: add bio_add_zone_append_page Christoph Hellwig
2021-01-12 13:48     ` Johannes Thumshirn
2020-12-22 13:38 ` [PATCH v11 00/40] btrfs: zoned block device support Christoph Hellwig
2021-01-11 10:17   ` Johannes Thumshirn
2021-01-12 10:23     ` hch
2021-01-12 10:26       ` Johannes Thumshirn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=530bf9339d499c4f2209baeca7769a1c32a245bc.1608608848.git.naohiro.aota@wdc.com \
    --to=naohiro.aota@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=darrick.wong@oracle.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=hch@infradead.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.