All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] btrfs: Handle btrfs_reloc_clone_csums error correctly to avoid deadlock
@ 2017-02-15  8:49 Qu Wenruo
  2017-02-15 15:59 ` Filipe Manana
  0 siblings, 1 reply; 9+ messages in thread
From: Qu Wenruo @ 2017-02-15  8:49 UTC (permalink / raw)
  To: linux-btrfs

If run btrfs/125 with nospace_cache or space_cache=v2 mount option,
btrfs will block with the following backtrace:

Call Trace:
 __schedule+0x2d4/0xae0
 schedule+0x3d/0x90
 btrfs_start_ordered_extent+0x160/0x200 [btrfs]
 ? wake_atomic_t_function+0x60/0x60
 btrfs_run_ordered_extent_work+0x25/0x40 [btrfs]
 btrfs_scrubparity_helper+0x1c1/0x620 [btrfs]
 btrfs_flush_delalloc_helper+0xe/0x10 [btrfs]
 process_one_work+0x2af/0x720
 ? process_one_work+0x22b/0x720
 worker_thread+0x4b/0x4f0
 kthread+0x10f/0x150
 ? process_one_work+0x720/0x720
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x2e/0x40

The direct cause is the error handler in run_delalloc_nocow() doesn't
handle error from btrfs_reloc_clone_csums() well.

The related part call path will be:
__extent_writepage
|- writepage_delalloc()
|  |- run_delalloc_range()
|     |- run_delalloc_nocow()
|        |- btrfs_add_ordered_extent()
|        |  Now one ordered extent for file range, e.g [0, 1M) is inserted
|        |
|        |- btrfs_reloc_clone_csums()
|        |  Fails with -EIO, as RAID5/6 doesn't repair some csum tree
|        |  blocks
|        |
|        |- extent_clear_unlock_delalloc()
|           Error routine, unlock and clear page DIRTY, end page writeback
|           So the remaining 255 pages will not go through writeback
|
|- __extent_writepage_io()
   |- writepage_end_io_hook()  
      |- btrfs_dev_test_ordered_pending()
         Reduce ordered_extent->bytes_left by 4K.
         Still have (1M - 4K) to finish.

While the remaining 255 pages will not go through IO nor trigger
writepage_end_io_hook(), the ordered extent for [0, 1M) will
never finish, and blocking current transaction forever.

Although the root cause is still in RAID5/6, it won't hurt to fix the
error routine first.

This patch will cleanup the ordered extent in error routine, so at least
we won't cause deadlock.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 fs/btrfs/extent_io.c    |  1 -
 fs/btrfs/inode.c        | 10 ++++++++--
 fs/btrfs/ordered-data.c | 25 +++++++++++++++++++++++++
 fs/btrfs/ordered-data.h | 10 ++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ac383a3a649..a14d1b0840c5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3258,7 +3258,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
 					       delalloc_end,
 					       &page_started,
 					       nr_written);
-		/* File system has been set read-only */
 		if (ret) {
 			SetPageError(page);
 			/* fill_delalloc should be return < 0 for error
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1e861a063721..3c3ade58afd7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1052,8 +1052,11 @@ static noinline int cow_file_range(struct inode *inode,
 		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
 			ret = btrfs_reloc_clone_csums(inode, start,
 						      cur_alloc_size);
-			if (ret)
+			if (ret) {
+				btrfs_clean_ordered_extent(inode, start,
+							   ram_size);
 				goto out_drop_extent_cache;
+			}
 		}
 
 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1538,7 +1541,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	if (!ret)
 		ret = err;
 
-	if (ret && cur_offset < end)
+	if (ret && cur_offset < end) {
 		extent_clear_unlock_delalloc(inode, cur_offset, end, end,
 					     locked_page, EXTENT_LOCKED |
 					     EXTENT_DELALLOC | EXTENT_DEFRAG |
@@ -1546,6 +1549,9 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 					     PAGE_CLEAR_DIRTY |
 					     PAGE_SET_WRITEBACK |
 					     PAGE_END_WRITEBACK);
+		btrfs_clean_ordered_extent(inode, cur_offset,
+					   end - cur_offset + 1);
+	}
 	btrfs_free_path(path);
 	return ret;
 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 041c3326d109..dba1cf3464a7 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -650,6 +650,31 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	wake_up(&entry->wait);
 }
 
+void btrfs_clean_ordered_extent(struct inode *inode, u64 file_offset,
+				u64 ram_len)
+{
+	struct btrfs_ordered_extent *entry;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	entry = btrfs_lookup_ordered_range(inode, file_offset, ram_len);
+	if (!entry || entry->file_offset != file_offset ||
+	    entry->len != ram_len)
+		goto not_found;
+
+	/* Same as btrfs_finish_ordered_io() */
+	btrfs_remove_ordered_extent(inode, entry);
+	btrfs_put_ordered_extent(entry);
+	btrfs_put_ordered_extent(entry);
+	return;
+
+not_found:
+	WARN_ON(1);
+	btrfs_err(root->fs_info,
+	"failed to find and clean ordered extent: root %llu ino %llu file_offset %llu len %llu",
+		  root->objectid, btrfs_ino(inode), file_offset, ram_len);
+	return;
+}
+
 static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
 {
 	struct btrfs_ordered_extent *ordered;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5f2b0ca28705..7a989778aa89 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -163,6 +163,16 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
 void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
 void btrfs_remove_ordered_extent(struct inode *inode,
 				struct btrfs_ordered_extent *entry);
+
+/*
+ * Function to cleanup an allocated ordered extent in error routine.
+ *
+ * As error handler in run_delalloc_range() will clear all related pages
+ * and skip their IO, we have no method to finish inserted ordered extent.
+ * So we must use this function to clean it up.
+ */
+void btrfs_clean_ordered_extent(struct inode *inode, u64 file_offset,
+				u64 ram_len);
 int btrfs_dec_test_ordered_pending(struct inode *inode,
 				   struct btrfs_ordered_extent **cached,
 				   u64 file_offset, u64 io_size, int uptodate);
-- 
2.11.1




^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-02-21  0:25 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-15  8:49 [PATCH] btrfs: Handle btrfs_reloc_clone_csums error correctly to avoid deadlock Qu Wenruo
2017-02-15 15:59 ` Filipe Manana
2017-02-16  0:39   ` Qu Wenruo
2017-02-16 10:03     ` Filipe Manana
2017-02-17  0:37       ` Qu Wenruo
2017-02-17 15:25         ` Filipe Manana
2017-02-20  0:31           ` Qu Wenruo
2017-02-20 13:43             ` Filipe Manana
2017-02-21  0:25               ` Qu Wenruo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.