Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 0/2] Btrfs: workqueue cleanups
@ 2019-08-13 17:33 Omar Sandoval
  2019-08-13 17:33 ` [PATCH v2 1/2] Btrfs: get rid of unique workqueue helper functions Omar Sandoval
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Omar Sandoval @ 2019-08-13 17:33 UTC (permalink / raw)
  To: linux-btrfs; +Cc: kernel-team

From: Omar Sandoval <osandov@fb.com>

This does some cleanups to the Btrfs workqueue code following my
previous fix [1]. Changed since v1 [2]:

- Removed errant Fixes: tag in patch 1
- Fixed a comment typo in patch 2
- Added NB: to comments in patch 2
- Added Nikolay and Filipe's reviewed-by tags

Thanks!

1: https://lore.kernel.org/linux-btrfs/0bea516a54b26e4e1c42e6fe47548cb48cc4172b.1565112813.git.osandov@fb.com/
2: https://lore.kernel.org/linux-btrfs/cover.1565680721.git.osandov@fb.com/

Omar Sandoval (2):
  Btrfs: get rid of unique workqueue helper functions
  Btrfs: get rid of pointless wtag variable in async-thread.c

 fs/btrfs/async-thread.c      | 79 ++++++++++--------------------------
 fs/btrfs/async-thread.h      | 33 +--------------
 fs/btrfs/block-group.c       |  3 +-
 fs/btrfs/delayed-inode.c     |  4 +-
 fs/btrfs/disk-io.c           | 34 +++++-----------
 fs/btrfs/inode.c             | 36 +++++-----------
 fs/btrfs/ordered-data.c      |  1 -
 fs/btrfs/qgroup.c            |  1 -
 fs/btrfs/raid56.c            |  5 +--
 fs/btrfs/reada.c             |  3 +-
 fs/btrfs/scrub.c             | 14 +++----
 fs/btrfs/volumes.c           |  3 +-
 include/trace/events/btrfs.h |  6 +--
 13 files changed, 61 insertions(+), 161 deletions(-)

-- 
2.22.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2 1/2] Btrfs: get rid of unique workqueue helper functions
  2019-08-13 17:33 [PATCH v2 0/2] Btrfs: workqueue cleanups Omar Sandoval
@ 2019-08-13 17:33 ` Omar Sandoval
  2019-08-13 17:33 ` [PATCH v2 2/2] Btrfs: get rid of pointless wtag variable in async-thread.c Omar Sandoval
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Omar Sandoval @ 2019-08-13 17:33 UTC (permalink / raw)
  To: linux-btrfs; +Cc: kernel-team, Nikolay Borisov, Filipe Manana

From: Omar Sandoval <osandov@fb.com>

Commit 9e0af2376434 ("Btrfs: fix task hang under heavy compressed
write") worked around the issue that a recycled work item could get a
false dependency on the original work item due to how the workqueue code
guarantees non-reentrancy. It did so by giving different work functions
to different types of work.

However, the fix in "Btrfs: fix workqueue deadlock on dependent
filesystems" is more complete, as it prevents a work item from being
recycled at all (except for a tiny window that the kernel workqueue code
handles for us). This fix obsoletes the previous fix, so we don't need
the unique helpers for correctness. The only other reason to keep them
would be so they show up in stack traces, but they always seem to be
optimized to a tail call, so they don't show up anyways. So, let's just
get rid of the extra indirection.

While we're here, rename normal_work_helper() to the more informative
btrfs_work_helper().

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/async-thread.c  | 58 +++++++++-------------------------------
 fs/btrfs/async-thread.h  | 33 ++---------------------
 fs/btrfs/block-group.c   |  3 +--
 fs/btrfs/delayed-inode.c |  4 +--
 fs/btrfs/disk-io.c       | 34 ++++++++---------------
 fs/btrfs/inode.c         | 36 ++++++++-----------------
 fs/btrfs/ordered-data.c  |  1 -
 fs/btrfs/qgroup.c        |  1 -
 fs/btrfs/raid56.c        |  5 ++--
 fs/btrfs/reada.c         |  3 +--
 fs/btrfs/scrub.c         | 14 +++++-----
 fs/btrfs/volumes.c       |  3 +--
 12 files changed, 50 insertions(+), 145 deletions(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 6b8ad4a1b568..d105d3df6fa6 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -53,16 +53,6 @@ struct btrfs_workqueue {
 	struct __btrfs_workqueue *high;
 };
 
-static void normal_work_helper(struct btrfs_work *work);
-
-#define BTRFS_WORK_HELPER(name)					\
-noinline_for_stack void btrfs_##name(struct work_struct *arg)		\
-{									\
-	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
-					       normal_work);		\
-	normal_work_helper(work);					\
-}
-
 struct btrfs_fs_info *
 btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
 {
@@ -89,29 +79,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
 	return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
 }
 
-BTRFS_WORK_HELPER(worker_helper);
-BTRFS_WORK_HELPER(delalloc_helper);
-BTRFS_WORK_HELPER(flush_delalloc_helper);
-BTRFS_WORK_HELPER(cache_helper);
-BTRFS_WORK_HELPER(submit_helper);
-BTRFS_WORK_HELPER(fixup_helper);
-BTRFS_WORK_HELPER(endio_helper);
-BTRFS_WORK_HELPER(endio_meta_helper);
-BTRFS_WORK_HELPER(endio_meta_write_helper);
-BTRFS_WORK_HELPER(endio_raid56_helper);
-BTRFS_WORK_HELPER(endio_repair_helper);
-BTRFS_WORK_HELPER(rmw_helper);
-BTRFS_WORK_HELPER(endio_write_helper);
-BTRFS_WORK_HELPER(freespace_write_helper);
-BTRFS_WORK_HELPER(delayed_meta_helper);
-BTRFS_WORK_HELPER(readahead_helper);
-BTRFS_WORK_HELPER(qgroup_rescan_helper);
-BTRFS_WORK_HELPER(extent_refs_helper);
-BTRFS_WORK_HELPER(scrub_helper);
-BTRFS_WORK_HELPER(scrubwrc_helper);
-BTRFS_WORK_HELPER(scrubnc_helper);
-BTRFS_WORK_HELPER(scrubparity_helper);
-
 static struct __btrfs_workqueue *
 __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
 			unsigned int flags, int limit_active, int thresh)
@@ -302,12 +269,13 @@ static void run_ordered_work(struct btrfs_work *self)
 			 * original work item cannot depend on the recycled work
 			 * item in that case (see find_worker_executing_work()).
 			 *
-			 * Note that the work of one Btrfs filesystem may depend
-			 * on the work of another Btrfs filesystem via, e.g., a
-			 * loop device. Therefore, we must not allow the current
-			 * work item to be recycled until we are really done,
-			 * otherwise we break the above assumption and can
-			 * deadlock.
+			 * Note that different types of Btrfs work can depend on
+			 * each other, and one type of work on one Btrfs
+			 * filesystem may even depend on the same type of work
+			 * on another Btrfs filesystem via, e.g., a loop device.
+			 * Therefore, we must not allow the current work item to
+			 * be recycled until we are really done, otherwise we
+			 * break the above assumption and can deadlock.
 			 */
 			free_self = true;
 		} else {
@@ -331,8 +299,10 @@ static void run_ordered_work(struct btrfs_work *self)
 	}
 }
 
-static void normal_work_helper(struct btrfs_work *work)
+static void btrfs_work_helper(struct work_struct *normal_work)
 {
+	struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
+					       normal_work);
 	struct __btrfs_workqueue *wq;
 	void *wtag;
 	int need_order = 0;
@@ -362,15 +332,13 @@ static void normal_work_helper(struct btrfs_work *work)
 		trace_btrfs_all_work_done(wq->fs_info, wtag);
 }
 
-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
-		     btrfs_func_t func,
-		     btrfs_func_t ordered_func,
-		     btrfs_func_t ordered_free)
+void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
+		     btrfs_func_t ordered_func, btrfs_func_t ordered_free)
 {
 	work->func = func;
 	work->ordered_func = ordered_func;
 	work->ordered_free = ordered_free;
-	INIT_WORK(&work->normal_work, uniq_func);
+	INIT_WORK(&work->normal_work, btrfs_work_helper);
 	INIT_LIST_HEAD(&work->ordered_list);
 	work->flags = 0;
 }
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 7861c9feba5f..c5bf2b117c05 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -29,42 +29,13 @@ struct btrfs_work {
 	unsigned long flags;
 };
 
-#define BTRFS_WORK_HELPER_PROTO(name)					\
-void btrfs_##name(struct work_struct *arg)
-
-BTRFS_WORK_HELPER_PROTO(worker_helper);
-BTRFS_WORK_HELPER_PROTO(delalloc_helper);
-BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
-BTRFS_WORK_HELPER_PROTO(cache_helper);
-BTRFS_WORK_HELPER_PROTO(submit_helper);
-BTRFS_WORK_HELPER_PROTO(fixup_helper);
-BTRFS_WORK_HELPER_PROTO(endio_helper);
-BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
-BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
-BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
-BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
-BTRFS_WORK_HELPER_PROTO(rmw_helper);
-BTRFS_WORK_HELPER_PROTO(endio_write_helper);
-BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
-BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
-BTRFS_WORK_HELPER_PROTO(readahead_helper);
-BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
-BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
-BTRFS_WORK_HELPER_PROTO(scrub_helper);
-BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
-BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
-BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
-
-
 struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 					      const char *name,
 					      unsigned int flags,
 					      int limit_active,
 					      int thresh);
-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
-		     btrfs_func_t func,
-		     btrfs_func_t ordered_func,
-		     btrfs_func_t ordered_free);
+void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
+		     btrfs_func_t ordered_func, btrfs_func_t ordered_free);
 void btrfs_queue_work(struct btrfs_workqueue *wq,
 		      struct btrfs_work *work);
 void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 262e62ef52a5..8c3a443a6a60 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -695,8 +695,7 @@ int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
 	caching_ctl->block_group = cache;
 	caching_ctl->progress = cache->key.objectid;
 	refcount_set(&caching_ctl->count, 1);
-	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
-			caching_thread, NULL, NULL);
+	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
 	spin_lock(&cache->lock);
 	/*
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6858a05606dd..d7127ea375c1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1366,8 +1366,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
 		return -ENOMEM;
 
 	async_work->delayed_root = delayed_root;
-	btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
-			btrfs_async_run_delayed_root, NULL, NULL);
+	btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
+			NULL);
 	async_work->nr = nr;
 
 	btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 589405eeb80f..fa4b6c3b166d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -696,43 +696,31 @@ static void end_workqueue_bio(struct bio *bio)
 	struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_workqueue *wq;
-	btrfs_work_func_t func;
 
 	fs_info = end_io_wq->info;
 	end_io_wq->status = bio->bi_status;
 
 	if (bio_op(bio) == REQ_OP_WRITE) {
-		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
+		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
 			wq = fs_info->endio_meta_write_workers;
-			func = btrfs_endio_meta_write_helper;
-		} else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
+		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
 			wq = fs_info->endio_freespace_worker;
-			func = btrfs_freespace_write_helper;
-		} else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
 			wq = fs_info->endio_raid56_workers;
-			func = btrfs_endio_raid56_helper;
-		} else {
+		else
 			wq = fs_info->endio_write_workers;
-			func = btrfs_endio_write_helper;
-		}
 	} else {
-		if (unlikely(end_io_wq->metadata ==
-			     BTRFS_WQ_ENDIO_DIO_REPAIR)) {
+		if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
 			wq = fs_info->endio_repair_workers;
-			func = btrfs_endio_repair_helper;
-		} else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
 			wq = fs_info->endio_raid56_workers;
-			func = btrfs_endio_raid56_helper;
-		} else if (end_io_wq->metadata) {
+		else if (end_io_wq->metadata)
 			wq = fs_info->endio_meta_workers;
-			func = btrfs_endio_meta_helper;
-		} else {
+		else
 			wq = fs_info->endio_workers;
-			func = btrfs_endio_helper;
-		}
 	}
 
-	btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
+	btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
 	btrfs_queue_work(wq, &end_io_wq->work);
 }
 
@@ -825,8 +813,8 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	async->mirror_num = mirror_num;
 	async->submit_bio_start = submit_bio_start;
 
-	btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
-			run_one_async_done, run_one_async_free);
+	btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
+			run_one_async_free);
 
 	async->bio_offset = bio_offset;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 612c25aac15c..1cd28df6a126 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1253,10 +1253,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 		async_chunk[i].write_flags = write_flags;
 		INIT_LIST_HEAD(&async_chunk[i].extents);
 
-		btrfs_init_work(&async_chunk[i].work,
-				btrfs_delalloc_helper,
-				async_cow_start, async_cow_submit,
-				async_cow_free);
+		btrfs_init_work(&async_chunk[i].work, async_cow_start,
+				async_cow_submit, async_cow_free);
 
 		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
 		atomic_add(nr_pages, &fs_info->async_delalloc_pages);
@@ -2196,8 +2194,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 
 	SetPageChecked(page);
 	get_page(page);
-	btrfs_init_work(&fixup->work, btrfs_fixup_helper,
-			btrfs_writepage_fixup_worker, NULL, NULL);
+	btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
 	fixup->page = page;
 	btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
 	return -EBUSY;
@@ -3190,7 +3187,6 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct btrfs_workqueue *wq;
-	btrfs_work_func_t func;
 
 	trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
 
@@ -3199,16 +3195,12 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
 					    end - start + 1, uptodate))
 		return;
 
-	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
 		wq = fs_info->endio_freespace_worker;
-		func = btrfs_freespace_write_helper;
-	} else {
+	else
 		wq = fs_info->endio_write_workers;
-		func = btrfs_endio_write_helper;
-	}
 
-	btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
-			NULL);
+	btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
 	btrfs_queue_work(wq, &ordered_extent->work);
 }
 
@@ -8159,18 +8151,14 @@ static void __endio_write_update_ordered(struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ordered_extent *ordered = NULL;
 	struct btrfs_workqueue *wq;
-	btrfs_work_func_t func;
 	u64 ordered_offset = offset;
 	u64 ordered_bytes = bytes;
 	u64 last_offset;
 
-	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
 		wq = fs_info->endio_freespace_worker;
-		func = btrfs_freespace_write_helper;
-	} else {
+	else
 		wq = fs_info->endio_write_workers;
-		func = btrfs_endio_write_helper;
-	}
 
 	while (ordered_offset < offset + bytes) {
 		last_offset = ordered_offset;
@@ -8178,9 +8166,8 @@ static void __endio_write_update_ordered(struct inode *inode,
 							   &ordered_offset,
 							   ordered_bytes,
 							   uptodate)) {
-			btrfs_init_work(&ordered->work, func,
-					finish_ordered_fn,
-					NULL, NULL);
+			btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
+					NULL);
 			btrfs_queue_work(wq, &ordered->work);
 		}
 		/*
@@ -10045,8 +10032,7 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
 	init_completion(&work->completion);
 	INIT_LIST_HEAD(&work->list);
 	work->inode = inode;
-	btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
-			btrfs_run_delalloc_work, NULL, NULL);
+	btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
 
 	return work;
 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ae7f64a8facb..779a5dfa5324 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -546,7 +546,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
 		spin_unlock(&root->ordered_extent_lock);
 
 		btrfs_init_work(&ordered->flush_work,
-				btrfs_flush_delalloc_helper,
 				btrfs_run_ordered_extent_work, NULL, NULL);
 		list_add_tail(&ordered->work_list, &works);
 		btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8d3bd799ac7d..cfe45320293e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3275,7 +3275,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
 	memset(&fs_info->qgroup_rescan_work, 0,
 	       sizeof(fs_info->qgroup_rescan_work));
 	btrfs_init_work(&fs_info->qgroup_rescan_work,
-			btrfs_qgroup_rescan_helper,
 			btrfs_qgroup_rescan_worker, NULL, NULL);
 	return 0;
 }
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f3d0576dd327..16c8af21b3fb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -174,7 +174,7 @@ static void scrub_parity_work(struct btrfs_work *work);
 
 static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
 {
-	btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
 	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
 }
 
@@ -1727,8 +1727,7 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	plug = container_of(cb, struct btrfs_plug_cb, cb);
 
 	if (from_schedule) {
-		btrfs_init_work(&plug->work, btrfs_rmw_helper,
-				unplug_work, NULL, NULL);
+		btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
 		btrfs_queue_work(plug->info->rmw_workers,
 				 &plug->work);
 		return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 0b034c494355..719a6165fadb 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -792,8 +792,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
 		/* FIXME we cannot handle this properly right now */
 		BUG();
 	}
-	btrfs_init_work(&rmw->work, btrfs_readahead_helper,
-			reada_start_machine_worker, NULL, NULL);
+	btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
 	rmw->fs_info = fs_info;
 
 	btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f7d4e03f4c5d..00b4ab8236b4 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -598,8 +598,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 		sbio->index = i;
 		sbio->sctx = sctx;
 		sbio->page_count = 0;
-		btrfs_init_work(&sbio->work, btrfs_scrub_helper,
-				scrub_bio_end_io_worker, NULL, NULL);
+		btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
+				NULL);
 
 		if (i != SCRUB_BIOS_PER_SCTX - 1)
 			sctx->bios[i]->next_free = i + 1;
@@ -1720,8 +1720,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
 	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
-	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
-			 scrub_wr_bio_end_io_worker, NULL, NULL);
+	btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
 	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
 }
 
@@ -2204,8 +2203,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
 	}
 
-	btrfs_init_work(&sblock->work, btrfs_scrub_helper,
-			scrub_missing_raid56_worker, NULL, NULL);
+	btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
 	scrub_block_get(sblock);
 	scrub_pending_bio_inc(sctx);
 	raid56_submit_missing_rbio(rbio);
@@ -2743,8 +2741,8 @@ static void scrub_parity_bio_endio(struct bio *bio)
 
 	bio_put(bio);
 
-	btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
-			scrub_parity_bio_endio_worker, NULL, NULL);
+	btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
+			NULL);
 	btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fa6eb9e0ba89..9b684adad81c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6656,8 +6656,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 	else
 		generate_random_uuid(dev->uuid);
 
-	btrfs_init_work(&dev->work, btrfs_submit_helper,
-			pending_bios_fn, NULL, NULL);
+	btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
 
 	return dev;
 }
-- 
2.22.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2 2/2] Btrfs: get rid of pointless wtag variable in async-thread.c
  2019-08-13 17:33 [PATCH v2 0/2] Btrfs: workqueue cleanups Omar Sandoval
  2019-08-13 17:33 ` [PATCH v2 1/2] Btrfs: get rid of unique workqueue helper functions Omar Sandoval
@ 2019-08-13 17:33 ` Omar Sandoval
  2019-08-19 17:24   ` David Sterba
  2019-08-19 17:28 ` [PATCH v2 0/2] Btrfs: workqueue cleanups David Sterba
  2019-08-21 13:20 ` David Sterba
  3 siblings, 1 reply; 9+ messages in thread
From: Omar Sandoval @ 2019-08-13 17:33 UTC (permalink / raw)
  To: linux-btrfs; +Cc: kernel-team, Nikolay Borisov, Filipe Manana

From: Omar Sandoval <osandov@fb.com>

Commit ac0c7cf8be00 ("btrfs: fix crash when tracepoint arguments are
freed by wq callbacks") added a void pointer, wtag, which is passed into
trace_btrfs_all_work_done() instead of the freed work item. This is
silly for a few reasons:

1. The freed work item still has the same address.
2. work is still in scope after it's freed, so assigning wtag doesn't
   stop anyone from using it.
3. The tracepoint has always taken a void * argument, so assigning wtag
   doesn't actually make things any more type-safe. (Note that the
   original bug in commit bc074524e123 ("btrfs: prefix fsid to all trace
   events") was that the void * was implicitly casted when it was passed
   to btrfs_work_owner() in the trace point itself).

Instead, let's add some clearer warnings as comments.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/async-thread.c      | 21 ++++++++-------------
 include/trace/events/btrfs.h |  6 +++---
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index d105d3df6fa6..60319075b781 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -226,7 +226,6 @@ static void run_ordered_work(struct btrfs_work *self)
 	struct btrfs_work *work;
 	spinlock_t *lock = &wq->list_lock;
 	unsigned long flags;
-	void *wtag;
 	bool free_self = false;
 
 	while (1) {
@@ -281,21 +280,19 @@ static void run_ordered_work(struct btrfs_work *self)
 		} else {
 			/*
 			 * We don't want to call the ordered free functions with
-			 * the lock held though. Save the work as tag for the
-			 * trace event, because the callback could free the
-			 * structure.
+			 * the lock held.
 			 */
-			wtag = work;
 			work->ordered_free(work);
-			trace_btrfs_all_work_done(wq->fs_info, wtag);
+			/* NB: work must not be dereferenced past this point. */
+			trace_btrfs_all_work_done(wq->fs_info, work);
 		}
 	}
 	spin_unlock_irqrestore(lock, flags);
 
 	if (free_self) {
-		wtag = self;
 		self->ordered_free(self);
-		trace_btrfs_all_work_done(wq->fs_info, wtag);
+		/* NB: self must not be dereferenced past this point. */
+		trace_btrfs_all_work_done(wq->fs_info, self);
 	}
 }
 
@@ -304,7 +301,6 @@ static void btrfs_work_helper(struct work_struct *normal_work)
 	struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
 					       normal_work);
 	struct __btrfs_workqueue *wq;
-	void *wtag;
 	int need_order = 0;
 
 	/*
@@ -318,8 +314,6 @@ static void btrfs_work_helper(struct work_struct *normal_work)
 	if (work->ordered_func)
 		need_order = 1;
 	wq = work->wq;
-	/* Safe for tracepoints in case work gets freed by the callback */
-	wtag = work;
 
 	trace_btrfs_work_sched(work);
 	thresh_exec_hook(wq);
@@ -327,9 +321,10 @@ static void btrfs_work_helper(struct work_struct *normal_work)
 	if (need_order) {
 		set_bit(WORK_DONE_BIT, &work->flags);
 		run_ordered_work(work);
+	} else {
+		/* NB: work must not be dereferenced past this point. */
+		trace_btrfs_all_work_done(wq->fs_info, work);
 	}
-	if (!need_order)
-		trace_btrfs_all_work_done(wq->fs_info, wtag);
 }
 
 void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 5cb95646b94e..2618a7f532c0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1388,9 +1388,9 @@ DECLARE_EVENT_CLASS(btrfs__work,
 );
 
 /*
- * For situiations when the work is freed, we pass fs_info and a tag that that
- * matches address of the work structure so it can be paired with the
- * scheduling event.
+ * For situations when the work is freed, we pass fs_info and a tag that matches
+ * the address of the work structure so it can be paired with the scheduling
+ * event. DO NOT add anything here that dereferences wtag.
  */
 DECLARE_EVENT_CLASS(btrfs__work__done,
 
-- 
2.22.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/2] Btrfs: get rid of pointless wtag variable in async-thread.c
  2019-08-13 17:33 ` [PATCH v2 2/2] Btrfs: get rid of pointless wtag variable in async-thread.c Omar Sandoval
@ 2019-08-19 17:24   ` David Sterba
  0 siblings, 0 replies; 9+ messages in thread
From: David Sterba @ 2019-08-19 17:24 UTC (permalink / raw)
  To: Omar Sandoval; +Cc: linux-btrfs, kernel-team, Nikolay Borisov, Filipe Manana

On Tue, Aug 13, 2019 at 10:33:44AM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> Commit ac0c7cf8be00 ("btrfs: fix crash when tracepoint arguments are
> freed by wq callbacks") added a void pointer, wtag, which is passed into
> trace_btrfs_all_work_done() instead of the freed work item. This is
> silly for a few reasons:
> 
> 1. The freed work item still has the same address.
> 2. work is still in scope after it's freed, so assigning wtag doesn't
>    stop anyone from using it.
> 3. The tracepoint has always taken a void * argument, so assigning wtag
>    doesn't actually make things any more type-safe. (Note that the
>    original bug in commit bc074524e123 ("btrfs: prefix fsid to all trace
>    events") was that the void * was implicitly casted when it was passed
>    to btrfs_work_owner() in the trace point itself).

I'd argue that the patch did it the way to prevent silly errors like
reusing 'work' because see it's passed to the tracepoint so it's fine to
use any time later as well. The value of the pointer was just something
to grep for not meant to be used in any other way.


> Instead, let's add some clearer warnings as comments.
> 
> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
> Reviewed-by: Filipe Manana <fdmanana@suse.com>
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  fs/btrfs/async-thread.c      | 21 ++++++++-------------
>  include/trace/events/btrfs.h |  6 +++---
>  2 files changed, 11 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
> index d105d3df6fa6..60319075b781 100644
> --- a/fs/btrfs/async-thread.c
> +++ b/fs/btrfs/async-thread.c
> @@ -226,7 +226,6 @@ static void run_ordered_work(struct btrfs_work *self)
>  	struct btrfs_work *work;
>  	spinlock_t *lock = &wq->list_lock;
>  	unsigned long flags;
> -	void *wtag;
>  	bool free_self = false;
>  
>  	while (1) {
> @@ -281,21 +280,19 @@ static void run_ordered_work(struct btrfs_work *self)
>  		} else {
>  			/*
>  			 * We don't want to call the ordered free functions with
> -			 * the lock held though. Save the work as tag for the
> -			 * trace event, because the callback could free the
> -			 * structure.
> +			 * the lock held.
>  			 */
> -			wtag = work;
>  			work->ordered_free(work);
> -			trace_btrfs_all_work_done(wq->fs_info, wtag);
> +			/* NB: work must not be dereferenced past this point. */
> +			trace_btrfs_all_work_done(wq->fs_info, work);

I hope that programmers read code and comments so what you do is fine
too and we don't have to reset work to NULL at this point, though this
would make it really hard to miss.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] Btrfs: workqueue cleanups
  2019-08-13 17:33 [PATCH v2 0/2] Btrfs: workqueue cleanups Omar Sandoval
  2019-08-13 17:33 ` [PATCH v2 1/2] Btrfs: get rid of unique workqueue helper functions Omar Sandoval
  2019-08-13 17:33 ` [PATCH v2 2/2] Btrfs: get rid of pointless wtag variable in async-thread.c Omar Sandoval
@ 2019-08-19 17:28 ` David Sterba
  2019-08-21 13:20 ` David Sterba
  3 siblings, 0 replies; 9+ messages in thread
From: David Sterba @ 2019-08-19 17:28 UTC (permalink / raw)
  To: Omar Sandoval; +Cc: linux-btrfs, kernel-team

On Tue, Aug 13, 2019 at 10:33:42AM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> This does some cleanups to the Btrfs workqueue code following my
> previous fix [1]. Changed since v1 [2]:
> 
> - Removed errant Fixes: tag in patch 1
> - Fixed a comment typo in patch 2
> - Added NB: to comments in patch 2
> - Added Nikolay and Filipe's reviewed-by tags

Added to misc-next, thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] Btrfs: workqueue cleanups
  2019-08-13 17:33 [PATCH v2 0/2] Btrfs: workqueue cleanups Omar Sandoval
                   ` (2 preceding siblings ...)
  2019-08-19 17:28 ` [PATCH v2 0/2] Btrfs: workqueue cleanups David Sterba
@ 2019-08-21 13:20 ` David Sterba
  2019-08-21 13:24   ` David Sterba
  3 siblings, 1 reply; 9+ messages in thread
From: David Sterba @ 2019-08-21 13:20 UTC (permalink / raw)
  To: Omar Sandoval; +Cc: linux-btrfs, kernel-team

On Tue, Aug 13, 2019 at 10:33:42AM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> This does some cleanups to the Btrfs workqueue code following my
> previous fix [1]. Changed since v1 [2]:
> 
> - Removed errant Fixes: tag in patch 1
> - Fixed a comment typo in patch 2
> - Added NB: to comments in patch 2
> - Added Nikolay and Filipe's reviewed-by tags
> 
> Thanks!
> 
> 1: https://lore.kernel.org/linux-btrfs/0bea516a54b26e4e1c42e6fe47548cb48cc4172b.1565112813.git.osandov@fb.com/
> 2: https://lore.kernel.org/linux-btrfs/cover.1565680721.git.osandov@fb.com/
> 
> Omar Sandoval (2):
>   Btrfs: get rid of unique workqueue helper functions
>   Btrfs: get rid of pointless wtag variable in async-thread.c

The patches seem to cause crashes inside the workqueues, happened several
times in random patches, sample stacktrace below. This blocks me from
testing so I'll move the patches out of misc-next for now and add back
once there's a fix.

btrfs/004               [18:00:37][   70.500719] run fstests btrfs/004 at 2019-08-20 18:00:37
[   70.866378] BTRFS info (device vda): disk space caching is enabled
[   70.868960] BTRFS info (device vda): has skinny extents
[   71.046922] BTRFS: device fsid e4a337ee-c8db-4be4-ac0f-6cd899f74fa8 devid 1 transid 5 /dev/vdb
[   71.065084] BTRFS info (device vdb): turning on discard
[   71.066812] BTRFS info (device vdb): disk space caching is enabled
[   71.068907] BTRFS info (device vdb): has skinny extents
[   71.070589] BTRFS info (device vdb): flagging fs with big metadata feature
[   71.078315] BTRFS info (device vdb): checking UUID tree
[   74.627999] BTRFS info (device vdb): turning on discard
[   74.630041] BTRFS info (device vdb): setting incompat feature flag for COMPRESS_LZO (0x8)
[   74.632869] BTRFS info (device vdb): use lzo compression, level 0
[   74.634472] BTRFS info (device vdb): disk space caching is enabled
[   74.636137] BTRFS info (device vdb): has skinny extents
[   75.038059] fsstress (21349) used greatest stack depth: 10912 bytes left
[   79.343639] BTRFS info (device vdb): turning on discard
[   79.345268] BTRFS info (device vdb): disk space caching is enabled
[   79.346866] BTRFS info (device vdb): has skinny extents
[   90.608845] general protection fault: 0000 [#1] SMP
[   90.611148] CPU: 2 PID: 21318 Comm: kworker/u8:8 Not tainted 5.3.0-rc5-default+ #699
[   90.613973] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014
[   90.617179] Workqueue: btrfs-worker btrfs_work_helper [btrfs]
[   90.618740] RIP: 0010:__lock_acquire+0x815/0x1100
[   90.624107] RSP: 0018:ffffa5cb05f23cc0 EFLAGS: 00010002
[   90.625652] RAX: 0000000000000000 RBX: 0000000000000086 RCX: 0000000000000000
[   90.627804] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6b6b6ba3
[   90.629171] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001
[   90.631566] R10: 6b6b6b6b6b6b6ba3 R11: 0000000000000000 R12: 0000000000000001
[   90.632952] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa0f49efed340
[   90.634019] FS:  0000000000000000(0000) GS:ffffa0f4bd800000(0000) knlGS:0000000000000000
[   90.635313] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   90.636293] CR2: 0000555f9a8d7538 CR3: 0000000072ac8000 CR4: 00000000000006e0
[   90.637987] Call Trace:
[   90.638620]  ? lockdep_hardirqs_on+0x103/0x190
[   90.639745]  ? trace_hardirqs_on_thunk+0x1a/0x20
[   90.640864]  lock_acquire+0x95/0x1a0
[   90.641783]  ? btrfs_work_helper+0x1ba/0x600 [btrfs]
[   90.642883]  _raw_spin_lock_irqsave+0x3c/0x80
[   90.643924]  ? btrfs_work_helper+0x1ba/0x600 [btrfs]
[   90.645129]  btrfs_work_helper+0x1ba/0x600 [btrfs]
[   90.646327]  process_one_work+0x22f/0x5b0
[   90.647325]  worker_thread+0x50/0x3b0
[   90.648228]  ? process_one_work+0x5b0/0x5b0
[   90.649243]  kthread+0x11a/0x130
[   90.649825]  ? kthread_create_worker_on_cpu+0x70/0x70
[   90.650633]  ret_from_fork+0x24/0x30
[   90.651256] Modules linked in: btrfs libcrc32c xor zstd_decompress zstd_compress xxhash raid6_pq loop
[   90.653126] ---[ end trace 5a9019a191ef1b98 ]---
[   90.654215] RIP: 0010:__lock_acquire+0x815/0x1100
[   90.658563] RSP: 0018:ffffa5cb05f23cc0 EFLAGS: 00010002
[   90.659392] RAX: 0000000000000000 RBX: 0000000000000086 RCX: 0000000000000000
[   90.660937] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6b6b6ba3
[   90.662457] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001
[   90.663524] R10: 6b6b6b6b6b6b6ba3 R11: 0000000000000000 R12: 0000000000000001
[   90.665197] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa0f49efed340
[   90.667127] FS:  0000000000000000(0000) GS:ffffa0f4bd800000(0000) knlGS:0000000000000000
[   90.669365] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   90.670966] CR2: 0000555f9a8d7538 CR3: 0000000072ac8000 CR4: 00000000000006e0
[   90.672893] BUG: sleeping function called from invalid context at include/linux/cgroup-defs.h:760
[   90.675438] in_atomic(): 1, irqs_disabled(): 1, pid: 21318, name: kworker/u8:8
[   90.677531] INFO: lockdep is turned off.
[   90.678757] irq event stamp: 94362
[   90.679645] hardirqs last  enabled at (94361): [<ffffffff8e0029da>] trace_hardirqs_on_thunk+0x1a/0x20
[   90.681805] hardirqs last disabled at (94362): [<ffffffff8e73f8d6>] _raw_spin_lock_irqsave+0x16/0x80
[   90.683239] softirqs last  enabled at (94360): [<ffffffff8ea00358>] __do_softirq+0x358/0x52b
[   90.684920] softirqs last disabled at (94079): [<ffffffff8e088d6d>] irq_exit+0x9d/0xb0
[   90.686500] CPU: 2 PID: 21318 Comm: kworker/u8:8 Tainted: G      D           5.3.0-rc5-default+ #699
[   90.687962] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014
[   90.690382] Workqueue: btrfs-worker btrfs_work_helper [btrfs]
[   90.691953] Call Trace:
[   90.692769]  dump_stack+0x67/0x90
[   90.693816]  ___might_sleep.cold+0x9f/0xf2
[   90.695033]  exit_signals+0x30/0x110
[   90.696030]  do_exit+0xb7/0x920
[   90.696902]  ? kthread+0x11a/0x130
[   90.697799]  rewind_stack_do_exit+0x17/0x20
[   90.699258] note: kworker/u8:8[21318] exited with preempt_count 1

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] Btrfs: workqueue cleanups
  2019-08-21 13:20 ` David Sterba
@ 2019-08-21 13:24   ` David Sterba
  2019-08-21 14:14     ` David Sterba
  0 siblings, 1 reply; 9+ messages in thread
From: David Sterba @ 2019-08-21 13:24 UTC (permalink / raw)
  To: David Sterba; +Cc: Omar Sandoval, linux-btrfs, kernel-team

On Wed, Aug 21, 2019 at 03:20:21PM +0200, David Sterba wrote:
> On Tue, Aug 13, 2019 at 10:33:42AM -0700, Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> > 
> > This does some cleanups to the Btrfs workqueue code following my
> > previous fix [1]. Changed since v1 [2]:
> > 
> > - Removed errant Fixes: tag in patch 1
> > - Fixed a comment typo in patch 2
> > - Added NB: to comments in patch 2
> > - Added Nikolay and Filipe's reviewed-by tags
> > 
> > Thanks!
> > 
> > 1: https://lore.kernel.org/linux-btrfs/0bea516a54b26e4e1c42e6fe47548cb48cc4172b.1565112813.git.osandov@fb.com/
> > 2: https://lore.kernel.org/linux-btrfs/cover.1565680721.git.osandov@fb.com/
> > 
> > Omar Sandoval (2):
> >   Btrfs: get rid of unique workqueue helper functions
> >   Btrfs: get rid of pointless wtag variable in async-thread.c
> 
> The patches seem to cause crashes inside the workqueues, happened several
> times in random patches, sample stacktrace below. This blocks me from
> testing so I'll move the patches out of misc-next for now and add back
> once there's a fix.

Another possibility is that the cleanup patches make it more likely to
happen and the root cause is "Btrfs: fix workqueue deadlock on dependent
filesystems".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] Btrfs: workqueue cleanups
  2019-08-21 13:24   ` David Sterba
@ 2019-08-21 14:14     ` David Sterba
  2019-08-21 15:18       ` Josef Bacik
  0 siblings, 1 reply; 9+ messages in thread
From: David Sterba @ 2019-08-21 14:14 UTC (permalink / raw)
  To: David Sterba; +Cc: Omar Sandoval, linux-btrfs, kernel-team

On Wed, Aug 21, 2019 at 03:24:46PM +0200, David Sterba wrote:
> On Wed, Aug 21, 2019 at 03:20:21PM +0200, David Sterba wrote:
> > On Tue, Aug 13, 2019 at 10:33:42AM -0700, Omar Sandoval wrote:
> > > From: Omar Sandoval <osandov@fb.com>
> > > 
> > > This does some cleanups to the Btrfs workqueue code following my
> > > previous fix [1]. Changed since v1 [2]:
> > > 
> > > - Removed errant Fixes: tag in patch 1
> > > - Fixed a comment typo in patch 2
> > > - Added NB: to comments in patch 2
> > > - Added Nikolay and Filipe's reviewed-by tags
> > > 
> > > Thanks!
> > > 
> > > 1: https://lore.kernel.org/linux-btrfs/0bea516a54b26e4e1c42e6fe47548cb48cc4172b.1565112813.git.osandov@fb.com/
> > > 2: https://lore.kernel.org/linux-btrfs/cover.1565680721.git.osandov@fb.com/
> > > 
> > > Omar Sandoval (2):
> > >   Btrfs: get rid of unique workqueue helper functions
> > >   Btrfs: get rid of pointless wtag variable in async-thread.c
> > 
> > > The patches seem to cause crashes inside the workqueues, happened several
> > times in random patches, sample stacktrace below. This blocks me from
> > testing so I'll move the patches out of misc-next for now and add back
> > once there's a fix.
> 
> Another possibility is that the cleanup patches make it more likely to
> happen and the root cause is "Btrfs: fix workqueue deadlock on dependent
> filesystems".

With just the deadlock fix on top, crashed in btrfs/011:

[ 2847.115355] general protection fault: 0000 [#1] SMP
[ 2847.120304] CPU: 0 PID: 10594 Comm: kworker/u8:3 Not tainted 5.3.0-rc5-default+ #699
[ 2847.124830] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014
[ 2847.128328] Workqueue: btrfs-worker btrfs_worker_helper [btrfs]
[ 2847.130135] RIP: 0010:__lock_acquire+0x815/0x1100
[ 2847.136626] RSP: 0018:ffffb3e8c3dbfcd0 EFLAGS: 00010002
[ 2847.137627] RAX: 0000000000000000 RBX: 0000000000000086 RCX: 0000000000000000
[ 2847.138690] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6b6b6ba3
[ 2847.139989] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001
[ 2847.141055] R10: 6b6b6b6b6b6b6ba3 R11: 0000000000000000 R12: 0000000000000001
[ 2847.142113] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa26aa1ab29c0
[ 2847.143251] FS:  0000000000000000(0000) GS:ffffa26abd400000(0000) knlGS:0000000000000000
[ 2847.145539] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2847.147154] CR2: 00007f70c6c2977c CR3: 0000000031011000 CR4: 00000000000006f0
[ 2847.149046] Call Trace:
[ 2847.149683]  ? lockdep_hardirqs_on+0x103/0x190
[ 2847.150482]  ? trace_hardirqs_on_thunk+0x1a/0x20
[ 2847.151355]  lock_acquire+0x95/0x1a0
[ 2847.152205]  ? normal_work_helper+0x264/0x5f0 [btrfs]
[ 2847.153035]  _raw_spin_lock_irqsave+0x3c/0x80
[ 2847.153774]  ? normal_work_helper+0x264/0x5f0 [btrfs]
[ 2847.154597]  normal_work_helper+0x264/0x5f0 [btrfs]
[ 2847.155609]  process_one_work+0x22f/0x5b0
[ 2847.156487]  worker_thread+0x50/0x3b0
[ 2847.157275]  ? process_one_work+0x5b0/0x5b0
[ 2847.158356]  kthread+0x11a/0x130
[ 2847.159035]  ? kthread_create_worker_on_cpu+0x70/0x70
[ 2847.160186]  ret_from_fork+0x24/0x30
[ 2847.160828] Modules linked in: btrfs libcrc32c xor zstd_decompress zstd_compress xxhash raid6_pq loop
[ 2847.162540] ---[ end trace 4b931bab091a0069 ]---
[ 2847.163650] RIP: 0010:__lock_acquire+0x815/0x1100
[ 2847.168293] RSP: 0018:ffffb3e8c3dbfcd0 EFLAGS: 00010002
[ 2847.169286] RAX: 0000000000000000 RBX: 0000000000000086 RCX: 0000000000000000
[ 2847.170348] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6b6b6ba3
[ 2847.171456] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001
[ 2847.172874] R10: 6b6b6b6b6b6b6ba3 R11: 0000000000000000 R12: 0000000000000001
[ 2847.173937] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa26aa1ab29c0
[ 2847.175727] FS:  0000000000000000(0000) GS:ffffa26abd400000(0000) knlGS:0000000000000000
[ 2847.177985] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2847.179435] CR2: 00007f70c6c2977c CR3: 0000000031011000 CR4: 00000000000006f0
[ 2847.180657] BUG: sleeping function called from invalid context at include/linux/cgroup-defs.h:760
[ 2847.182056] in_atomic(): 1, irqs_disabled(): 1, pid: 10594, name: kworker/u8:3
[ 2847.183465] INFO: lockdep is turned off.
[ 2847.185401] irq event stamp: 37147954
[ 2847.187257] hardirqs last  enabled at (37147953): [<ffffffffa00029da>] trace_hardirqs_on_thunk+0x1a/0x20
[ 2847.190661] hardirqs last disabled at (37147954): [<ffffffffa073f8d6>] _raw_spin_lock_irqsave+0x16/0x80
[ 2847.193732] softirqs last  enabled at (37147952): [<ffffffffa0a00358>] __do_softirq+0x358/0x52b
[ 2847.196321] softirqs last disabled at (37147905): [<ffffffffa0088d6d>] irq_exit+0x9d/0xb0
[ 2847.198554] CPU: 0 PID: 10594 Comm: kworker/u8:3 Tainted: G      D           5.3.0-rc5-default+ #699
[ 2847.201083] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014
[ 2847.203894] Workqueue: btrfs-worker btrfs_worker_helper [btrfs]
[ 2847.205305] Call Trace:
[ 2847.206010]  dump_stack+0x67/0x90
[ 2847.206943]  ___might_sleep.cold+0x9f/0xf2
[ 2847.208080]  exit_signals+0x30/0x110
[ 2847.209173]  do_exit+0xb7/0x920
[ 2847.210176]  ? kthread+0x11a/0x130
[ 2847.211261]  rewind_stack_do_exit+0x17/0x20
[ 2847.212571] note: kworker/u8:3[10594] exited with preempt_count 1

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] Btrfs: workqueue cleanups
  2019-08-21 14:14     ` David Sterba
@ 2019-08-21 15:18       ` Josef Bacik
  0 siblings, 0 replies; 9+ messages in thread
From: Josef Bacik @ 2019-08-21 15:18 UTC (permalink / raw)
  To: dsterba, Omar Sandoval, linux-btrfs, kernel-team

On Wed, Aug 21, 2019 at 04:14:53PM +0200, David Sterba wrote:
> On Wed, Aug 21, 2019 at 03:24:46PM +0200, David Sterba wrote:
> > On Wed, Aug 21, 2019 at 03:20:21PM +0200, David Sterba wrote:
> > > On Tue, Aug 13, 2019 at 10:33:42AM -0700, Omar Sandoval wrote:
> > > > From: Omar Sandoval <osandov@fb.com>
> > > > 
> > > > This does some cleanups to the Btrfs workqueue code following my
> > > > previous fix [1]. Changed since v1 [2]:
> > > > 
> > > > - Removed errant Fixes: tag in patch 1
> > > > - Fixed a comment typo in patch 2
> > > > - Added NB: to comments in patch 2
> > > > - Added Nikolay and Filipe's reviewed-by tags
> > > > 
> > > > Thanks!
> > > > 
> > > > 1: https://lore.kernel.org/linux-btrfs/0bea516a54b26e4e1c42e6fe47548cb48cc4172b.1565112813.git.osandov@fb.com/
> > > > 2: https://lore.kernel.org/linux-btrfs/cover.1565680721.git.osandov@fb.com/
> > > > 
> > > > Omar Sandoval (2):
> > > >   Btrfs: get rid of unique workqueue helper functions
> > > >   Btrfs: get rid of pointless wtag variable in async-thread.c
> > > 
> > > The patches seem to cause crashes inside the workqueues, happened several
> > > times in random patches, sample stacktrace below. This blocks me from
> > > testing so I'll move the patches out of misc-next for now and add back
> > > once there's a fix.
> > 
> > Another possibility is that the cleanup patches make it more likely to
> > happen and the root cause is "Btrfs: fix workqueue deadlock on dependent
> > filesystems".
> 
> With just the deadlock fix on top, crashed in btrfs/011:
> 

It's because we're doing

wq = work->wq;

after we've done set_bit(WORK_DONE_BIT, &work->flags).  The work could be freed
immediately after this and thus doing work->wq could give you garbage.  Could
you try with this diff applied and verify it goes away?

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 6b8ad4a1b568..10a04b99798a 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -252,9 +252,9 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
 	}
 }
 
-static void run_ordered_work(struct btrfs_work *self)
+static void run_ordered_work(struct __btrfs_workqueue *wq,
+			     struct btrfs_work *self)
 {
-	struct __btrfs_workqueue *wq = self->wq;
 	struct list_head *list = &wq->ordered_list;
 	struct btrfs_work *work;
 	spinlock_t *lock = &wq->list_lock;
@@ -356,7 +356,7 @@ static void normal_work_helper(struct btrfs_work *work)
 	work->func(work);
 	if (need_order) {
 		set_bit(WORK_DONE_BIT, &work->flags);
-		run_ordered_work(work);
+		run_ordered_work(wq, work);
 	}
 	if (!need_order)
 		trace_btrfs_all_work_done(wq->fs_info, wtag);

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, back to index

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-13 17:33 [PATCH v2 0/2] Btrfs: workqueue cleanups Omar Sandoval
2019-08-13 17:33 ` [PATCH v2 1/2] Btrfs: get rid of unique workqueue helper functions Omar Sandoval
2019-08-13 17:33 ` [PATCH v2 2/2] Btrfs: get rid of pointless wtag variable in async-thread.c Omar Sandoval
2019-08-19 17:24   ` David Sterba
2019-08-19 17:28 ` [PATCH v2 0/2] Btrfs: workqueue cleanups David Sterba
2019-08-21 13:20 ` David Sterba
2019-08-21 13:24   ` David Sterba
2019-08-21 14:14     ` David Sterba
2019-08-21 15:18       ` Josef Bacik

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org linux-btrfs@archiver.kernel.org
	public-inbox-index linux-btrfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox