* fix loop autoclear for xfstests xfs/049
@ 2021-12-23 11:25 Christoph Hellwig
  2021-12-23 11:25 ` [PATCH 1/2] loop: use a global workqueue Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-23 11:25 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Tetsuo Handa, Jan Kara, Dan Schatzberg, linux-block

Hi Jens, hi Tetsuo,

this is a 3rd approach to fix the loop autoclear delay.  Instead of
working around the workqueue lockdep issues, this switches the loop
driver to use a global workqueue and thus avoids the destroy_workqueue
call under disk->open_mutex entirely.
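
For illustration only (not part of this series): a rough userspace sketch
that can be used to watch how long the asynchronous autoclear keeps a device
bound after the last close.  Everything below is made up for the example
(the file name, "testfile", the sysfs polling); it assumes root, a free loop
device and only the standard loop ioctls:

---------- autoclear-delay.c start ----------
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
        char path[64];
        struct loop_info64 info;
        struct timespec t0, t1;
        const int file_fd = open("testfile", O_RDWR | O_CREAT, 0600);
        const int ctl_fd = open("/dev/loop-control", O_RDWR);
        const int nr = ioctl(ctl_fd, LOOP_CTL_GET_FREE, 0);
        int loop_fd;

        ftruncate(file_fd, 1048576);
        snprintf(path, sizeof(path), "/dev/loop%d", nr);
        loop_fd = open(path, O_RDWR);
        ioctl(loop_fd, LOOP_SET_FD, file_fd);
        memset(&info, 0, sizeof(info));
        info.lo_flags = LO_FLAGS_AUTOCLEAR;
        ioctl(loop_fd, LOOP_SET_STATUS64, &info);

        /* The sysfs "loop" directory exists only while the device is bound. */
        snprintf(path, sizeof(path), "/sys/block/loop%d/loop", nr);
        clock_gettime(CLOCK_MONOTONIC, &t0);
        close(loop_fd);                         /* last close triggers autoclear */
        while (access(path, F_OK) == 0)         /* poll until the device is unbound */
                usleep(1000);
        clock_gettime(CLOCK_MONOTONIC, &t1);
        printf("autoclear took %ld ms\n",
               (long)((t1.tv_sec - t0.tv_sec) * 1000 +
                      (t1.tv_nsec - t0.tv_nsec) / 1000000));
        close(file_fd);
        close(ctl_fd);
        return 0;
}
---------- autoclear-delay.c end ----------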


* [PATCH 1/2] loop: use a global workqueue
  2021-12-23 11:25 fix loop autoclear for xfstests xfs/049 Christoph Hellwig
@ 2021-12-23 11:25 ` Christoph Hellwig
  2021-12-23 14:37   ` Tetsuo Handa
  2021-12-29  2:23   ` Tetsuo Handa
  2021-12-23 11:25 ` [PATCH 2/2] loop: make autoclear operation synchronous again Christoph Hellwig
  2021-12-23 13:40 ` fix loop autoclear for xfstests xfs/049 Jan Kara
  2 siblings, 2 replies; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-23 11:25 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Tetsuo Handa, Jan Kara, Dan Schatzberg, linux-block

Using a per-device unbound workqueue is a bit of an anti-pattern and
in this case also creates lock ordering problems.  Just use a global
concurrency managed workqueue instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/loop.c | 36 +++++++++++++++---------------------
 drivers/block/loop.h |  1 -
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7f4ea06534c2d..573f0d83fe80a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -89,6 +89,7 @@
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
 static DEFINE_MUTEX(loop_validate_mutex);
+static struct workqueue_struct *loop_workqueue;
 
 /**
  * loop_global_lock_killable() - take locks for safe loop_validate_file() test
@@ -884,7 +885,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
 		cmd_list = &lo->rootcg_cmd_list;
 	}
 	list_add_tail(&cmd->list_entry, cmd_list);
-	queue_work(lo->workqueue, work);
+	queue_work(loop_workqueue, work);
 	spin_unlock_irq(&lo->lo_work_lock);
 }
 
@@ -1006,15 +1007,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	    !file->f_op->write_iter)
 		lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-	lo->workqueue = alloc_workqueue("loop%d",
-					WQ_UNBOUND | WQ_FREEZABLE,
-					0,
-					lo->lo_number);
-	if (!lo->workqueue) {
-		error = -ENOMEM;
-		goto out_unlock;
-	}
-
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
@@ -1115,7 +1107,6 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
-	destroy_workqueue(lo->workqueue);
 	spin_lock_irq(&lo->lo_work_lock);
 	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
 				idle_list) {
@@ -2212,15 +2203,11 @@ static int __init loop_init(void)
 		max_part = (1UL << part_shift) - 1;
 	}
 
-	if ((1UL << part_shift) > DISK_MAX_PARTS) {
-		err = -EINVAL;
-		goto err_out;
-	}
+	if ((1UL << part_shift) > DISK_MAX_PARTS)
+		return -EINVAL;
 
-	if (max_loop > 1UL << (MINORBITS - part_shift)) {
-		err = -EINVAL;
-		goto err_out;
-	}
+	if (max_loop > 1UL << (MINORBITS - part_shift))
+		return -EINVAL;
 
 	/*
 	 * If max_loop is specified, create that many devices upfront.
@@ -2235,9 +2222,14 @@ static int __init loop_init(void)
 	else
 		nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
 
+	loop_workqueue = alloc_workqueue("loop", WQ_MEM_RECLAIM | WQ_FREEZABLE,
+					 0);
+	if (!loop_workqueue)
+		return -ENOMEM;
+
 	err = misc_register(&loop_misc);
 	if (err < 0)
-		goto err_out;
+		goto destroy_workqueue;
 
 
 	if (__register_blkdev(LOOP_MAJOR, "loop", loop_probe)) {
@@ -2254,7 +2246,8 @@ static int __init loop_init(void)
 
 misc_out:
 	misc_deregister(&loop_misc);
-err_out:
+destroy_workqueue:
+	destroy_workqueue(loop_workqueue);
 	return err;
 }
 
@@ -2276,6 +2269,7 @@ static void __exit loop_exit(void)
 		loop_remove(lo);
 
 	idr_destroy(&loop_index_idr);
+	destroy_workqueue(loop_workqueue);
 }
 
 module_init(loop_init);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc0259..885c83b4417e1 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -42,7 +42,6 @@ struct loop_device {
 	spinlock_t		lo_lock;
 	int			lo_state;
 	spinlock_t              lo_work_lock;
-	struct workqueue_struct *workqueue;
 	struct work_struct      rootcg_work;
 	struct list_head        rootcg_cmd_list;
 	struct list_head        idle_worker_list;
-- 
2.30.2



* [PATCH 2/2] loop: make autoclear operation synchronous again
  2021-12-23 11:25 fix loop autoclear for xfstests xfs/049 Christoph Hellwig
  2021-12-23 11:25 ` [PATCH 1/2] loop: use a global workqueue Christoph Hellwig
@ 2021-12-23 11:25 ` Christoph Hellwig
  2021-12-23 13:40 ` fix loop autoclear for xfstests xfs/049 Jan Kara
  2 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-23 11:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Tetsuo Handa, Jan Kara, Dan Schatzberg, linux-block,
	kernel test robot, Tetsuo Handa

From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>

The kernel test robot is reporting that xfstests can fail at the

  umount ext2 on xfs
  umount xfs

sequence, because commit 322c4293ecc58110 ("loop: make autoclear operation
asynchronous") broke what commit ("loop: Make explicit loop device
destruction lazy") wanted to achieve.

Although we cannot guarantee that nobody is holding a reference when
"umount xfs" is called, we should try to close a race window opened
by the asynchronous autoclear operation.

Make the autoclear operation upon close() synchronous, by performing
__loop_clr_fd() directly from the release callback.

Reported-by: kernel test robot <oliver.sang@intel.com>
Fixes: 322c4293ecc58110 ("loop: make autoclear operation asynchronous")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
[hch: rebased]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/loop.c | 31 +------------------------------
 drivers/block/loop.h |  1 -
 2 files changed, 1 insertion(+), 31 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 573f0d83fe80a..7faacefc4ede9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1156,40 +1156,12 @@ static void __loop_clr_fd(struct loop_device *lo)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART;
 
 	fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
 	mutex_lock(&lo->lo_mutex);
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
 	module_put(THIS_MODULE);
 }
 
-static void loop_rundown_workfn(struct work_struct *work)
-{
-	struct loop_device *lo = container_of(work, struct loop_device,
-					      rundown_work);
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__loop_clr_fd(lo);
-	kobject_put(&bdev->bd_device.kobj);
-	module_put(disk->fops->owner);
-	loop_rundown_completed(lo);
-}
-
-static void loop_schedule_rundown(struct loop_device *lo)
-{
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__module_get(disk->fops->owner);
-	kobject_get(&bdev->bd_device.kobj);
-	INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
-	queue_work(system_long_wq, &lo->rundown_work);
-}
-
 static int loop_clr_fd(struct loop_device *lo)
 {
 	int err;
@@ -1220,7 +1192,6 @@ static int loop_clr_fd(struct loop_device *lo)
 	mutex_unlock(&lo->lo_mutex);
 
 	__loop_clr_fd(lo);
-	loop_rundown_completed(lo);
 	return 0;
 }
 
@@ -1745,7 +1716,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		loop_schedule_rundown(lo);
+		__loop_clr_fd(lo);
 		return;
 	} else if (lo->lo_state == Lo_bound) {
 		/*
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 885c83b4417e1..0400cbfed6308 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -55,7 +55,6 @@ struct loop_device {
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
 	bool			idr_visible;
-	struct work_struct      rundown_work;
 };
 
 struct loop_cmd {
-- 
2.30.2



* Re: fix loop autoclear for xfstests xfs/049
  2021-12-23 11:25 fix loop autoclear for xfstests xfs/049 Christoph Hellwig
  2021-12-23 11:25 ` [PATCH 1/2] loop: use a global workqueue Christoph Hellwig
  2021-12-23 11:25 ` [PATCH 2/2] loop: make autoclear operation synchronous again Christoph Hellwig
@ 2021-12-23 13:40 ` Jan Kara
  2021-12-24  6:02   ` Christoph Hellwig
  2 siblings, 1 reply; 13+ messages in thread
From: Jan Kara @ 2021-12-23 13:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Tetsuo Handa, Jan Kara, Dan Schatzberg, linux-block

Hi!

On Thu 23-12-21 12:25:07, Christoph Hellwig wrote:
> this is a 3rd approach to fix the loop autoclear delay.  Instead of
> working around the workqueue lockdep issues, this switches the loop
> driver to use a global workqueue and thus avoids the destroy_workqueue
> call under disk->open_mutex entirely.

Hum, I have nothing against this but I'm somewhat wondering: Lockdep was
originally complaining because it somehow managed to find a write whose
completion was indirectly dependent on disk->open_mutex and
destroy_workqueue() could wait for such a write to complete under
disk->open_mutex. Now your patch will fix this lockdep complaint but we
still would wait for the write to complete through blk_mq_freeze_queue()
(just lockdep is not clever enough to detect this). So IMHO if there was a
deadlock before, it will still be there with your changes. Now I'm not 100%
sure the deadlock lockdep was complaining about is real in the first place
because it involved some writes to proc files (taking some locks) and
hibernation mutex and whatnot.  But it is true that writing to a backing
file will grab fs freeze protection and that can bring with it all sorts of
interesting dependencies.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR


* Re: [PATCH 1/2] loop: use a global workqueue
  2021-12-23 11:25 ` [PATCH 1/2] loop: use a global workqueue Christoph Hellwig
@ 2021-12-23 14:37   ` Tetsuo Handa
  2021-12-24  6:03     ` Christoph Hellwig
  2021-12-29  2:23   ` Tetsuo Handa
  1 sibling, 1 reply; 13+ messages in thread
From: Tetsuo Handa @ 2021-12-23 14:37 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe; +Cc: Jan Kara, Dan Schatzberg, linux-block

On 2021/12/23 20:25, Christoph Hellwig wrote:
> Using a per-device unbound workqueue is a bit of an anti-pattern and
> in this case also creates lock ordering problems.  Just use a global
> concurrency managed workqueue instead.

Use of a global workqueue for the loop driver itself is fine. But

> @@ -1115,7 +1107,6 @@ static void __loop_clr_fd(struct loop_device *lo)
>  	/* freeze request queue during the transition */
>  	blk_mq_freeze_queue(lo->lo_queue);
>  
> -	destroy_workqueue(lo->workqueue);

is it safe to remove destroy_workqueue() call here?

>  	spin_lock_irq(&lo->lo_work_lock);
>  	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
>  				idle_list) {

destroy_workqueue() implies flush_workqueue() which is creating the lock
ordering problem. And I think that flush_workqueue() is required for making
sure that there is no more work to process (i.e. loop_process_work() is
no longer running) before we start deleting idle workers.

My understanding is that the problem is not the use of a per-device workqueue
but the need to call flush_workqueue() in order to make sure that all pending
works are completed.



* Re: fix loop autoclear for xfstests xfs/049
  2021-12-23 13:40 ` fix loop autoclear for xfstests xfs/049 Jan Kara
@ 2021-12-24  6:02   ` Christoph Hellwig
  2021-12-26  7:09     ` Tetsuo Handa
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-24  6:02 UTC (permalink / raw)
  To: Jan Kara
  Cc: Christoph Hellwig, Jens Axboe, Tetsuo Handa, Dan Schatzberg, linux-block

On Thu, Dec 23, 2021 at 02:40:50PM +0100, Jan Kara wrote:
> Hum, I have nothing against this but I'm somewhat wondering: Lockdep was
> originally complaining because it somehow managed to find a write whose
> completion was indirectly dependent on disk->open_mutex and
> destroy_workqueue() could wait for such a write to complete under
> disk->open_mutex. Now your patch will fix this lockdep complaint but we
> still would wait for the write to complete through blk_mq_freeze_queue()
> (just lockdep is not clever enough to detect this). So IMHO if there was a
> deadlock before, it will still be there with your changes. Now I'm not 100%
> sure the deadlock lockdep was complaining about is real in the first place
> because it involved some writes to proc files (taking some locks) and
> hibernation mutex and whatnot.  But it is true that writing to a backing
> file will grab fs freeze protection and that can bring with it all sorts of
> interesting dependencies.

I don't think the problem was a write completion, but the synchronous
nature of the workqueue operations.  But I also have to admit the whole
lockdep vs workqueue thing keeps confusing me.


* Re: [PATCH 1/2] loop: use a global workqueue
  2021-12-23 14:37   ` Tetsuo Handa
@ 2021-12-24  6:03     ` Christoph Hellwig
  2021-12-24 12:05       ` Tetsuo Handa
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-24  6:03 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Jan Kara, Dan Schatzberg, linux-block

On Thu, Dec 23, 2021 at 11:37:21PM +0900, Tetsuo Handa wrote:
> > @@ -1115,7 +1107,6 @@ static void __loop_clr_fd(struct loop_device *lo)
> >  	/* freeze request queue during the transition */
> >  	blk_mq_freeze_queue(lo->lo_queue);
> >  
> > -	destroy_workqueue(lo->workqueue);
> 
> is it safe to remove destroy_workqueue() call here?
> 
> >  	spin_lock_irq(&lo->lo_work_lock);
> >  	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
> >  				idle_list) {
> 
> destroy_workqueue() implies flush_workqueue() which is creating the lock
> ordering problem. And I think that flush_workqueue() is required for making
> sure that there is no more work to process (i.e. loop_process_work() is
> no longer running) before we start deleting idle workers.
> 
> My understanding is that the problem is not the use of a per-device workqueue
> but the need to call flush_workqueue() in order to make sure that all pending
> works are completed.

All the work items are for requests, and the blk_mq_freeze_queue should
take care of flushing them all out.
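
To illustrate that argument, here is a rough userspace analogy (POSIX
threads only; all identifiers and the file name are made up, this is not
loop driver code): once a "freeze" blocks new submissions and waits for
everything already in flight, per-worker state can be torn down without
any explicit flush, which is the role blk_mq_freeze_queue() plays above.

---------- freeze-analogy.c start ----------
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static int in_flight;
static bool frozen;
static long *per_worker_state;          /* stands in for lo->idle_worker_list */

static void *work_item(void *arg)
{
        long idx = (long)arg;

        per_worker_state[idx]++;        /* "process" one request */
        pthread_mutex_lock(&lock);
        if (--in_flight == 0)
                pthread_cond_signal(&drained);
        pthread_mutex_unlock(&lock);
        return NULL;
}

static int submit(pthread_t *t, long idx)
{
        int ret;

        pthread_mutex_lock(&lock);
        if (frozen) {                   /* a frozen queue takes no new work */
                pthread_mutex_unlock(&lock);
                return -1;
        }
        in_flight++;
        pthread_mutex_unlock(&lock);

        ret = pthread_create(t, NULL, work_item, (void *)idx);
        if (ret) {                      /* undo the accounting on failure */
                pthread_mutex_lock(&lock);
                if (--in_flight == 0)
                        pthread_cond_signal(&drained);
                pthread_mutex_unlock(&lock);
        }
        return ret;
}

static void freeze(void)
{
        pthread_mutex_lock(&lock);
        frozen = true;
        while (in_flight)               /* wait for everything in flight */
                pthread_cond_wait(&drained, &lock);
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        pthread_t t[8];
        long i, n = 0;

        per_worker_state = calloc(8, sizeof(*per_worker_state));
        for (i = 0; i < 8; i++) {
                if (submit(&t[i], i))
                        break;
                n++;
        }
        freeze();
        /* No flush needed: freeze() guaranteed that nothing is running and
         * nothing new can be queued, so this state can go away safely. */
        free(per_worker_state);
        per_worker_state = NULL;
        for (i = 0; i < n; i++)
                pthread_join(t[i], NULL);
        printf("%ld items processed, per-worker state freed after freeze\n", n);
        return 0;
}
---------- freeze-analogy.c end ----------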


* Re: [PATCH 1/2] loop: use a global workqueue
  2021-12-24  6:03     ` Christoph Hellwig
@ 2021-12-24 12:05       ` Tetsuo Handa
  2021-12-24 14:05         ` Tetsuo Handa
  2021-12-29 17:21         ` Christoph Hellwig
  0 siblings, 2 replies; 13+ messages in thread
From: Tetsuo Handa @ 2021-12-24 12:05 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Jan Kara, Dan Schatzberg, linux-block

On 2021/12/24 15:03, Christoph Hellwig wrote:
> On Thu, Dec 23, 2021 at 11:37:21PM +0900, Tetsuo Handa wrote:
>>> @@ -1115,7 +1107,6 @@ static void __loop_clr_fd(struct loop_device *lo)
>>>  	/* freeze request queue during the transition */
>>>  	blk_mq_freeze_queue(lo->lo_queue);
>>>  
>>> -	destroy_workqueue(lo->workqueue);
>>
>> is it safe to remove destroy_workqueue() call here?
>>
>>>  	spin_lock_irq(&lo->lo_work_lock);
>>>  	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
>>>  				idle_list) {
>>
>> destroy_workqueue() implies flush_workqueue() which is creating the lock
>> ordering problem. And I think that flush_workqueue() is required for making
>> sure that there is no more work to process (i.e. loop_process_work() is
>> no longer running) before we start deleting idle workers.
>>
>> My understanding is that the problem is not the use of a per-device workqueue
>> but the need to call flush_workqueue() in order to make sure that all pending
>> works are completed.
> 
> All the work items are for requests, and the blk_mq_freeze_queue should
> take care of flushing them all out.

Hmm, OK.

(1) loop_queue_rq() calls blk_mq_start_request() and then calls loop_queue_work().

(2) loop_queue_work() allocates "struct work_struct" and calls queue_work().

(3) loop_handle_cmd() from loop_process_work() from loop_workfn() is called by a WQ thread.

(4) do_req_filebacked() from loop_handle_cmd() performs read/write on lo->lo_backing_file.

(5) Either completion function or loop_handle_cmd() calls blk_mq_complete_request().

Therefore, as long as blk_mq_freeze_queue(lo->lo_queue) waits for completion of (5) and
blocks new events for (2), there should be no work to process by loop_process_work().

Then, we can defer

	destroy_workqueue(lo->workqueue);
	spin_lock_irq(&lo->lo_work_lock);
	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
				idle_list) {
		list_del(&worker->idle_list);
		rb_erase(&worker->rb_node, &lo->worker_tree);
		css_put(worker->blkcg_css);
		kfree(worker);
	}
	spin_unlock_irq(&lo->lo_work_lock);
	del_timer_sync(&lo->timer);

block in __loop_clr_fd() till loop_remove() if we want. Assuming that
loop devices are likely created only when there is no free one, a loop
device is likely reused once created. Then, we don't need to care about
idle workers on every loop_configure()/__loop_clr_fd() pair?

By the way, is it safe to use a single global WQ if (4) is a synchronous I/O request?
Since there can be up to 1048576 loop devices, and one loop device can use another
loop device as lo->lo_backing_file (unless loop_validate_file() finds a circular
usage), one synchronous I/O request in (4) might recursively involve up to 1048576
works (which would be too much concurrency to be handled by a WQ)?

Also, is

	blk_mq_start_request(rq);

	if (lo->lo_state != Lo_bound)
		return BLK_STS_IOERR;

in loop_queue_rq() correct? (Not only is the lo->lo_state test racy, but doesn't
it also want blk_mq_end_request() like lo_complete_rq() does?)


* Re: [PATCH 1/2] loop: use a global workqueue
  2021-12-24 12:05       ` Tetsuo Handa
@ 2021-12-24 14:05         ` Tetsuo Handa
  2021-12-29 17:21         ` Christoph Hellwig
  1 sibling, 0 replies; 13+ messages in thread
From: Tetsuo Handa @ 2021-12-24 14:05 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Jan Kara, Dan Schatzberg, linux-block

On 2021/12/24 21:05, Tetsuo Handa wrote:
> Also, is
> 
> 	blk_mq_start_request(rq);
> 
> 	if (lo->lo_state != Lo_bound)
> 		return BLK_STS_IOERR;
> 
> in loop_queue_rq() correct? (Not only is the lo->lo_state test racy, but doesn't
> it also want blk_mq_end_request() like lo_complete_rq() does?)

OK. blk_mq_end_request() is called by blk_mq_dispatch_rq_list() when BLK_STS_IOERR
is returned. Thus, just "s/lo->lo_state/data_race(lo->lo_state)/" will be fine.
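That is, applying the substitution to the quoted check would give something like:

        blk_mq_start_request(rq);

        if (data_race(lo->lo_state) != Lo_bound)
                return BLK_STS_IOERR;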

> By the way, is it safe to use a single global WQ if (4) is a synchronous I/O request?
> Since there can be up to 1048576 loop devices, and one loop device can use another
> loop device as lo->lo_backing_file (unless loop_validate_file() finds a circular
> usage), one synchronous I/O request in (4) might recursively involve up to 1048576
> works (which would be too much concurrency to be handled by a WQ)?

I wonder whether use of WQ_MEM_RECLAIM in your patch is appropriate.
WQ_MEM_RECLAIM guarantees that at least one "struct task_struct" is available
so that "struct work" can be processed under memory pressure. However, while
flushing a buffered write() request to storage would help increase free memory,
processing a buffered read() request would not help increase free memory. Rather,
doesn't it reduce free memory by copying that data into the page cache?
So, I feel that only works which flush buffered write() requests would qualify
for processing via a WQ_MEM_RECLAIM WQ, and mixing both read() and write() into
the same queue is wrong.

Anyway, as a minimal change for fixing the xfstests problem, what about the one
below? It is just a revert of commit 322c4293ecc58110 ("loop: make autoclear
operation asynchronous"), plus simply deferring destroy_workqueue(lo->workqueue)
till loop_remove().

---
 drivers/block/loop.c | 75 ++++++++++++++++++++------------------------
 drivers/block/loop.h |  1 -
 2 files changed, 34 insertions(+), 42 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..e0ac186ca998 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1006,10 +1006,10 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	    !file->f_op->write_iter)
 		lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-	lo->workqueue = alloc_workqueue("loop%d",
-					WQ_UNBOUND | WQ_FREEZABLE,
-					0,
-					lo->lo_number);
+	if (!lo->workqueue)
+		lo->workqueue = alloc_workqueue("loop%d",
+						WQ_UNBOUND | WQ_FREEZABLE,
+						0, lo->lo_number);
 	if (!lo->workqueue) {
 		error = -ENOMEM;
 		goto out_unlock;
@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return error;
 }
 
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
 {
 	struct file *filp;
 	gfp_t gfp = lo->old_gfp_mask;
@@ -1115,7 +1115,6 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
-	destroy_workqueue(lo->workqueue);
 	spin_lock_irq(&lo->lo_work_lock);
 	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
 				idle_list) {
@@ -1144,6 +1143,8 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* let user-space know about this change */
 	kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
 	mapping_set_gfp_mask(filp->f_mapping, gfp);
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1152,44 @@ static void __loop_clr_fd(struct loop_device *lo)
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
 		int err;
 
-		mutex_lock(&lo->lo_disk->open_mutex);
+		/*
+		 * open_mutex has been held already in release path, so don't
+		 * acquire it if this function is called in such case.
+		 *
+		 * If the reread partition isn't from release path, lo_refcnt
+		 * must be at least one and it can only become zero when the
+		 * current holder is released.
+		 */
+		if (!release)
+			mutex_lock(&lo->lo_disk->open_mutex);
 		err = bdev_disk_changed(lo->lo_disk, false);
-		mutex_unlock(&lo->lo_disk->open_mutex);
+		if (!release)
+			mutex_unlock(&lo->lo_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo->lo_number, err);
 		/* Device is gone, no point in returning error */
 	}
 
+	/*
+	 * lo->lo_state is set to Lo_unbound here after above partscan has
+	 * finished. There cannot be anybody else entering __loop_clr_fd() as
+	 * Lo_rundown state protects us from all the other places trying to
+	 * change the 'lo' device.
+	 */
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
-	fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
 	mutex_lock(&lo->lo_mutex);
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
-	module_put(THIS_MODULE);
-}
-
-static void loop_rundown_workfn(struct work_struct *work)
-{
-	struct loop_device *lo = container_of(work, struct loop_device,
-					      rundown_work);
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__loop_clr_fd(lo);
-	kobject_put(&bdev->bd_device.kobj);
-	module_put(disk->fops->owner);
-	loop_rundown_completed(lo);
-}
 
-static void loop_schedule_rundown(struct loop_device *lo)
-{
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__module_get(disk->fops->owner);
-	kobject_get(&bdev->bd_device.kobj);
-	INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
-	queue_work(system_long_wq, &lo->rundown_work);
+	/*
+	 * Need not hold lo_mutex to fput backing file. Calling fput holding
+	 * lo_mutex triggers a circular lock dependency possibility warning as
+	 * fput can take open_mutex which is usually taken before lo_mutex.
+	 */
+	fput(filp);
 }
 
 static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1221,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_state = Lo_rundown;
 	mutex_unlock(&lo->lo_mutex);
 
-	__loop_clr_fd(lo);
-	loop_rundown_completed(lo);
+	__loop_clr_fd(lo, false);
 	return 0;
 }
 
@@ -1754,7 +1746,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		loop_schedule_rundown(lo);
+		__loop_clr_fd(lo, true);
 		return;
 	} else if (lo->lo_state == Lo_bound) {
 		/*
@@ -2080,6 +2072,7 @@ static void loop_remove(struct loop_device *lo)
 	mutex_unlock(&loop_ctl_mutex);
 	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
+	destroy_workqueue(lo->workqueue);
 	kfree(lo);
 }
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
 	bool			idr_visible;
-	struct work_struct      rundown_work;
 };
 
 struct loop_cmd {
-- 
2.32.0



* Re: fix loop autoclear for xfstests xfs/049
  2021-12-24  6:02   ` Christoph Hellwig
@ 2021-12-26  7:09     ` Tetsuo Handa
  2021-12-29 17:20       ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread
From: Tetsuo Handa @ 2021-12-26  7:09 UTC (permalink / raw)
  To: Christoph Hellwig, Jan Kara; +Cc: Jens Axboe, Dan Schatzberg, linux-block

On 2021/12/24 15:02, Christoph Hellwig wrote:
> On Thu, Dec 23, 2021 at 02:40:50PM +0100, Jan Kara wrote:
>> Hum, I have nothing against this but I'm somewhat wondering: Lockdep was
>> originally complaining because it somehow managed to find a write whose
>> completion was indirectly dependent on disk->open_mutex and
>> destroy_workqueue() could wait for such a write to complete under
>> disk->open_mutex. Now your patch will fix this lockdep complaint but we
>> still would wait for the write to complete through blk_mq_freeze_queue()
>> (just lockdep is not clever enough to detect this). So IMHO if there was a
>> deadlock before, it will still be there with your changes. Now I'm not 100%
>> sure the deadlock lockdep was complaining about is real in the first place
>> because it involved some writes to proc files (taking some locks) and
>> hibernation mutex and whatnot.  But it is true that writing to a backing
>> file will grab fs freeze protection and that can bring with it all sorts of
>> interesting dependencies.
> 
> I don't think the problem was a write completion, but the synchronous
> nature of the workqueue operations.  But I also have to admit the whole
> lockdep vs workqueue thing keeps confusing me.

Here is a simplified reproducer and a log. It was difficult to find a reproducer
because /proc/lockdep zaps the dependency chain when a dynamically created object is destroyed.

----------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/loop.h>
#include <sys/sendfile.h>

int main(int argc, char *argv[])
{
	const int file_fd = open("testfile", O_RDWR | O_CREAT, 0600);
	ftruncate(file_fd, 1048576);
	sendfile(file_fd, open("/proc/mounts", O_RDONLY), 0, 1048576);
	char filename[128] = { };
	const int loop_num = ioctl(open("/dev/loop-control", 3),  LOOP_CTL_GET_FREE, 0);
	snprintf(filename, sizeof(filename) - 1, "/dev/loop%d", loop_num);
	const int loop_fd_1 = open(filename, O_RDWR);
	ioctl(loop_fd_1, LOOP_SET_FD, file_fd);
	const int loop_fd_2 = open(filename, O_RDWR);
	ioctl(loop_fd_1, LOOP_CLR_FD, 0);
	sendfile(loop_fd_2, file_fd, 0, 1048576);
	fsync(loop_fd_2);
	write(open("/sys/power/resume", O_WRONLY), "700", 3);
	system("/bin/cat /proc/lockdep > /tmp/lockdep"); // Save before "zap on release" forgets the dependency.
	return 0;
}
----------------------------------------

----------------------------------------
[   36.910512] loop0: detected capacity change from 0 to 2048
[   37.014998] 
[   37.015573] ======================================================
[   37.017667] WARNING: possible circular locking dependency detected
[   37.019788] 5.16.0-rc4-next-20211210 #10 Not tainted
[   37.021516] ------------------------------------------------------
[   37.023602] systemd-udevd/2254 is trying to acquire lock:
[   37.025820] ffff888119656538 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x70/0x560
[   37.028874] 
[   37.028874] but task is already holding lock:
[   37.030871] ffff888104e54d18 (&disk->open_mutex){+.+.}-{3:3}, at: blkdev_put+0x4c/0x1c0
[   37.033653] 
[   37.033653] which lock already depends on the new lock.
[   37.033653] 
[   37.037679] 
[   37.037679] the existing dependency chain (in reverse order) is:
[   37.042074] 
[   37.042074] -> #6 (&disk->open_mutex){+.+.}-{3:3}:
[   37.045465]        lock_acquire+0xc7/0x1d0
[   37.047446]        __mutex_lock_common+0xb9/0xdd0
[   37.049567]        mutex_lock_nested+0x17/0x20
[   37.051610]        blkdev_get_by_dev+0xeb/0x2f0
[   37.053680]        swsusp_check+0x27/0x120
[   37.055634]        software_resume+0x5f/0x1f0
[   37.057721]        resume_store+0x6e/0x90
[   37.060020]        kernfs_fop_write_iter+0x120/0x1b0
[   37.062285]        vfs_write+0x2ed/0x360
[   37.064092]        ksys_write+0x67/0xd0
[   37.065886]        do_syscall_64+0x3d/0x90
[   37.067699]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.069991] 
[   37.069991] -> #5 (system_transition_mutex/1){+.+.}-{3:3}:
[   37.073194]        lock_acquire+0xc7/0x1d0
[   37.075694]        __mutex_lock_common+0xb9/0xdd0
[   37.077747]        mutex_lock_nested+0x17/0x20
[   37.079735]        software_resume+0x4d/0x1f0
[   37.081677]        resume_store+0x6e/0x90
[   37.083421]        kernfs_fop_write_iter+0x120/0x1b0
[   37.085514]        vfs_write+0x2ed/0x360
[   37.087280]        ksys_write+0x67/0xd0
[   37.089052]        do_syscall_64+0x3d/0x90
[   37.090947]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.094050] 
[   37.094050] -> #4 (&of->mutex){+.+.}-{3:3}:
[   37.096733]        lock_acquire+0xc7/0x1d0
[   37.098442]        __mutex_lock_common+0xb9/0xdd0
[   37.100461]        mutex_lock_nested+0x17/0x20
[   37.102221]        kernfs_seq_start+0x1d/0xf0
[   37.103975]        seq_read_iter+0xf8/0x3e0
[   37.105888]        vfs_read+0x2db/0x350
[   37.107838]        ksys_read+0x67/0xd0
[   37.110267]        do_syscall_64+0x3d/0x90
[   37.112066]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.114246] 
[   37.114246] -> #3 (&p->lock){+.+.}-{3:3}:
[   37.116809]        lock_acquire+0xc7/0x1d0
[   37.118552]        __mutex_lock_common+0xb9/0xdd0
[   37.120584]        mutex_lock_nested+0x17/0x20
[   37.122478]        seq_read_iter+0x37/0x3e0
[   37.125093]        generic_file_splice_read+0xf3/0x170
[   37.127174]        splice_direct_to_actor+0x13f/0x310
[   37.129128]        do_splice_direct+0x84/0xd0
[   37.130903]        do_sendfile+0x267/0x410
[   37.132534]        __se_sys_sendfile64+0x9f/0xd0
[   37.134395]        do_syscall_64+0x3d/0x90
[   37.136069]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.138175] 
[   37.138175] -> #2 (sb_writers#7){.+.+}-{0:0}:
[   37.141353]        lock_acquire+0xc7/0x1d0
[   37.143415]        loop_process_work+0x664/0x980 [loop]
[   37.145828]        process_one_work+0x230/0x3c0
[   37.147625]        worker_thread+0x21d/0x490
[   37.149382]        kthread+0x192/0x1b0
[   37.150934]        ret_from_fork+0x1f/0x30
[   37.152598] 
[   37.152598] -> #1 ((work_completion)(&worker->work)){+.+.}-{0:0}:
[   37.155702]        lock_acquire+0xc7/0x1d0
[   37.157923]        process_one_work+0x21d/0x3c0
[   37.160202]        worker_thread+0x21d/0x490
[   37.162134]        kthread+0x192/0x1b0
[   37.163678]        ret_from_fork+0x1f/0x30
[   37.165439] 
[   37.165439] -> #0 ((wq_completion)loop0){+.+.}-{0:0}:
[   37.168213]        validate_chain+0xba0/0x2ae0
[   37.170033]        __lock_acquire+0x8e0/0xbe0
[   37.171810]        lock_acquire+0xc7/0x1d0
[   37.173459]        flush_workqueue+0x8c/0x560
[   37.176198]        drain_workqueue+0x80/0x140
[   37.178126]        destroy_workqueue+0x36/0x3d0
[   37.180111]        __loop_clr_fd+0x98/0x350 [loop]
[   37.182035]        blkdev_put+0x14b/0x1c0
[   37.183710]        blkdev_close+0x12/0x20
[   37.185359]        __fput+0xfb/0x230
[   37.186867]        task_work_run+0x69/0xc0
[   37.188565]        exit_to_user_mode_loop+0x144/0x160
[   37.190611]        exit_to_user_mode_prepare+0xbd/0x130
[   37.193387]        syscall_exit_to_user_mode+0x26/0x60
[   37.195474]        do_syscall_64+0x49/0x90
[   37.197158]        entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.199520] 
[   37.199520] other info that might help us debug this:
[   37.199520] 
[   37.203310] Chain exists of:
[   37.203310]   (wq_completion)loop0 --> system_transition_mutex/1 --> &disk->open_mutex
[   37.203310] 
[   37.209552]  Possible unsafe locking scenario:
[   37.209552] 
[   37.212337]        CPU0                    CPU1
[   37.214255]        ----                    ----
[   37.216084]   lock(&disk->open_mutex);
[   37.217671]                                lock(system_transition_mutex/1);
[   37.220438]                                lock(&disk->open_mutex);
[   37.222862]   lock((wq_completion)loop0);
[   37.224902] 
[   37.224902]  *** DEADLOCK ***
[   37.224902] 
[   37.228403] 1 lock held by systemd-udevd/2254:
[   37.230301]  #0: ffff888104e54d18 (&disk->open_mutex){+.+.}-{3:3}, at: blkdev_put+0x4c/0x1c0
[   37.233386] 
[   37.233386] stack backtrace:
[   37.235654] CPU: 0 PID: 2254 Comm: systemd-udevd Kdump: loaded Not tainted 5.16.0-rc4-next-20211210 #10
[   37.239620] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020
[   37.244830] Call Trace:
[   37.246117]  <TASK>
[   37.247237]  dump_stack_lvl+0x79/0xbf
[   37.248946]  print_circular_bug+0x5df/0x5f0
[   37.250802]  ? stack_trace_save+0x70/0x70
[   37.252524]  ? rcu_lock_acquire+0x30/0x30
[   37.254292]  ? rcu_read_lock_sched_held+0x41/0x90
[   37.256306]  ? perf_trace_mem_return_failed+0xe0/0xe0
[   37.259439]  ? trace_lock_release+0x2d/0xd0
[   37.261471]  ? lock_release+0x27/0x2c0
[   37.263148]  ? stack_trace_save+0x70/0x70
[   37.264976]  ? is_bpf_text_address+0x11a/0x120
[   37.266947]  ? kernel_text_address+0xa8/0xc0
[   37.268834]  ? __kernel_text_address+0x9/0x40
[   37.270752]  ? unwind_get_return_address+0x12/0x20
[   37.272839]  ? arch_stack_walk+0x98/0xe0
[   37.275409]  ? stack_trace_save+0x43/0x70
[   37.277364]  ? save_trace+0x3d/0x2c0
[   37.279136]  check_noncircular+0x123/0x130
[   37.281006]  validate_chain+0xba0/0x2ae0
[   37.282757]  ? validate_chain+0x104/0x2ae0
[   37.284612]  ? validate_chain+0x104/0x2ae0
[   37.286465]  ? validate_chain+0x104/0x2ae0
[   37.288278]  ? validate_chain+0x104/0x2ae0
[   37.290075]  ? validate_chain+0x104/0x2ae0
[   37.292662]  ? validate_chain+0x104/0x2ae0
[   37.294480]  ? validate_chain+0x104/0x2ae0
[   37.296248]  ? rcu_lock_acquire+0x30/0x30
[   37.298066]  ? __lock_acquire+0x901/0xbe0
[   37.299889]  __lock_acquire+0x8e0/0xbe0
[   37.301623]  lock_acquire+0xc7/0x1d0
[   37.303135]  ? flush_workqueue+0x70/0x560
[   37.304778]  ? __raw_spin_lock_init+0x35/0x60
[   37.306742]  flush_workqueue+0x8c/0x560
[   37.308890]  ? flush_workqueue+0x70/0x560
[   37.310811]  drain_workqueue+0x80/0x140
[   37.312501]  destroy_workqueue+0x36/0x3d0
[   37.314193]  __loop_clr_fd+0x98/0x350 [loop]
[   37.315978]  blkdev_put+0x14b/0x1c0
[   37.317482]  blkdev_close+0x12/0x20
[   37.318984]  __fput+0xfb/0x230
[   37.320377]  task_work_run+0x69/0xc0
[   37.321958]  exit_to_user_mode_loop+0x144/0x160
[   37.323781]  exit_to_user_mode_prepare+0xbd/0x130
[   37.326347]  syscall_exit_to_user_mode+0x26/0x60
[   37.328160]  do_syscall_64+0x49/0x90
[   37.329658]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.331588] RIP: 0033:0x7f4537d62097
[   37.333027] Code: ff e8 6d 11 02 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 33 c8 f7 ff
[   37.339995] RSP: 002b:00007ffd77fdb008 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[   37.343761] RAX: 0000000000000000 RBX: 00007f45377d56c0 RCX: 00007f4537d62097
[   37.346617] RDX: 0000556b97cb0723 RSI: 0000556ec02be94c RDI: 0000000000000006
[   37.349287] RBP: 0000000000000006 R08: 0000556ec1273090 R09: 0000000000000000
[   37.351892] R10: 00007f45377d56c0 R11: 0000000000000246 R12: 0000000000000000
[   37.354543] R13: 0000000000000000 R14: 00007ffd77fdb090 R15: 00007ffd77fdb074
[   37.357171]  </TASK>
----------------------------------------

I was able to confirm that just writing a device number (e.g. major=7,minor=0) to
/sys/power/resume from a shell causes system_transition_mutex to be held, and disk->open_mutex
is held with system_transition_mutex held. Therefore, while software_resume() says

        /*
         * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
         * is configured into the kernel. Since the regular hibernate
         * trigger path is via sysfs which takes a buffer mutex before
         * calling hibernate functions (which take system_transition_mutex)
         * this can cause lockdep to complain about a possible ABBA deadlock
         * which cannot happen since we're in the boot code here and
         * sysfs can't be invoked yet. Therefore, we use a subclass
         * here to avoid lockdep complaining.
         */
        mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);

, system_transition_mutex => disk->open_mutex is a real dependency which can happen
outside of the boot code. I feel this _nested annotation might be wrong.

Anyway, many other locks are held while holding disk->open_mutex.

----------------------------------------
ffff888119656528 OPS:      25 FD:  470 BD:    1 +.+.: (wq_completion)loop0
 -> [ffffffffa0353078] (work_completion)(&worker->work)
 -> [ffffffffa0353088] (work_completion)(&lo->rootcg_work)
 
 ffffffffa0353078 OPS:     235 FD:  467 BD:    2 +.+.: (work_completion)(&worker->work)
 -> [ffffffffa0353068] &lo->lo_work_lock
 -> [ffffffff828683e0] mapping.invalidate_lock#2
 -> [ffffffff842cd5b8] &folio_wait_table[i]
 -> [ffffffff82f3a2c0] &rq->__lock
 -> [ffff88813b238410] lock#6
 -> [ffffffff82f36ea8] &p->pi_lock
 -> [ffffffff82868390] sb_writers#7

ffffffff82868390 OPS:    2993 FD:  466 BD:    4 .+.+: sb_writers#7
 -> [ffffffff82610710] mount_lock
 -> [ffff88813b238bc8] lock#2
 -> [ffffffff842ce7a8] &____s->seqcount
 -> [ffffffff842da468] &xa->xa_lock#3
 -> [ffff88813b238410] lock#6
 -> [ffffffff842d6050] &n->list_lock
 -> [ffffffff842da458] &mapping->private_lock
 -> [ffffffff828223c8] tk_core.seq.seqcount
 -> [ffffffff842f3310] &dd->lock
 -> [ffffffff82f36e98] &____s->seqcount#2
 -> [ffffffff82f3a2c0] &rq->__lock
 -> [ffffffff842f33f0] &obj_hash[i].lock
 -> [ffffffff82f3a590] bit_wait_table + i
 -> [ffffffff842c4e70] rcu_node_0
 -> [ffffffff828ab120] pool_lock
 -> [ffffffff828683f0] &type->i_mutex_dir_key#6
 -> [ffffffff82f36e38] &mm->mmap_lock#2
 -> [ffffffff828683d0] &sb->s_type->i_mutex_key#14
 -> [ffffffff82843c10] fs_reclaim
 -> [ffffffff828683f1] &type->i_mutex_dir_key#6/1
 -> [ffffffff842df2f0] &ei->xattr_sem
 -> [ffffffff82f3af58] &sem->wait_lock
 -> [ffffffff82f36ea8] &p->pi_lock
 -> [ffffffff828683c0] &sb->s_type->i_lock_key#23
 -> [ffffffff842da3a0] &dentry->d_lock
 -> [ffffffff842d7af0] &s->s_inode_list_lock
 -> [ffffffff828683b0] sb_internal
 -> [ffffffff82610698] inode_hash_lock
 -> [ffffffff842ce230] &wb->list_lock
 -> [ffffffff842cd8a0] &lruvec->lru_lock
 -> [ffffffff842da4e0] &p->lock

ffffffff842da4e0 OPS:   22875 FD:  455 BD:    5 +.+.: &p->lock
 -> [ffffffff82843c10] fs_reclaim
 -> [ffff88813b238bc8] lock#2
 -> [ffffffff842ce7a8] &____s->seqcount
 -> [ffffffff842d6050] &n->list_lock
 -> [ffffffff82f36e38] &mm->mmap_lock#2
 -> [ffffffff8284d058] file_systems_lock
 -> [ffffffff82f36e78] &p->alloc_lock
 -> [ffffffff82f36f38] &sighand->siglock
 -> [ffffffff842de040] &of->mutex
 -> [ffffffff82f3a2c0] &rq->__lock
 -> [ffffffff8284c4e8] chrdevs_lock
 -> [ffffffff828a87d8] major_names_spinlock
 -> [ffffffff828223c8] tk_core.seq.seqcount
 -> [ffffffff84423f20] &k->k_lock
 -> [ffffffff842f33f0] &obj_hash[i].lock
 -> [ffffffff828fda48] cpufreq_driver_lock
 -> [ffffffff82825be0] cgroup_mutex
 -> [ffffffff82655610] wq_pool_attach_mutex
 -> [ffffffff8284d140] namespace_sem
 -> [ffffffff82844988] swapon_mutex
 -> [ffffffff82f36e98] &____s->seqcount#2
 -> [ffffffff842d7af0] &s->s_inode_list_lock
 -> [ffffffff828448a0] swap_lock
 -> [ffffffff842ce3f8] key#11
 -> [ffffffff842c4e70] rcu_node_0
 -> [ffffffff8263e198] pgd_lock
 -> [ffffffff828fb3f0] pers_lock
 -> [ffffffff828fb440] all_mddevs_lock
 -> [ffffffff82652208] resource_lock
 -> [ffffffff82825b98] cgroup_mutex.wait_lock
 -> [ffffffff82f36ea8] &p->pi_lock
 -> [ffffffff8284d100] namespace_sem.wait_lock

ffffffff842de040 OPS:   22212 FD:  441 BD:    8 +.+.: &of->mutex
 -> [ffffffff82f3a2c0] &rq->__lock
 -> [ffffffff82825be0] cgroup_mutex
 -> [ffffffff8264f378] cpu_hotplug_lock
 -> [ffffffff8282ac58] cpuset_hotplug_work
 -> [ffffffff82825b98] cgroup_mutex.wait_lock
 -> [ffffffff82f36ea8] &p->pi_lock

ffffffff82656231 OPS:       2 FD:  333 BD:   10 +.+.: system_transition_mutex/1
 -> [ffffffff82610698] inode_hash_lock
 -> [ffffffff82610818] bdev_lock
 -> [ffffffff842f2f70] &disk->open_mutex
 -> [ffffffff842d6050] &n->list_lock
 -> [ffffffff828223c8] tk_core.seq.seqcount
 -> [ffffffff842f2568] &x->wait#24
 -> [ffffffff82f3a2c0] &rq->__lock
 -> [ffffffff842f33f0] &obj_hash[i].lock
 -> [ffffffff842c5118] &base->lock
 -> [ffffffff842c50d8] (&timer.timer)

ffffffff842f2f70 OPS:     493 FD:  331 BD:   14 +.+.: &disk->open_mutex
 -> [ffffffffa0212688] sd_ref_mutex
 -> [ffffffff82843c10] fs_reclaim
 -> [ffffffff842d6050] &n->list_lock
 -> [ffffffff828223c8] tk_core.seq.seqcount
 -> [ffffffff842f33f0] &obj_hash[i].lock
 -> [ffffffff842f2710] &hctx->lock
 -> [ffffffff842f26e0] &x->wait#21
 -> [ffff88813b238bc8] lock#2
 -> [ffffffff842ce7a8] &____s->seqcount
 -> [ffffffff842f25d8] &q->sysfs_dir_lock
 -> [ffffffff842f1dd0] &bdev->bd_size_lock
 -> [ffffffff828439d8] free_vmap_area_lock
 -> [ffffffff82843998] vmap_area_lock
 -> [ffffffff842da468] &xa->xa_lock#3
 -> [ffff88813b238410] lock#6
 -> [ffffffff842da458] &mapping->private_lock
 -> [ffffffff842f3310] &dd->lock
 -> [ffffffff842cd5b8] &folio_wait_table[i]
 -> [ffffffff82f3a2c0] &rq->__lock
 -> [ffffffff8265c630] (console_sem).lock
 -> [ffffffff828a59e8] &sb->s_type->i_lock_key#3
 -> [ffffffff842d7af0] &s->s_inode_list_lock
 -> [ffffffff8283d0d0] pcpu_alloc_mutex
 -> [ffffffff84432b38] &x->wait#9
 -> [ffffffff84423f30] &k->list_lock
 -> [ffff88813b2397c0] lock#3
 -> [ffffffff842de000] &root->kernfs_rwsem
 -> [ffffffff828b0480] bus_type_sem
 -> [ffffffff82857b78] sysfs_symlink_target_lock
 -> [ffffffff84432878] &dev->power.lock
 -> [ffffffff828c92f0] dpm_list_mtx
 -> [ffffffff828c8c18] req_lock
 -> [ffffffff82f36ea8] &p->pi_lock
 -> [ffffffff84432a48] &x->wait#10
 -> [ffffffff84423f20] &k->k_lock
 -> [ffffffff842f2f88] subsys mutex#49
 -> [ffffffff842f2f98] &xa->xa_lock#7
 -> [ffffffff82610698] inode_hash_lock
 -> [ffffffff82843b08] purge_vmap_area_lock
 -> [ffffffff842cd8a0] &lruvec->lru_lock
 -> [ffffffff828d0d18] sr_ref_mutex
 -> [ffffffff842f2fe8] &ev->block_mutex
 -> [ffffffff842f2fd8] &ev->lock
 -> [ffffffff842c5118] &base->lock
 -> [ffffffff842c50d8] (&timer.timer)
 -> [ffffffff8263e198] pgd_lock
 -> [ffffffff8284c3f8] sb_lock
 -> [ffffffff84433890] &cd->lock
 -> [ffffffff82610818] bdev_lock
 -> [ffffffff842ce230] &wb->list_lock
 -> [ffffffffa0353048] &lo->lo_mutex
 -> [ffffffff82f3af48] &lock->wait_lock
 -> [ffffffffa03532b0] loop_validate_mutex
 -> [ffffffff842f2608] &q->mq_freeze_lock
 -> [ffffffff828ab040] percpu_ref_switch_lock
 -> [ffffffff842f25f8] &q->mq_freeze_wq
 -> [ffffffff82f37a18] &wq->mutex
 -> [ffffffff826554d8] wq_pool_mutex
 -> [ffffffff82f37d19] &pool->lock/1
 -> [ffffffffa0353068] &lo->lo_work_lock
 -> [ffffffffa0353098] (&lo->timer)
 -> [ffffffffa0353058] &lo->lo_lock
 -> [ffffffffa0353350] kn->active#145
 -> [ffffffffa0353388] kn->active#144
 -> [ffffffffa03533c0] kn->active#146
 -> [ffffffffa03533f8] kn->active#143
 -> [ffffffffa0353430] kn->active#147
 -> [ffffffffa0353468] kn->active#148
 -> [ffffffff82f3af58] &sem->wait_lock
 -> [ffffffff828abae0] uevent_sock_mutex
 -> [ffffffff828aba98] uevent_sock_mutex.wait_lock
----------------------------------------

Therefore, to reduce the locking dependencies taken with disk->open_mutex held, while just avoiding
flush_workqueue() ( https://lkml.kernel.org/r/03f43407-c34b-b7b2-68cd-d4ca93a993b8@i-love.sakura.ne.jp )
would be possible, I think doing as much as possible without holding disk->open_mutex
( https://lkml.kernel.org/r/9eff2034-2f32-54a3-e476-d0f609ab49c0@i-love.sakura.ne.jp ) is preferable.


* Re: [PATCH 1/2] loop: use a global workqueue
  2021-12-23 11:25 ` [PATCH 1/2] loop: use a global workqueue Christoph Hellwig
  2021-12-23 14:37   ` Tetsuo Handa
@ 2021-12-29  2:23   ` Tetsuo Handa
  1 sibling, 0 replies; 13+ messages in thread
From: Tetsuo Handa @ 2021-12-29  2:23 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe; +Cc: Jan Kara, Dan Schatzberg, linux-block

On 2021/12/23 20:25, Christoph Hellwig wrote:
> Using a per-device unbound workqueue is a bit of an anti-pattern and
> in this case also creates lock ordering problems.  Just use a global
> concurrency managed workqueue instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/block/loop.c | 36 +++++++++++++++---------------------
>  drivers/block/loop.h |  1 -
>  2 files changed, 15 insertions(+), 22 deletions(-)
> 

If a "struct work_struct" for an I/O request depends on more "struct work_struct"
for that I/O request, WQ can throttle and choke. It seems that use of single global
workquque is prone to I/O hung due to hitting WQ's max active limit.

---------- recursive-loop.c start ----------
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(int argc, char *argv[])
{
	int i;
	static char buffer[4096] = { };
	const int loop_ctl_fd = open("/dev/loop-control", 3);
	int file_fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
	ftruncate(file_fd, 1048576);
	for (i = 0; i < 512; i++) {
		int loop_fd;
		snprintf(buffer, sizeof(buffer) - 1, "/dev/loop%d", ioctl(loop_ctl_fd,  LOOP_CTL_GET_FREE, 0));
		loop_fd = open(buffer, O_RDWR);
		ioctl(loop_fd, LOOP_SET_FD, file_fd);
		close(file_fd);
		file_fd = loop_fd;
	}
	printf("Writing to %s\n", buffer);
	i = write(file_fd, buffer, sizeof(buffer));
	printf("Wrote %d, flushing.\n", i);
	fsync(file_fd);
	printf("Done.\n");
	return 0;
}
---------- recursive-loop.c end ----------

----------------------------------------
# uname -r
5.16.0-rc4-next-20211210
# time ./recursive-loop
Writing to /dev/loop511
Wrote 4096, flushing.
Done.

real    0m55.531s
user    0m0.005s
sys     0m13.628s
# losetup -D
# time ./recursive-loop
Writing to /dev/loop511
Wrote 4096, flushing.
Done.

real    0m4.734s
user    0m0.005s
sys     0m2.094s
----------------------------------------

With "[PATCH 1/2] loop: use a global workqueue" applied.

----------------------------------------
root@fuzz:~# uname -r
5.16.0-rc4-next-20211210+
root@fuzz:~# time ./recursive-loop
Writing to /dev/loop511
Wrote 4096, flushing.
----------------------------------------

The task hung because it cannot return from fsync().
SysRq-t shows that a BIO from blkdev_fsync() can never complete because
recursive BIOs from other blkdev_fsync() calls can't start due to "active=256/256".

----------------------------------------
[  250.030246] task:kworker/1:251   state:D stack:28896 pid: 5511 ppid:     2 flags:0x00004000
[  250.030270] Workqueue: loop loop_rootcg_workfn [loop]
[  250.030289] Call Trace:
[  250.030294]  <TASK>
[  250.030313]  __schedule+0x8fc/0xa50
[  250.030356]  schedule+0xc1/0x120
[  250.030372]  schedule_timeout+0x2b/0x190
[  250.030414]  io_schedule_timeout+0x6d/0xa0
[  250.030423]  ? yield_to+0x2a0/0x2a0
[  250.030441]  do_wait_for_common+0x162/0x200
[  250.030460]  ? yield_to+0x2a0/0x2a0
[  250.030491]  wait_for_completion_io+0x46/0x60
[  250.030505]  submit_bio_wait+0xba/0xf0
[  250.030548]  blkdev_issue_flush+0xa1/0xd0
[  250.030573]  ? submit_bio_wait+0xf0/0xf0
[  250.030608]  blkdev_fsync+0x3d/0x50
[  250.030626]  loop_process_work+0x35c/0xf10 [loop]
[  250.030725]  process_one_work+0x40a/0x630
[  250.030782]  worker_thread+0x4d7/0x9b0
[  250.030808]  ? _raw_spin_unlock_irqrestore+0x3f/0xb0
[  250.030826]  ? preempt_count_sub+0xf/0xc0
[  250.030864]  kthread+0x27c/0x2a0
[  250.030875]  ? rcu_lock_release+0x20/0x20
[  250.030883]  ? kthread_blkcg+0x50/0x50
[  250.030903]  ret_from_fork+0x1f/0x30
[  250.030959]  </TASK>

[  250.054300] Showing busy workqueues and worker pools:
[  250.054352] workqueue loop: flags=0xc
[  250.054391]   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=256/256 refcnt=258
[  250.054402]     in-flight: 5511:loop_rootcg_workfn [loop], 5409:loop_rootcg_workfn [loop], 5396:loop_rootcg_workfn [loop], 5365:loop_rootcg_workfn [loop], 5314:loop_rootcg_workfn [loop], 5303:loop_rootcg_workfn [loop], 5280:loop_rootcg_workfn [loop], 5493:loop_rootcg_workfn [loop], 5449:loop_rootcg_workfn [loop], 5410:loop_rootcg_workfn [loop], 5406:loop_rootcg_workfn [loop], 5325:loop_rootcg_workfn [loop], 5467:loop_rootcg_workfn [loop], 5429:loop_rootcg_workfn [loop], 5407:loop_rootcg_workfn [loop], 5322:loop_rootcg_workfn [loop], 5295:loop_rootcg_workfn [loop], 5272:loop_rootcg_workfn [loop], 5372:loop_rootcg_workfn [loop], 5364:loop_rootcg_workfn [loop], 5312:loop_rootcg_workfn [loop], 5264:loop_rootcg_workfn [loop], 5453:loop_rootcg_workfn [loop], 5345:loop_rootcg_workfn [loop], 5490:loop_rootcg_workfn [loop], 5423:loop_rootcg_workfn [loop], 5472:loop_rootcg_workfn [loop], 5463:loop_rootcg_workfn [loop], 5442:loop_rootcg_workfn [loop], 2302:loop_rootcg_workfn [loop]
[  250.054771] , 5501:loop_rootcg_workfn [loop], 5388:loop_rootcg_workfn [loop], 5316:loop_rootcg_workfn [loop], 5302:loop_rootcg_workfn [loop], 5268:loop_rootcg_workfn [loop], 5516:loop_rootcg_workfn [loop], 5434:loop_rootcg_workfn [loop], 5404:loop_rootcg_workfn [loop], 5354:loop_rootcg_workfn [loop], 5399:loop_rootcg_workfn [loop], 5343:loop_rootcg_workfn [loop], 5319:loop_rootcg_workfn [loop], 5471:loop_rootcg_workfn [loop], 5378:loop_rootcg_workfn [loop], 5360:loop_rootcg_workfn [loop], 5420:loop_rootcg_workfn [loop], 5398:loop_rootcg_workfn [loop], 5318:loop_rootcg_workfn [loop], 5311:loop_rootcg_workfn [loop], 5486:loop_rootcg_workfn [loop], 5470:loop_rootcg_workfn [loop], 5309:loop_rootcg_workfn [loop], 5298:loop_rootcg_workfn [loop], 5464:loop_rootcg_workfn [loop], 5342:loop_rootcg_workfn [loop], 5315:loop_rootcg_workfn [loop], 5281:loop_rootcg_workfn [loop], 5459:loop_rootcg_workfn [loop], 5424:loop_rootcg_workfn [loop], 5418:loop_rootcg_workfn [loop]
[  250.055144] , 5353:loop_rootcg_workfn [loop], 5341:loop_rootcg_workfn [loop], 5299:loop_rootcg_workfn [loop], 5284:loop_rootcg_workfn [loop], 5414:loop_rootcg_workfn [loop], 5363:loop_rootcg_workfn [loop], 5304:loop_rootcg_workfn [loop], 5285:loop_rootcg_workfn [loop], 5419:loop_rootcg_workfn [loop], 5397:loop_rootcg_workfn [loop], 5387:loop_rootcg_workfn [loop], 5379:loop_rootcg_workfn [loop], 5338:loop_rootcg_workfn [loop], 5253:loop_rootcg_workfn [loop], 5489:loop_rootcg_workfn [loop], 5445:loop_rootcg_workfn [loop], 5432:loop_rootcg_workfn [loop], 5356:loop_rootcg_workfn [loop], 5408:loop_rootcg_workfn [loop], 5310:loop_rootcg_workfn [loop], 5279:loop_rootcg_workfn [loop], 5358:loop_rootcg_workfn [loop], 2295:loop_rootcg_workfn [loop], 5287:loop_rootcg_workfn [loop], 5265:loop_rootcg_workfn [loop], 5261:loop_rootcg_workfn [loop], 2298:loop_rootcg_workfn [loop], 5508:loop_rootcg_workfn [loop], 5368:loop_rootcg_workfn [loop], 5488:loop_rootcg_workfn [loop]
[  250.055494] , 5469:loop_rootcg_workfn [loop], 5385:loop_rootcg_workfn [loop], 5340:loop_rootcg_workfn [loop], 5290:loop_rootcg_workfn [loop], 5494:loop_rootcg_workfn [loop], 5481:loop_rootcg_workfn [loop], 5361:loop_rootcg_workfn [loop], 5348:loop_rootcg_workfn [loop], 5286:loop_rootcg_workfn [loop], 5273:loop_rootcg_workfn [loop], 5495:loop_rootcg_workfn [loop], 5454:loop_rootcg_workfn [loop], 5392:loop_rootcg_workfn [loop], 5355:loop_rootcg_workfn [loop], 5327:loop_rootcg_workfn [loop], 5457:loop_rootcg_workfn [loop], 5417:loop_rootcg_workfn [loop], 5413:loop_rootcg_workfn [loop], 5306:loop_rootcg_workfn [loop], 5267:loop_rootcg_workfn [loop], 2308:loop_rootcg_workfn [loop], 2306:loop_rootcg_workfn [loop], 5500:loop_rootcg_workfn [loop], 5446:loop_rootcg_workfn [loop], 5331:loop_rootcg_workfn [loop], 5289:loop_rootcg_workfn [loop], 5474:loop_rootcg_workfn [loop], 5438:loop_rootcg_workfn [loop], 5390:loop_rootcg_workfn [loop], 5382:loop_rootcg_workfn [loop]
[  250.055846] , 5381:loop_rootcg_workfn [loop], 5357:loop_rootcg_workfn [loop], 5324:loop_rootcg_workfn [loop], 2304:loop_rootcg_workfn [loop], 5482:loop_rootcg_workfn [loop], 5475:loop_rootcg_workfn [loop], 5351:loop_rootcg_workfn [loop], 5300:loop_rootcg_workfn [loop], 5266:loop_rootcg_workfn [loop], 5430:loop_rootcg_workfn [loop], 5339:loop_rootcg_workfn [loop], 45:loop_rootcg_workfn [loop], 5461:loop_rootcg_workfn [loop], 5460:loop_rootcg_workfn [loop], 5394:loop_rootcg_workfn [loop], 5375:loop_rootcg_workfn [loop], 5259:loop_rootcg_workfn [loop], 2303:loop_rootcg_workfn [loop], 5507:loop_rootcg_workfn [loop], 5499:loop_rootcg_workfn [loop], 5497:loop_rootcg_workfn [loop], 2305:loop_rootcg_workfn [loop], 5405:loop_rootcg_workfn [loop], 5313:loop_rootcg_workfn [loop], 5478:loop_rootcg_workfn [loop], 5465:loop_rootcg_workfn [loop], 5376:loop_rootcg_workfn [loop], 104:loop_rootcg_workfn [loop], 5505:loop_rootcg_workfn [loop], 5462:loop_rootcg_workfn [loop], 5389:loop_rootcg_workfn [loop]
[  250.056207] , 5293:loop_rootcg_workfn [loop], 2307:loop_rootcg_workfn [loop], 5503:loop_rootcg_workfn [loop], 5403:loop_rootcg_workfn [loop], 5393:loop_rootcg_workfn [loop], 5366:loop_rootcg_workfn [loop], 5332:loop_rootcg_workfn [loop], 5278:loop_rootcg_workfn [loop], 5254:loop_rootcg_workfn [loop], 5484:loop_rootcg_workfn [loop], 5452:loop_rootcg_workfn [loop], 5439:loop_rootcg_workfn [loop], 5395:loop_rootcg_workfn [loop], 5380:loop_rootcg_workfn [loop], 5444:loop_rootcg_workfn [loop], 5416:loop_rootcg_workfn [loop], 5402:loop_rootcg_workfn [loop], 5350:loop_rootcg_workfn [loop], 5411:loop_rootcg_workfn [loop], 5369:loop_rootcg_workfn [loop], 5371:loop_rootcg_workfn [loop], 5370:loop_rootcg_workfn [loop], 5307:loop_rootcg_workfn [loop], 5271:loop_rootcg_workfn [loop], 5256:loop_rootcg_workfn [loop], 5421:loop_rootcg_workfn [loop], 5336:loop_rootcg_workfn [loop], 5277:loop_rootcg_workfn [loop], 5443:loop_rootcg_workfn [loop], 5283:loop_rootcg_workfn [loop]
[  250.056557] , 5255:loop_rootcg_workfn [loop], 2301:loop_rootcg_workfn [loop], 5502:loop_rootcg_workfn [loop], 5450:loop_rootcg_workfn [loop], 5391:loop_rootcg_workfn [loop], 5367:loop_rootcg_workfn [loop], 5352:loop_rootcg_workfn [loop], 5276:loop_rootcg_workfn [loop], 5506:loop_rootcg_workfn [loop], 5308:loop_rootcg_workfn [loop], 5305:loop_rootcg_workfn [loop], 5301:loop_rootcg_workfn [loop], 5294:loop_rootcg_workfn [loop], 5427:loop_rootcg_workfn [loop], 5492:loop_rootcg_workfn [loop], 5422:loop_rootcg_workfn [loop], 5401:loop_rootcg_workfn [loop], 5344:loop_rootcg_workfn [loop], 5320:loop_rootcg_workfn [loop], 4429:loop_rootcg_workfn [loop], 5509:loop_rootcg_workfn [loop], 5468:loop_rootcg_workfn [loop], 5458:loop_rootcg_workfn [loop], 5441:loop_rootcg_workfn [loop], 5362:loop_rootcg_workfn [loop], 5330:loop_rootcg_workfn [loop], 2300:loop_rootcg_workfn [loop], 5480:loop_rootcg_workfn [loop], 5282:loop_rootcg_workfn [loop], 5274:loop_rootcg_workfn [loop]
[  250.056930] , 5498:loop_rootcg_workfn [loop], 5323:loop_rootcg_workfn [loop], 5262:loop_rootcg_workfn [loop], 497:loop_rootcg_workfn [loop], 5435:loop_rootcg_workfn [loop], 5426:loop_rootcg_workfn [loop], 5400:loop_rootcg_workfn [loop], 5347:loop_rootcg_workfn [loop], 5275:loop_rootcg_workfn [loop], 5514:loop_rootcg_workfn [loop], 5431:loop_rootcg_workfn [loop], 5328:loop_rootcg_workfn [loop], 2299:loop_rootcg_workfn [loop], 5510:loop_rootcg_workfn [loop], 5428:loop_rootcg_workfn [loop], 5377:loop_rootcg_workfn [loop], 5496:loop_rootcg_workfn [loop], 5451:loop_rootcg_workfn [loop], 5383:loop_rootcg_workfn [loop], 5374:loop_rootcg_workfn [loop], 5288:loop_rootcg_workfn [loop], 5257:loop_rootcg_workfn [loop], 5483:loop_rootcg_workfn [loop], 5436:loop_rootcg_workfn [loop], 5425:loop_rootcg_workfn [loop], 5349:loop_rootcg_workfn [loop], 19:loop_rootcg_workfn [loop], 5433:loop_rootcg_workfn [loop], 5384:loop_rootcg_workfn [loop], 5373:loop_rootcg_workfn [loop], 5359:loop_rootcg_workfn [loop]
[  250.057292] , 5269:loop_rootcg_workfn [loop], 5258:loop_rootcg_workfn [loop], 5513:loop_rootcg_workfn [loop], 5292:loop_rootcg_workfn [loop], 5291:loop_rootcg_workfn [loop], 5477:loop_rootcg_workfn [loop], 5270:loop_rootcg_workfn [loop], 5515:loop_rootcg_workfn [loop], 5448:loop_rootcg_workfn [loop], 5447:loop_rootcg_workfn [loop], 5415:loop_rootcg_workfn [loop], 5386:loop_rootcg_workfn [loop], 5296:loop_rootcg_workfn [loop], 5260:loop_rootcg_workfn [loop]
[  250.057477]     inactive: loop_workfn [loop]
[  250.057496] pool 2: cpus=1 node=0 flags=0x0 nice=0 hung=1s workers=258 idle: 5526 5518


[  360.319488] task:kworker/1:251   state:D stack:28896 pid: 5511 ppid:     2 flags:0x00004000
[  360.319512] Workqueue: loop loop_rootcg_workfn [loop]
[  360.319533] Call Trace:
[  360.319538]  <TASK>
[  360.319558]  __schedule+0x8fc/0xa50
[  360.319603]  schedule+0xc1/0x120
[  360.319619]  schedule_timeout+0x2b/0x190
[  360.319663]  io_schedule_timeout+0x6d/0xa0
[  360.319673]  ? yield_to+0x2a0/0x2a0
[  360.319691]  do_wait_for_common+0x162/0x200
[  360.319710]  ? yield_to+0x2a0/0x2a0
[  360.319743]  wait_for_completion_io+0x46/0x60
[  360.319757]  submit_bio_wait+0xba/0xf0
[  360.319802]  blkdev_issue_flush+0xa1/0xd0
[  360.319828]  ? submit_bio_wait+0xf0/0xf0
[  360.319865]  blkdev_fsync+0x3d/0x50
[  360.319883]  loop_process_work+0x35c/0xf10 [loop]
[  360.319986]  process_one_work+0x40a/0x630
[  360.320045]  worker_thread+0x4d7/0x9b0
[  360.320072]  ? _raw_spin_unlock_irqrestore+0x3f/0xb0
[  360.320091]  ? preempt_count_sub+0xf/0xc0
[  360.320130]  kthread+0x27c/0x2a0
[  360.320141]  ? rcu_lock_release+0x20/0x20
[  360.320149]  ? kthread_blkcg+0x50/0x50
[  360.320171]  ret_from_fork+0x1f/0x30
[  360.320229]  </TASK>

[  360.344504] Showing busy workqueues and worker pools:
[  360.344556] workqueue loop: flags=0xc
[  360.344572]   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=256/256 refcnt=258
[  360.344582]     in-flight: 5511:loop_rootcg_workfn [loop], 5409:loop_rootcg_workfn [loop], 5396:loop_rootcg_workfn [loop], 5365:loop_rootcg_workfn [loop], 5314:loop_rootcg_workfn [loop], 5303:loop_rootcg_workfn [loop], 5280:loop_rootcg_workfn [loop], 5493:loop_rootcg_workfn [loop], 5449:loop_rootcg_workfn [loop], 5410:loop_rootcg_workfn [loop], 5406:loop_rootcg_workfn [loop], 5325:loop_rootcg_workfn [loop], 5467:loop_rootcg_workfn [loop], 5429:loop_rootcg_workfn [loop], 5407:loop_rootcg_workfn [loop], 5322:loop_rootcg_workfn [loop], 5295:loop_rootcg_workfn [loop], 5272:loop_rootcg_workfn [loop], 5372:loop_rootcg_workfn [loop], 5364:loop_rootcg_workfn [loop], 5312:loop_rootcg_workfn [loop], 5264:loop_rootcg_workfn [loop], 5453:loop_rootcg_workfn [loop], 5345:loop_rootcg_workfn [loop], 5490:loop_rootcg_workfn [loop], 5423:loop_rootcg_workfn [loop], 5472:loop_rootcg_workfn [loop], 5463:loop_rootcg_workfn [loop], 5442:loop_rootcg_workfn [loop], 2302:loop_rootcg_workfn [loop]
[  360.344951] , 5501:loop_rootcg_workfn [loop], 5388:loop_rootcg_workfn [loop], 5316:loop_rootcg_workfn [loop], 5302:loop_rootcg_workfn [loop], 5268:loop_rootcg_workfn [loop], 5516:loop_rootcg_workfn [loop], 5434:loop_rootcg_workfn [loop], 5404:loop_rootcg_workfn [loop], 5354:loop_rootcg_workfn [loop], 5399:loop_rootcg_workfn [loop], 5343:loop_rootcg_workfn [loop], 5319:loop_rootcg_workfn [loop], 5471:loop_rootcg_workfn [loop], 5378:loop_rootcg_workfn [loop], 5360:loop_rootcg_workfn [loop], 5420:loop_rootcg_workfn [loop], 5398:loop_rootcg_workfn [loop], 5318:loop_rootcg_workfn [loop], 5311:loop_rootcg_workfn [loop], 5486:loop_rootcg_workfn [loop], 5470:loop_rootcg_workfn [loop], 5309:loop_rootcg_workfn [loop], 5298:loop_rootcg_workfn [loop], 5464:loop_rootcg_workfn [loop], 5342:loop_rootcg_workfn [loop], 5315:loop_rootcg_workfn [loop], 5281:loop_rootcg_workfn [loop], 5459:loop_rootcg_workfn [loop], 5424:loop_rootcg_workfn [loop], 5418:loop_rootcg_workfn [loop]
[  360.345363] , 5353:loop_rootcg_workfn [loop], 5341:loop_rootcg_workfn [loop], 5299:loop_rootcg_workfn [loop], 5284:loop_rootcg_workfn [loop], 5414:loop_rootcg_workfn [loop], 5363:loop_rootcg_workfn [loop], 5304:loop_rootcg_workfn [loop], 5285:loop_rootcg_workfn [loop], 5419:loop_rootcg_workfn [loop], 5397:loop_rootcg_workfn [loop], 5387:loop_rootcg_workfn [loop], 5379:loop_rootcg_workfn [loop], 5338:loop_rootcg_workfn [loop], 5253:loop_rootcg_workfn [loop], 5489:loop_rootcg_workfn [loop], 5445:loop_rootcg_workfn [loop], 5432:loop_rootcg_workfn [loop], 5356:loop_rootcg_workfn [loop], 5408:loop_rootcg_workfn [loop], 5310:loop_rootcg_workfn [loop], 5279:loop_rootcg_workfn [loop], 5358:loop_rootcg_workfn [loop], 2295:loop_rootcg_workfn [loop], 5287:loop_rootcg_workfn [loop], 5265:loop_rootcg_workfn [loop], 5261:loop_rootcg_workfn [loop], 2298:loop_rootcg_workfn [loop], 5508:loop_rootcg_workfn [loop], 5368:loop_rootcg_workfn [loop], 5488:loop_rootcg_workfn [loop]
[  360.345758] , 5469:loop_rootcg_workfn [loop], 5385:loop_rootcg_workfn [loop], 5340:loop_rootcg_workfn [loop], 5290:loop_rootcg_workfn [loop], 5494:loop_rootcg_workfn [loop], 5481:loop_rootcg_workfn [loop], 5361:loop_rootcg_workfn [loop], 5348:loop_rootcg_workfn [loop], 5286:loop_rootcg_workfn [loop], 5273:loop_rootcg_workfn [loop], 5495:loop_rootcg_workfn [loop], 5454:loop_rootcg_workfn [loop], 5392:loop_rootcg_workfn [loop], 5355:loop_rootcg_workfn [loop], 5327:loop_rootcg_workfn [loop], 5457:loop_rootcg_workfn [loop], 5417:loop_rootcg_workfn [loop], 5413:loop_rootcg_workfn [loop], 5306:loop_rootcg_workfn [loop], 5267:loop_rootcg_workfn [loop], 2308:loop_rootcg_workfn [loop], 2306:loop_rootcg_workfn [loop], 5500:loop_rootcg_workfn [loop], 5446:loop_rootcg_workfn [loop], 5331:loop_rootcg_workfn [loop], 5289:loop_rootcg_workfn [loop], 5474:loop_rootcg_workfn [loop], 5438:loop_rootcg_workfn [loop], 5390:loop_rootcg_workfn [loop], 5382:loop_rootcg_workfn [loop]
[  360.346122] , 5381:loop_rootcg_workfn [loop], 5357:loop_rootcg_workfn [loop], 5324:loop_rootcg_workfn [loop], 2304:loop_rootcg_workfn [loop], 5482:loop_rootcg_workfn [loop], 5475:loop_rootcg_workfn [loop], 5351:loop_rootcg_workfn [loop], 5300:loop_rootcg_workfn [loop], 5266:loop_rootcg_workfn [loop], 5430:loop_rootcg_workfn [loop], 5339:loop_rootcg_workfn [loop], 45:loop_rootcg_workfn [loop], 5461:loop_rootcg_workfn [loop], 5460:loop_rootcg_workfn [loop], 5394:loop_rootcg_workfn [loop], 5375:loop_rootcg_workfn [loop], 5259:loop_rootcg_workfn [loop], 2303:loop_rootcg_workfn [loop], 5507:loop_rootcg_workfn [loop], 5499:loop_rootcg_workfn [loop], 5497:loop_rootcg_workfn [loop], 2305:loop_rootcg_workfn [loop], 5405:loop_rootcg_workfn [loop], 5313:loop_rootcg_workfn [loop], 5478:loop_rootcg_workfn [loop], 5465:loop_rootcg_workfn [loop], 5376:loop_rootcg_workfn [loop], 104:loop_rootcg_workfn [loop], 5505:loop_rootcg_workfn [loop], 5462:loop_rootcg_workfn [loop], 5389:loop_rootcg_workfn [loop]
[  360.346496] , 5293:loop_rootcg_workfn [loop], 2307:loop_rootcg_workfn [loop], 5503:loop_rootcg_workfn [loop], 5403:loop_rootcg_workfn [loop], 5393:loop_rootcg_workfn [loop], 5366:loop_rootcg_workfn [loop], 5332:loop_rootcg_workfn [loop], 5278:loop_rootcg_workfn [loop], 5254:loop_rootcg_workfn [loop], 5484:loop_rootcg_workfn [loop], 5452:loop_rootcg_workfn [loop], 5439:loop_rootcg_workfn [loop], 5395:loop_rootcg_workfn [loop], 5380:loop_rootcg_workfn [loop], 5444:loop_rootcg_workfn [loop], 5416:loop_rootcg_workfn [loop], 5402:loop_rootcg_workfn [loop], 5350:loop_rootcg_workfn [loop], 5411:loop_rootcg_workfn [loop], 5369:loop_rootcg_workfn [loop], 5371:loop_rootcg_workfn [loop], 5370:loop_rootcg_workfn [loop], 5307:loop_rootcg_workfn [loop], 5271:loop_rootcg_workfn [loop], 5256:loop_rootcg_workfn [loop], 5421:loop_rootcg_workfn [loop], 5336:loop_rootcg_workfn [loop], 5277:loop_rootcg_workfn [loop], 5443:loop_rootcg_workfn [loop], 5283:loop_rootcg_workfn [loop]
[  360.346857] , 5255:loop_rootcg_workfn [loop], 2301:loop_rootcg_workfn [loop], 5502:loop_rootcg_workfn [loop], 5450:loop_rootcg_workfn [loop], 5391:loop_rootcg_workfn [loop], 5367:loop_rootcg_workfn [loop], 5352:loop_rootcg_workfn [loop], 5276:loop_rootcg_workfn [loop], 5506:loop_rootcg_workfn [loop], 5308:loop_rootcg_workfn [loop], 5305:loop_rootcg_workfn [loop], 5301:loop_rootcg_workfn [loop], 5294:loop_rootcg_workfn [loop], 5427:loop_rootcg_workfn [loop], 5492:loop_rootcg_workfn [loop], 5422:loop_rootcg_workfn [loop], 5401:loop_rootcg_workfn [loop], 5344:loop_rootcg_workfn [loop], 5320:loop_rootcg_workfn [loop], 4429:loop_rootcg_workfn [loop], 5509:loop_rootcg_workfn [loop], 5468:loop_rootcg_workfn [loop], 5458:loop_rootcg_workfn [loop], 5441:loop_rootcg_workfn [loop], 5362:loop_rootcg_workfn [loop], 5330:loop_rootcg_workfn [loop], 2300:loop_rootcg_workfn [loop], 5480:loop_rootcg_workfn [loop], 5282:loop_rootcg_workfn [loop], 5274:loop_rootcg_workfn [loop]
[  360.347219] , 5498:loop_rootcg_workfn [loop], 5323:loop_rootcg_workfn [loop], 5262:loop_rootcg_workfn [loop], 497:loop_rootcg_workfn [loop], 5435:loop_rootcg_workfn [loop], 5426:loop_rootcg_workfn [loop], 5400:loop_rootcg_workfn [loop], 5347:loop_rootcg_workfn [loop], 5275:loop_rootcg_workfn [loop], 5514:loop_rootcg_workfn [loop], 5431:loop_rootcg_workfn [loop], 5328:loop_rootcg_workfn [loop], 2299:loop_rootcg_workfn [loop], 5510:loop_rootcg_workfn [loop], 5428:loop_rootcg_workfn [loop], 5377:loop_rootcg_workfn [loop], 5496:loop_rootcg_workfn [loop], 5451:loop_rootcg_workfn [loop], 5383:loop_rootcg_workfn [loop], 5374:loop_rootcg_workfn [loop], 5288:loop_rootcg_workfn [loop], 5257:loop_rootcg_workfn [loop], 5483:loop_rootcg_workfn [loop], 5436:loop_rootcg_workfn [loop], 5425:loop_rootcg_workfn [loop], 5349:loop_rootcg_workfn [loop], 19:loop_rootcg_workfn [loop], 5433:loop_rootcg_workfn [loop], 5384:loop_rootcg_workfn [loop], 5373:loop_rootcg_workfn [loop], 5359:loop_rootcg_workfn [loop]
[  360.347592] , 5269:loop_rootcg_workfn [loop], 5258:loop_rootcg_workfn [loop], 5513:loop_rootcg_workfn [loop], 5292:loop_rootcg_workfn [loop], 5291:loop_rootcg_workfn [loop], 5477:loop_rootcg_workfn [loop], 5270:loop_rootcg_workfn [loop], 5515:loop_rootcg_workfn [loop], 5448:loop_rootcg_workfn [loop], 5447:loop_rootcg_workfn [loop], 5415:loop_rootcg_workfn [loop], 5386:loop_rootcg_workfn [loop], 5296:loop_rootcg_workfn [loop], 5260:loop_rootcg_workfn [loop]
[  360.347762]     inactive: loop_workfn [loop]
[  360.347854] pool 2: cpus=1 node=0 flags=0x0 nice=0 hung=0s workers=259 idle: 5695 5518 5526

[  628.625721] task:kworker/1:251   state:D stack:28896 pid: 5511 ppid:     2 flags:0x00004000
[  628.625746] Workqueue: loop loop_rootcg_workfn [loop]
[  628.625766] Call Trace:
[  628.625771]  <TASK>
[  628.625791]  __schedule+0x8fc/0xa50
[  628.625837]  schedule+0xc1/0x120
[  628.625853]  schedule_timeout+0x2b/0x190
[  628.625897]  io_schedule_timeout+0x6d/0xa0
[  628.625907]  ? yield_to+0x2a0/0x2a0
[  628.625925]  do_wait_for_common+0x162/0x200
[  628.625945]  ? yield_to+0x2a0/0x2a0
[  628.625977]  wait_for_completion_io+0x46/0x60
[  628.625992]  submit_bio_wait+0xba/0xf0
[  628.626037]  blkdev_issue_flush+0xa1/0xd0
[  628.626062]  ? submit_bio_wait+0xf0/0xf0
[  628.626099]  blkdev_fsync+0x3d/0x50
[  628.626117]  loop_process_work+0x35c/0xf10 [loop]
[  628.626220]  process_one_work+0x40a/0x630
[  628.626279]  worker_thread+0x4d7/0x9b0
[  628.626307]  ? _raw_spin_unlock_irqrestore+0x3f/0xb0
[  628.626325]  ? preempt_count_sub+0xf/0xc0
[  628.626364]  kthread+0x27c/0x2a0
[  628.626376]  ? rcu_lock_release+0x20/0x20
[  628.626384]  ? kthread_blkcg+0x50/0x50
[  628.626598]  ret_from_fork+0x1f/0x30
[  628.626660]  </TASK>

[  628.650621] Showing busy workqueues and worker pools:
[  628.650771] workqueue loop: flags=0xc
[  628.650790]   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=256/256 refcnt=258
[  628.650801]     in-flight: 5511:loop_rootcg_workfn [loop], 5409:loop_rootcg_workfn [loop], 5396:loop_rootcg_workfn [loop], 5365:loop_rootcg_workfn [loop], 5314:loop_rootcg_workfn [loop], 5303:loop_rootcg_workfn [loop], 5280:loop_rootcg_workfn [loop], 5493:loop_rootcg_workfn [loop], 5449:loop_rootcg_workfn [loop], 5410:loop_rootcg_workfn [loop], 5406:loop_rootcg_workfn [loop], 5325:loop_rootcg_workfn [loop], 5467:loop_rootcg_workfn [loop], 5429:loop_rootcg_workfn [loop], 5407:loop_rootcg_workfn [loop], 5322:loop_rootcg_workfn [loop], 5295:loop_rootcg_workfn [loop], 5272:loop_rootcg_workfn [loop], 5372:loop_rootcg_workfn [loop], 5364:loop_rootcg_workfn [loop], 5312:loop_rootcg_workfn [loop], 5264:loop_rootcg_workfn [loop], 5453:loop_rootcg_workfn [loop], 5345:loop_rootcg_workfn [loop], 5490:loop_rootcg_workfn [loop], 5423:loop_rootcg_workfn [loop], 5472:loop_rootcg_workfn [loop], 5463:loop_rootcg_workfn [loop], 5442:loop_rootcg_workfn [loop], 2302:loop_rootcg_workfn [loop]
[  628.651170] , 5501:loop_rootcg_workfn [loop], 5388:loop_rootcg_workfn [loop], 5316:loop_rootcg_workfn [loop], 5302:loop_rootcg_workfn [loop], 5268:loop_rootcg_workfn [loop], 5516:loop_rootcg_workfn [loop], 5434:loop_rootcg_workfn [loop], 5404:loop_rootcg_workfn [loop], 5354:loop_rootcg_workfn [loop], 5399:loop_rootcg_workfn [loop], 5343:loop_rootcg_workfn [loop], 5319:loop_rootcg_workfn [loop], 5471:loop_rootcg_workfn [loop], 5378:loop_rootcg_workfn [loop], 5360:loop_rootcg_workfn [loop], 5420:loop_rootcg_workfn [loop], 5398:loop_rootcg_workfn [loop], 5318:loop_rootcg_workfn [loop], 5311:loop_rootcg_workfn [loop], 5486:loop_rootcg_workfn [loop], 5470:loop_rootcg_workfn [loop], 5309:loop_rootcg_workfn [loop], 5298:loop_rootcg_workfn [loop], 5464:loop_rootcg_workfn [loop], 5342:loop_rootcg_workfn [loop], 5315:loop_rootcg_workfn [loop], 5281:loop_rootcg_workfn [loop], 5459:loop_rootcg_workfn [loop], 5424:loop_rootcg_workfn [loop], 5418:loop_rootcg_workfn [loop]
[  628.651533] , 5353:loop_rootcg_workfn [loop], 5341:loop_rootcg_workfn [loop], 5299:loop_rootcg_workfn [loop], 5284:loop_rootcg_workfn [loop], 5414:loop_rootcg_workfn [loop], 5363:loop_rootcg_workfn [loop], 5304:loop_rootcg_workfn [loop], 5285:loop_rootcg_workfn [loop], 5419:loop_rootcg_workfn [loop], 5397:loop_rootcg_workfn [loop], 5387:loop_rootcg_workfn [loop], 5379:loop_rootcg_workfn [loop], 5338:loop_rootcg_workfn [loop], 5253:loop_rootcg_workfn [loop], 5489:loop_rootcg_workfn [loop], 5445:loop_rootcg_workfn [loop], 5432:loop_rootcg_workfn [loop], 5356:loop_rootcg_workfn [loop], 5408:loop_rootcg_workfn [loop], 5310:loop_rootcg_workfn [loop], 5279:loop_rootcg_workfn [loop], 5358:loop_rootcg_workfn [loop], 2295:loop_rootcg_workfn [loop], 5287:loop_rootcg_workfn [loop], 5265:loop_rootcg_workfn [loop], 5261:loop_rootcg_workfn [loop], 2298:loop_rootcg_workfn [loop], 5508:loop_rootcg_workfn [loop], 5368:loop_rootcg_workfn [loop], 5488:loop_rootcg_workfn [loop]
[  628.651895] , 5469:loop_rootcg_workfn [loop], 5385:loop_rootcg_workfn [loop], 5340:loop_rootcg_workfn [loop], 5290:loop_rootcg_workfn [loop], 5494:loop_rootcg_workfn [loop], 5481:loop_rootcg_workfn [loop], 5361:loop_rootcg_workfn [loop], 5348:loop_rootcg_workfn [loop], 5286:loop_rootcg_workfn [loop], 5273:loop_rootcg_workfn [loop], 5495:loop_rootcg_workfn [loop], 5454:loop_rootcg_workfn [loop], 5392:loop_rootcg_workfn [loop], 5355:loop_rootcg_workfn [loop], 5327:loop_rootcg_workfn [loop], 5457:loop_rootcg_workfn [loop], 5417:loop_rootcg_workfn [loop], 5413:loop_rootcg_workfn [loop], 5306:loop_rootcg_workfn [loop], 5267:loop_rootcg_workfn [loop], 2308:loop_rootcg_workfn [loop], 2306:loop_rootcg_workfn [loop], 5500:loop_rootcg_workfn [loop], 5446:loop_rootcg_workfn [loop], 5331:loop_rootcg_workfn [loop], 5289:loop_rootcg_workfn [loop], 5474:loop_rootcg_workfn [loop], 5438:loop_rootcg_workfn [loop], 5390:loop_rootcg_workfn [loop], 5382:loop_rootcg_workfn [loop]
[  628.652258] , 5381:loop_rootcg_workfn [loop], 5357:loop_rootcg_workfn [loop], 5324:loop_rootcg_workfn [loop], 2304:loop_rootcg_workfn [loop], 5482:loop_rootcg_workfn [loop], 5475:loop_rootcg_workfn [loop], 5351:loop_rootcg_workfn [loop], 5300:loop_rootcg_workfn [loop], 5266:loop_rootcg_workfn [loop], 5430:loop_rootcg_workfn [loop], 5339:loop_rootcg_workfn [loop], 45:loop_rootcg_workfn [loop], 5461:loop_rootcg_workfn [loop], 5460:loop_rootcg_workfn [loop], 5394:loop_rootcg_workfn [loop], 5375:loop_rootcg_workfn [loop], 5259:loop_rootcg_workfn [loop], 2303:loop_rootcg_workfn [loop], 5507:loop_rootcg_workfn [loop], 5499:loop_rootcg_workfn [loop], 5497:loop_rootcg_workfn [loop], 2305:loop_rootcg_workfn [loop], 5405:loop_rootcg_workfn [loop], 5313:loop_rootcg_workfn [loop], 5478:loop_rootcg_workfn [loop], 5465:loop_rootcg_workfn [loop], 5376:loop_rootcg_workfn [loop], 104:loop_rootcg_workfn [loop], 5505:loop_rootcg_workfn [loop], 5462:loop_rootcg_workfn [loop], 5389:loop_rootcg_workfn [loop]
[  628.652633] , 5293:loop_rootcg_workfn [loop], 2307:loop_rootcg_workfn [loop], 5503:loop_rootcg_workfn [loop], 5403:loop_rootcg_workfn [loop], 5393:loop_rootcg_workfn [loop], 5366:loop_rootcg_workfn [loop], 5332:loop_rootcg_workfn [loop], 5278:loop_rootcg_workfn [loop], 5254:loop_rootcg_workfn [loop], 5484:loop_rootcg_workfn [loop], 5452:loop_rootcg_workfn [loop], 5439:loop_rootcg_workfn [loop], 5395:loop_rootcg_workfn [loop], 5380:loop_rootcg_workfn [loop], 5444:loop_rootcg_workfn [loop], 5416:loop_rootcg_workfn [loop], 5402:loop_rootcg_workfn [loop], 5350:loop_rootcg_workfn [loop], 5411:loop_rootcg_workfn [loop], 5369:loop_rootcg_workfn [loop], 5371:loop_rootcg_workfn [loop], 5370:loop_rootcg_workfn [loop], 5307:loop_rootcg_workfn [loop], 5271:loop_rootcg_workfn [loop], 5256:loop_rootcg_workfn [loop], 5421:loop_rootcg_workfn [loop], 5336:loop_rootcg_workfn [loop], 5277:loop_rootcg_workfn [loop], 5443:loop_rootcg_workfn [loop], 5283:loop_rootcg_workfn [loop]
[  628.652994] , 5255:loop_rootcg_workfn [loop], 2301:loop_rootcg_workfn [loop], 5502:loop_rootcg_workfn [loop], 5450:loop_rootcg_workfn [loop], 5391:loop_rootcg_workfn [loop], 5367:loop_rootcg_workfn [loop], 5352:loop_rootcg_workfn [loop], 5276:loop_rootcg_workfn [loop], 5506:loop_rootcg_workfn [loop], 5308:loop_rootcg_workfn [loop], 5305:loop_rootcg_workfn [loop], 5301:loop_rootcg_workfn [loop], 5294:loop_rootcg_workfn [loop], 5427:loop_rootcg_workfn [loop], 5492:loop_rootcg_workfn [loop], 5422:loop_rootcg_workfn [loop], 5401:loop_rootcg_workfn [loop], 5344:loop_rootcg_workfn [loop], 5320:loop_rootcg_workfn [loop], 4429:loop_rootcg_workfn [loop], 5509:loop_rootcg_workfn [loop], 5468:loop_rootcg_workfn [loop], 5458:loop_rootcg_workfn [loop], 5441:loop_rootcg_workfn [loop], 5362:loop_rootcg_workfn [loop], 5330:loop_rootcg_workfn [loop], 2300:loop_rootcg_workfn [loop], 5480:loop_rootcg_workfn [loop], 5282:loop_rootcg_workfn [loop], 5274:loop_rootcg_workfn [loop]
[  628.653356] , 5498:loop_rootcg_workfn [loop], 5323:loop_rootcg_workfn [loop], 5262:loop_rootcg_workfn [loop], 497:loop_rootcg_workfn [loop], 5435:loop_rootcg_workfn [loop], 5426:loop_rootcg_workfn [loop], 5400:loop_rootcg_workfn [loop], 5347:loop_rootcg_workfn [loop], 5275:loop_rootcg_workfn [loop], 5514:loop_rootcg_workfn [loop], 5431:loop_rootcg_workfn [loop], 5328:loop_rootcg_workfn [loop], 2299:loop_rootcg_workfn [loop], 5510:loop_rootcg_workfn [loop], 5428:loop_rootcg_workfn [loop], 5377:loop_rootcg_workfn [loop], 5496:loop_rootcg_workfn [loop], 5451:loop_rootcg_workfn [loop], 5383:loop_rootcg_workfn [loop], 5374:loop_rootcg_workfn [loop], 5288:loop_rootcg_workfn [loop], 5257:loop_rootcg_workfn [loop], 5483:loop_rootcg_workfn [loop], 5436:loop_rootcg_workfn [loop], 5425:loop_rootcg_workfn [loop], 5349:loop_rootcg_workfn [loop], 19:loop_rootcg_workfn [loop], 5433:loop_rootcg_workfn [loop], 5384:loop_rootcg_workfn [loop], 5373:loop_rootcg_workfn [loop], 5359:loop_rootcg_workfn [loop]
[  628.653729] , 5269:loop_rootcg_workfn [loop], 5258:loop_rootcg_workfn [loop], 5513:loop_rootcg_workfn [loop], 5292:loop_rootcg_workfn [loop], 5291:loop_rootcg_workfn [loop], 5477:loop_rootcg_workfn [loop], 5270:loop_rootcg_workfn [loop], 5515:loop_rootcg_workfn [loop], 5448:loop_rootcg_workfn [loop], 5447:loop_rootcg_workfn [loop], 5415:loop_rootcg_workfn [loop], 5386:loop_rootcg_workfn [loop], 5296:loop_rootcg_workfn [loop], 5260:loop_rootcg_workfn [loop]
[  628.653898]     inactive: loop_workfn [loop]
[  628.653919] pool 2: cpus=1 node=0 flags=0x0 nice=0 hung=27s workers=259 idle: 5695 5518 5526
----------------------------------------

Of course, we could limit recursive LOOP_SET_FD usage. But we should be aware that
an "active=256/256" situation would still be possible via multiple concurrent fsync()
requests. We should make sure that an execution context for "struct work_struct" is
always available, even under memory pressure where a new workqueue thread cannot be
created.
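
A rough sketch of one way to guarantee such an execution context, assuming a
dedicated workqueue allocated with WQ_MEM_RECLAIM so that a rescuer thread is
reserved (names are only illustrative):

#include <linux/workqueue.h>

static struct workqueue_struct *loop_wq;

static int __init loop_wq_init(void)
{
	/*
	 * WQ_MEM_RECLAIM reserves a rescuer thread, so at least one
	 * execution context exists even when new kworker threads cannot
	 * be created under memory pressure.  max_active == 0 keeps the
	 * default concurrency limit.
	 */
	loop_wq = alloc_workqueue("loop", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	return loop_wq ? 0 : -ENOMEM;
}

This does not lift the max_active limit, but it avoids complete starvation when
no new worker can be forked.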

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: fix loop autoclear for xfstets xfs/049
  2021-12-26  7:09     ` Tetsuo Handa
@ 2021-12-29 17:20       ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-29 17:20 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jan Kara, Jens Axboe, Dan Schatzberg, linux-block

On Sun, Dec 26, 2021 at 04:09:18PM +0900, Tetsuo Handa wrote:
> Here is a simplified reproducer and a log. It was difficult to find a reproducer
> because /proc/lockdep zaps the dependency chain when a dynamically created object is destroyed.

Can you send this for inclusion in blktests?  I think this is an
important enough thing to always have at hand.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] loop: use a global workqueue
  2021-12-24 12:05       ` Tetsuo Handa
  2021-12-24 14:05         ` Tetsuo Handa
@ 2021-12-29 17:21         ` Christoph Hellwig
  1 sibling, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2021-12-29 17:21 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Jan Kara, Dan Schatzberg, linux-block

On Fri, Dec 24, 2021 at 09:05:53PM +0900, Tetsuo Handa wrote:
> By the way, is it safe to use a single global WQ if (4) is a synchronous I/O request?
> Since there can be up to 1048576 loop devices, and one loop device can use another
> loop device as lo->lo_backing_file (unless loop_validate_file() finds a circular
> usage), one synchronous I/O request in (4) might recursively involve up to 1048576
> works (which would be too much concurrency for a single WQ to handle)?

Indeed, this will cause problems with stacked loop devices.
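
Roughly, such a stack can be set up from userspace with nothing more than
LOOP_SET_FD; a minimal sketch (paths and device numbers are only illustrative,
no error handling):

#include <fcntl.h>
#include <linux/loop.h>
#include <sys/ioctl.h>

int main(void)
{
	int backing = open("/tmp/image", O_RDWR);	/* regular file */
	int loop0 = open("/dev/loop0", O_RDWR);
	int loop1 = open("/dev/loop1", O_RDWR);

	ioctl(loop0, LOOP_SET_FD, backing);	/* loop0 -> regular file */
	ioctl(loop1, LOOP_SET_FD, loop0);	/* loop1 -> loop0 */
	return 0;
}

Each level of such a stack turns one synchronous I/O into another work item on
the same queue, which is where a single shared workqueue gets into trouble.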

> Also, is
> 
> 	blk_mq_start_request(rq);
> 
> 	if (lo->lo_state != Lo_bound)
> 		return BLK_STS_IOERR;
> 
> in loop_queue_rq() correct? (Not only is the lo->lo_state test racy, but doesn't
> it also want blk_mq_end_request() like lo_complete_rq() does?)

Besides the raciness this should be fine; the caller can cope with errors
returned both before and after blk_mq_start_request is called.
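
If the driver did want to complete the request itself after starting it, the
usual pattern would look roughly like this (sketch only):

	blk_mq_start_request(rq);

	if (lo->lo_state != Lo_bound) {
		/* complete the request here instead of returning an
		 * error status for the block layer to handle */
		blk_mq_end_request(rq, BLK_STS_IOERR);
		return BLK_STS_OK;
	}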

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-12-29 17:21 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-23 11:25 fix loop autoclear for xfstets xfs/049 Christoph Hellwig
2021-12-23 11:25 ` [PATCH 1/2] loop: use a global workqueue Christoph Hellwig
2021-12-23 14:37   ` Tetsuo Handa
2021-12-24  6:03     ` Christoph Hellwig
2021-12-24 12:05       ` Tetsuo Handa
2021-12-24 14:05         ` Tetsuo Handa
2021-12-29 17:21         ` Christoph Hellwig
2021-12-29  2:23   ` Tetsuo Handa
2021-12-23 11:25 ` [PATCH 2/2] loop: make autoclear operation synchronous again Christoph Hellwig
2021-12-23 13:40 ` fix loop autoclear for xfstets xfs/049 Jan Kara
2021-12-24  6:02   ` Christoph Hellwig
2021-12-26  7:09     ` Tetsuo Handa
2021-12-29 17:20       ` Christoph Hellwig
