* [PATCH 1/8] loop: de-duplicate the idle worker freeing code
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:00 ` [PATCH 2/8] loop: initialize the worker tracking fields once Christoph Hellwig
` (6 subsequent siblings)
7 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
Use a common helper for both timer based and uncoditional freeing of idle
workers.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 73 +++++++++++++++++++++-----------------------
1 file changed, 35 insertions(+), 38 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 01cbbfc4e9e24..b268bca6e4fb7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -804,7 +804,6 @@ struct loop_worker {
static void loop_workfn(struct work_struct *work);
static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
#ifdef CONFIG_BLK_CGROUP
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -888,6 +887,39 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
spin_unlock_irq(&lo->lo_work_lock);
}
+static void loop_set_timer(struct loop_device *lo)
+{
+ timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
+{
+ struct loop_worker *pos, *worker;
+
+ spin_lock_irq(&lo->lo_work_lock);
+ list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+ idle_list) {
+ if (!delete_all &&
+ time_is_after_jiffies(worker->last_ran_at +
+ LOOP_IDLE_WORKER_TIMEOUT))
+ break;
+ list_del(&worker->idle_list);
+ rb_erase(&worker->rb_node, &lo->worker_tree);
+ css_put(worker->blkcg_css);
+ kfree(worker);
+ }
+ if (!list_empty(&lo->idle_worker_list))
+ loop_set_timer(lo);
+ spin_unlock_irq(&lo->lo_work_lock);
+}
+
+static void loop_free_idle_workers_timer(struct timer_list *timer)
+{
+ struct loop_device *lo = container_of(timer, struct loop_device, timer);
+
+ return loop_free_idle_workers(lo, false);
+}
+
static void loop_update_rotational(struct loop_device *lo)
{
struct file *file = lo->lo_backing_file;
@@ -1022,7 +1054,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
INIT_LIST_HEAD(&lo->idle_worker_list);
lo->worker_tree = RB_ROOT;
- timer_setup(&lo->timer, loop_free_idle_workers,
+ timer_setup(&lo->timer, loop_free_idle_workers_timer,
TIMER_DEFERRABLE);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
@@ -1086,7 +1118,6 @@ static void __loop_clr_fd(struct loop_device *lo)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- struct loop_worker *pos, *worker;
/*
* Flush loop_configure() and loop_change_fd(). It is acceptable for
@@ -1116,15 +1147,7 @@ static void __loop_clr_fd(struct loop_device *lo)
blk_mq_freeze_queue(lo->lo_queue);
destroy_workqueue(lo->workqueue);
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- spin_unlock_irq(&lo->lo_work_lock);
+ loop_free_idle_workers(lo, true);
del_timer_sync(&lo->timer);
spin_lock_irq(&lo->lo_lock);
@@ -1871,11 +1894,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
}
-static void loop_set_timer(struct loop_device *lo)
-{
- timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
static void loop_process_work(struct loop_worker *worker,
struct list_head *cmd_list, struct loop_device *lo)
{
@@ -1924,27 +1942,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
}
-static void loop_free_idle_workers(struct timer_list *timer)
-{
- struct loop_device *lo = container_of(timer, struct loop_device, timer);
- struct loop_worker *pos, *worker;
-
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- if (time_is_after_jiffies(worker->last_ran_at +
- LOOP_IDLE_WORKER_TIMEOUT))
- break;
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- if (!list_empty(&lo->idle_worker_list))
- loop_set_timer(lo);
- spin_unlock_irq(&lo->lo_work_lock);
-}
-
static const struct blk_mq_ops loop_mq_ops = {
.queue_rq = loop_queue_rq,
.complete = lo_complete_rq,
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 2/8] loop: initialize the worker tracking fields once
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
2022-01-28 13:00 ` [PATCH 1/8] loop: de-duplicate the idle worker freeing code Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:00 ` [PATCH 3/8] loop: remove the racy bd_inode->i_mapping->nrpages asserts Christoph Hellwig
` (5 subsequent siblings)
7 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
There is no need to reinitialize idle_worker_list, worker_tree and timer
every time a loop device is configured. Just initialize them once at
allocation time.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b268bca6e4fb7..6ec55a5d9dfc4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1052,10 +1052,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
- INIT_LIST_HEAD(&lo->idle_worker_list);
- lo->worker_tree = RB_ROOT;
- timer_setup(&lo->timer, loop_free_idle_workers_timer,
- TIMER_DEFERRABLE);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
@@ -1957,6 +1953,9 @@ static int loop_add(int i)
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
if (!lo)
goto out;
+ lo->worker_tree = RB_ROOT;
+ INIT_LIST_HEAD(&lo->idle_worker_list);
+ timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
lo->lo_state = Lo_unbound;
err = mutex_lock_killable(&loop_ctl_mutex);
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 3/8] loop: remove the racy bd_inode->i_mapping->nrpages asserts
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
2022-01-28 13:00 ` [PATCH 1/8] loop: de-duplicate the idle worker freeing code Christoph Hellwig
2022-01-28 13:00 ` [PATCH 2/8] loop: initialize the worker tracking fields once Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:36 ` Jan Kara
2022-01-28 13:00 ` [PATCH 4/8] loop: only take lo_mutex for the last reference in lo_release Christoph Hellwig
` (4 subsequent siblings)
7 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
Nothing prevents a file system or userspace opener of the block device
from redirtying the page right afte sync_blockdev returned. Fortunately
data in the page cache during a block device change is mostly harmless
anyway.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 20 --------------------
1 file changed, 20 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6ec55a5d9dfc4..d3a7f281ce1b6 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1278,15 +1278,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
- if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
- /* If any pages were dirtied after invalidate_bdev(), try again */
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
prev_lo_flags = lo->lo_flags;
err = loop_set_status_from_info(lo, info);
@@ -1497,21 +1488,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
-
- /* invalidate_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
return err;
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 3/8] loop: remove the racy bd_inode->i_mapping->nrpages asserts
2022-01-28 13:00 ` [PATCH 3/8] loop: remove the racy bd_inode->i_mapping->nrpages asserts Christoph Hellwig
@ 2022-01-28 13:36 ` Jan Kara
0 siblings, 0 replies; 20+ messages in thread
From: Jan Kara @ 2022-01-28 13:36 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Tetsuo Handa, Jan Kara, linux-block, Ming Lei,
Darrick J . Wong
On Fri 28-01-22 14:00:17, Christoph Hellwig wrote:
> Nothing prevents a file system or userspace opener of the block device
> from redirtying the page right afte sync_blockdev returned. Fortunately
> data in the page cache during a block device change is mostly harmless
> anyway.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Darrick J. Wong <djwong@kernel.org>
OK. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> drivers/block/loop.c | 20 --------------------
> 1 file changed, 20 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 6ec55a5d9dfc4..d3a7f281ce1b6 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1278,15 +1278,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
> /* I/O need to be drained during transfer transition */
> blk_mq_freeze_queue(lo->lo_queue);
>
> - if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
> - /* If any pages were dirtied after invalidate_bdev(), try again */
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> -
> prev_lo_flags = lo->lo_flags;
>
> err = loop_set_status_from_info(lo, info);
> @@ -1497,21 +1488,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
> invalidate_bdev(lo->lo_device);
>
> blk_mq_freeze_queue(lo->lo_queue);
> -
> - /* invalidate_bdev should have truncated all the pages */
> - if (lo->lo_device->bd_inode->i_mapping->nrpages) {
> - err = -EAGAIN;
> - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
> - __func__, lo->lo_number, lo->lo_file_name,
> - lo->lo_device->bd_inode->i_mapping->nrpages);
> - goto out_unfreeze;
> - }
> -
> blk_queue_logical_block_size(lo->lo_queue, arg);
> blk_queue_physical_block_size(lo->lo_queue, arg);
> blk_queue_io_min(lo->lo_queue, arg);
> loop_update_dio(lo);
> -out_unfreeze:
> blk_mq_unfreeze_queue(lo->lo_queue);
>
> return err;
> --
> 2.30.2
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 4/8] loop: only take lo_mutex for the last reference in lo_release
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
` (2 preceding siblings ...)
2022-01-28 13:00 ` [PATCH 3/8] loop: remove the racy bd_inode->i_mapping->nrpages asserts Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:37 ` Jan Kara
2022-01-28 13:00 ` [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open Christoph Hellwig
` (3 subsequent siblings)
7 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
lo_refcnt is only incremented in lo_open and decremented in lo_release,
and thus protected by open_mutex. Only take lo_mutex when lo_release
actually takes action for the final release.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d3a7f281ce1b6..b58dc95f80d96 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1740,10 +1740,14 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
- mutex_lock(&lo->lo_mutex);
- if (atomic_dec_return(&lo->lo_refcnt))
- goto out_unlock;
+ /*
+ * Note: this requires disk->open_mutex to protect against races
+ * with lo_open.
+ */
+ if (!atomic_dec_and_test(&lo->lo_refcnt))
+ return;
+ mutex_lock(&lo->lo_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
if (lo->lo_state != Lo_bound)
goto out_unlock;
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 4/8] loop: only take lo_mutex for the last reference in lo_release
2022-01-28 13:00 ` [PATCH 4/8] loop: only take lo_mutex for the last reference in lo_release Christoph Hellwig
@ 2022-01-28 13:37 ` Jan Kara
0 siblings, 0 replies; 20+ messages in thread
From: Jan Kara @ 2022-01-28 13:37 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Tetsuo Handa, Jan Kara, linux-block, Ming Lei,
Darrick J . Wong
On Fri 28-01-22 14:00:18, Christoph Hellwig wrote:
> lo_refcnt is only incremented in lo_open and decremented in lo_release,
> and thus protected by open_mutex. Only take lo_mutex when lo_release
> actually takes action for the final release.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Darrick J. Wong <djwong@kernel.org>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> drivers/block/loop.c | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index d3a7f281ce1b6..b58dc95f80d96 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1740,10 +1740,14 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
> {
> struct loop_device *lo = disk->private_data;
>
> - mutex_lock(&lo->lo_mutex);
> - if (atomic_dec_return(&lo->lo_refcnt))
> - goto out_unlock;
> + /*
> + * Note: this requires disk->open_mutex to protect against races
> + * with lo_open.
> + */
> + if (!atomic_dec_and_test(&lo->lo_refcnt))
> + return;
>
> + mutex_lock(&lo->lo_mutex);
> if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
> if (lo->lo_state != Lo_bound)
> goto out_unlock;
> --
> 2.30.2
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
` (3 preceding siblings ...)
2022-01-28 13:00 ` [PATCH 4/8] loop: only take lo_mutex for the last reference in lo_release Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:13 ` Tetsuo Handa
2022-01-28 13:38 ` Jan Kara
2022-01-28 13:00 ` [PATCH 6/8] loop: don't freeze the queue in lo_release Christoph Hellwig
` (2 subsequent siblings)
7 siblings, 2 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
lo_refcnt is only incremented in lo_open and decremented in lo_release,
and thus protected by open_mutex. Only take lo_mutex when lo_open is
called the first time, as only for the first open there is any affect
on the driver state (incremental opens on partitions don't end up in
lo_open at all already).
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b58dc95f80d96..f349ddfc0e84a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1725,13 +1725,20 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
+ /*
+ * Note: this requires disk->open_mutex to protect against races
+ * with lo_release.
+ */
+ if (atomic_inc_return(&lo->lo_refcnt) > 1)
+ return 0;
+
err = mutex_lock_killable(&lo->lo_mutex);
if (err)
return err;
- if (lo->lo_state == Lo_deleting)
+ if (lo->lo_state == Lo_deleting) {
+ atomic_dec(&lo->lo_refcnt);
err = -ENXIO;
- else
- atomic_inc(&lo->lo_refcnt);
+ }
mutex_unlock(&lo->lo_mutex);
return err;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-01-28 13:00 ` [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open Christoph Hellwig
@ 2022-01-28 13:13 ` Tetsuo Handa
2022-02-05 0:28 ` Tetsuo Handa
2022-01-28 13:38 ` Jan Kara
1 sibling, 1 reply; 20+ messages in thread
From: Tetsuo Handa @ 2022-01-28 13:13 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe
Cc: Jan Kara, linux-block, Ming Lei, Darrick J . Wong
On 2022/01/28 22:00, Christoph Hellwig wrote:
> + if (atomic_inc_return(&lo->lo_refcnt) > 1)
> + return 0;
> +
> err = mutex_lock_killable(&lo->lo_mutex);
> if (err)
You did not notice my diff here...
> return err;
> - if (lo->lo_state == Lo_deleting)
> + if (lo->lo_state == Lo_deleting) {
> + atomic_dec(&lo->lo_refcnt);
> err = -ENXIO;
> - else
> - atomic_inc(&lo->lo_refcnt);
> + }
Why do we need [PATCH 1/8] [PATCH 2/8] [PATCH 3/8] in this series?
Shouldn't we first make a clean revert, and keep the changes for
this release cycle as small as possible?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-01-28 13:13 ` Tetsuo Handa
@ 2022-02-05 0:28 ` Tetsuo Handa
2022-02-06 7:10 ` Tetsuo Handa
2022-02-08 13:45 ` Jan Kara
0 siblings, 2 replies; 20+ messages in thread
From: Tetsuo Handa @ 2022-02-05 0:28 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe
Cc: Jan Kara, linux-block, Ming Lei, Darrick J . Wong
Ping?
I sent https://lkml.kernel.org/r/20220129071500.3566-1-penguin-kernel@I-love.SAKURA.ne.jp
based on ideas from your series.
Since automated kernel tests are failing, can't we apply
[PATCH 1/7] loop: revert "make autoclear operation asynchronous"
for now if we can't come to a conclusion?
On 2022/01/28 22:13, Tetsuo Handa wrote:
> On 2022/01/28 22:00, Christoph Hellwig wrote:
>> + if (atomic_inc_return(&lo->lo_refcnt) > 1)
>> + return 0;
>> +
>> err = mutex_lock_killable(&lo->lo_mutex);
>> if (err)
>
> You did not notice my diff here...
You need to drop lo->lo_refcnt before return.
But my latest series no longer uses task work context and no longer
holds lo->lo_mutex from lo_open()/lo_release().
>
>> return err;
>> - if (lo->lo_state == Lo_deleting)
>> + if (lo->lo_state == Lo_deleting) {
>> + atomic_dec(&lo->lo_refcnt);
>> err = -ENXIO;
>> - else
>> - atomic_inc(&lo->lo_refcnt);
>> + }
>
> Why do we need [PATCH 1/8] [PATCH 2/8] [PATCH 3/8] in this series?
> Shouldn't we first make a clean revert, and keep the changes for
> this release cycle as small as possible?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-02-05 0:28 ` Tetsuo Handa
@ 2022-02-06 7:10 ` Tetsuo Handa
2022-02-08 13:45 ` Jan Kara
1 sibling, 0 replies; 20+ messages in thread
From: Tetsuo Handa @ 2022-02-06 7:10 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe, Jan Kara
Cc: linux-block, Ming Lei, Darrick J . Wong
On 2022/02/05 9:28, Tetsuo Handa wrote:
> Ping?
>
> I sent https://lkml.kernel.org/r/20220129071500.3566-1-penguin-kernel@I-love.SAKURA.ne.jp
> based on ideas from your series.
>
I tested my latest series and found that there are two problems.
One is that there still is disk->open_mutex => loop_validate_mutex =>
lo->lo_mutex chain. We would need to consider something like
https://lkml.kernel.org/r/fffda32f-848a-712b-f50e-8a6d7d086039@i-love.sakura.ne.jp
if we go with avoiding lo->lo_mutex while holding disk->open_mutex.
The other is that my latest series which does not hold lo->lo_mutex from
lo_open() causes occasional I/O error (due to
if (lo->lo_state != Lo_bound)
return BLK_STS_IOERR;
check in loop_queue_rq()) when systemd-udevd opens a loop device and
reads from it before loop_configure() updates lo->lo_state to Lo_bound.
(And I think the same problem exists for your series because we assumed
that we need to care about only loop_control_remove().)
If we want to avoid I/O error, open() of a loop device (currently lo_open())
needs to wait for loop_configure() to complete, by waiting for lo->lo_mutex
to be released. But this implies that we can't avoid lo->lo_mutex while holding
disk->open_mutex. Use of task work context (my previous series which is intended
for waiting for __loop_clr_fd() to complete) will allow waiting for lo->lo_mutex
without holding disk->open_mutex, but we want to avoid task work context if
possible...
[ 3069.404881] loop184: detected capacity change from 0 to 2048
[ 3069.452117] loop185: detected capacity change from 0 to 2048
[ 3069.457155] I/O error, dev loop185, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0
[ 3069.465309] loop186: detected capacity change from 0 to 2048
[ 3069.506238] loop187: detected capacity change from 0 to 2048
[ 3069.525483] loop188: detected capacity change from 0 to 2048
[ 3069.551447] loop189: detected capacity change from 0 to 2048
[ 3069.576062] loop190: detected capacity change from 0 to 2048
[ 3069.592206] loop191: detected capacity change from 0 to 2048
[ 3069.681413] loop192: detected capacity change from 0 to 2048
[ 3069.911000] loop193: detected capacity change from 0 to 2048
[ 3070.049889] loop194: detected capacity change from 0 to 2048
[ 3070.063847] loop195: detected capacity change from 0 to 2048
[ 3070.168926] loop196: detected capacity change from 0 to 2048
[ 3070.242869] loop197: detected capacity change from 0 to 2048
[ 3070.383477] loop198: detected capacity change from 0 to 2048
[ 3070.439035] loop199: detected capacity change from 0 to 2048
[ 3070.464548] loop200: detected capacity change from 0 to 2048
[ 3070.501891] loop201: detected capacity change from 0 to 2048
[ 3070.513600] loop202: detected capacity change from 0 to 2048
[ 3070.523923] I/O error, dev loop202, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0
[ 3070.531281] I/O error, dev loop202, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
[ 3070.541353] Buffer I/O error on dev loop202, logical block 0, async page read
[ 3070.684234] loop203: detected capacity change from 0 to 2048
[ 3070.705911] loop204: detected capacity change from 0 to 2048
[ 3070.716153] loop205: detected capacity change from 0 to 2048
[ 3070.735178] loop206: detected capacity change from 0 to 2048
[ 3070.833832] loop207: detected capacity change from 0 to 2048
[ 3070.835086] I/O error, dev loop207, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0
[ 3070.851433] loop208: detected capacity change from 0 to 2048
[ 3070.873609] loop209: detected capacity change from 0 to 2048
[ 3070.890193] loop210: detected capacity change from 0 to 2048
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-02-05 0:28 ` Tetsuo Handa
2022-02-06 7:10 ` Tetsuo Handa
@ 2022-02-08 13:45 ` Jan Kara
2022-02-23 14:24 ` Tetsuo Handa
1 sibling, 1 reply; 20+ messages in thread
From: Jan Kara @ 2022-02-08 13:45 UTC (permalink / raw)
To: Tetsuo Handa
Cc: Christoph Hellwig, Jens Axboe, Jan Kara, linux-block, Ming Lei,
Darrick J . Wong
On Sat 05-02-22 09:28:33, Tetsuo Handa wrote:
> Ping?
>
> I sent https://lkml.kernel.org/r/20220129071500.3566-1-penguin-kernel@I-love.SAKURA.ne.jp
> based on ideas from your series.
>
> Since automated kernel tests are failing, can't we apply
> [PATCH 1/7] loop: revert "make autoclear operation asynchronous"
> for now if we can't come to a conclusion?
That's certainly a good start so feel free to add my Acked-by to the
revert. I agree it should be merged quickly as I think it is better to have
a theoretical deadlock in the code than userspace breakage hit in the wild.
I'll find some more time to look into this but it will take a while.
Honza
>
> On 2022/01/28 22:13, Tetsuo Handa wrote:
> > On 2022/01/28 22:00, Christoph Hellwig wrote:
> >> + if (atomic_inc_return(&lo->lo_refcnt) > 1)
> >> + return 0;
> >> +
> >> err = mutex_lock_killable(&lo->lo_mutex);
> >> if (err)
> >
> > You did not notice my diff here...
>
> You need to drop lo->lo_refcnt before return.
>
> But my latest series no longer uses task work context and no longer
> holds lo->lo_mutex from lo_open()/lo_release().
>
> >
> >> return err;
> >> - if (lo->lo_state == Lo_deleting)
> >> + if (lo->lo_state == Lo_deleting) {
> >> + atomic_dec(&lo->lo_refcnt);
> >> err = -ENXIO;
> >> - else
> >> - atomic_inc(&lo->lo_refcnt);
> >> + }
> >
> > Why do we need [PATCH 1/8] [PATCH 2/8] [PATCH 3/8] in this series?
> > Shouldn't we first make a clean revert, and keep the changes for
> > this release cycle as small as possible?
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-02-08 13:45 ` Jan Kara
@ 2022-02-23 14:24 ` Tetsuo Handa
0 siblings, 0 replies; 20+ messages in thread
From: Tetsuo Handa @ 2022-02-23 14:24 UTC (permalink / raw)
To: Jan Kara
Cc: Christoph Hellwig, Jens Axboe, linux-block, Ming Lei, Darrick J . Wong
On 2022/02/08 22:45, Jan Kara wrote:
> On Sat 05-02-22 09:28:33, Tetsuo Handa wrote:
>> Ping?
>>
>> I sent https://lkml.kernel.org/r/20220129071500.3566-1-penguin-kernel@I-love.SAKURA.ne.jp
>> based on ideas from your series.
>>
>> Since automated kernel tests are failing, can't we apply
>> [PATCH 1/7] loop: revert "make autoclear operation asynchronous"
>> for now if we can't come to a conclusion?
>
> That's certainly a good start so feel free to add my Acked-by to the
> revert. I agree it should be merged quickly as I think it is better to have
> a theoretical deadlock in the code than userspace breakage hit in the wild.
> I'll find some more time to look into this but it will take a while.
Did you get a chance to look into this? As far as I tested, I found two problems
( https://lkml.kernel.org/r/a72c59c6-298b-e4ba-b1f5-2275afab49a1@I-love.SAKURA.ne.jp ).
That is, waiting for lo->lo_mutex upon open is not only for /bin/mount but for other
programs.
I found that we can use anon_inodes approach (basic idea is shown below) as a way
to use task_work context. If we can agree with this approach, I can re-implement
https://lkml.kernel.org/r/20220121114006.3633-1-penguin-kernel@I-love.SAKURA.ne.jp
without exporting task_work_add().
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 19fe19eaa50e..6bd6af1836c6 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -80,6 +80,7 @@
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
+#include <linux/anon_inodes.h>
#include "loop.h"
@@ -1736,6 +1737,25 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
return err;
}
+static int loop_no_open(struct inode *inode, struct file *file)
+{
+ return -ENXIO;
+}
+
+static int loop_post_release(struct inode *inode, struct file *file)
+{
+ struct loop_device *lo = file->private_data;
+
+ pr_info("Performing autoclear operation.\n");
+ __loop_clr_fd(lo, false);
+ return 0;
+}
+
+static const struct file_operations loop_close_fops = {
+ .open = loop_no_open,
+ .release = loop_post_release,
+};
+
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
@@ -1745,6 +1765,8 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
goto out_unlock;
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
+ struct file *file;
+
if (lo->lo_state != Lo_bound)
goto out_unlock;
lo->lo_state = Lo_rundown;
@@ -1753,7 +1775,9 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- __loop_clr_fd(lo, true);
+ file = anon_inode_getfile("loop.close", &loop_close_fops, lo, O_CLOEXEC);
+ if (!IS_ERR(file))
+ fput(file);
return;
} else if (lo->lo_state == Lo_bound) {
/*
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open
2022-01-28 13:00 ` [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open Christoph Hellwig
2022-01-28 13:13 ` Tetsuo Handa
@ 2022-01-28 13:38 ` Jan Kara
1 sibling, 0 replies; 20+ messages in thread
From: Jan Kara @ 2022-01-28 13:38 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Tetsuo Handa, Jan Kara, linux-block, Ming Lei,
Darrick J . Wong
On Fri 28-01-22 14:00:19, Christoph Hellwig wrote:
> lo_refcnt is only incremented in lo_open and decremented in lo_release,
> and thus protected by open_mutex. Only take lo_mutex when lo_open is
> called the first time, as only for the first open there is any affect
> on the driver state (incremental opens on partitions don't end up in
> lo_open at all already).
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Darrick J. Wong <djwong@kernel.org>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> drivers/block/loop.c | 13 ++++++++++---
> 1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index b58dc95f80d96..f349ddfc0e84a 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1725,13 +1725,20 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
> struct loop_device *lo = bdev->bd_disk->private_data;
> int err;
>
> + /*
> + * Note: this requires disk->open_mutex to protect against races
> + * with lo_release.
> + */
> + if (atomic_inc_return(&lo->lo_refcnt) > 1)
> + return 0;
> +
> err = mutex_lock_killable(&lo->lo_mutex);
> if (err)
> return err;
> - if (lo->lo_state == Lo_deleting)
> + if (lo->lo_state == Lo_deleting) {
> + atomic_dec(&lo->lo_refcnt);
> err = -ENXIO;
> - else
> - atomic_inc(&lo->lo_refcnt);
> + }
> mutex_unlock(&lo->lo_mutex);
> return err;
> }
> --
> 2.30.2
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 6/8] loop: don't freeze the queue in lo_release
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
` (4 preceding siblings ...)
2022-01-28 13:00 ` [PATCH 5/8] loop: only take lo_mutex for the first reference in lo_open Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:00 ` [PATCH 7/8] loop: only freeze the queue in __loop_clr_fd when needed Christoph Hellwig
2022-01-28 13:00 ` [PATCH 8/8] loop: make autoclear operation synchronous again Christoph Hellwig
7 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
By the time the final ->release is called there can't be outstanding I/O.
For non-final ->release there is no need for driver action at all.
Thus remove the useless queue freeze.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f349ddfc0e84a..1ca70f735b5bc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1766,13 +1766,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
*/
loop_schedule_rundown(lo);
return;
- } else if (lo->lo_state == Lo_bound) {
- /*
- * Otherwise keep thread (if running) and config,
- * but flush possible ongoing bios in thread.
- */
- blk_mq_freeze_queue(lo->lo_queue);
- blk_mq_unfreeze_queue(lo->lo_queue);
}
out_unlock:
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 7/8] loop: only freeze the queue in __loop_clr_fd when needed
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
` (5 preceding siblings ...)
2022-01-28 13:00 ` [PATCH 6/8] loop: don't freeze the queue in lo_release Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
2022-01-28 13:00 ` [PATCH 8/8] loop: make autoclear operation synchronous again Christoph Hellwig
7 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Darrick J . Wong
->release is only called after all outstanding I/O has completed, so only
freeze the queue when clearing the backing file of a live loop device.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Darrick J. Wong <djwong@kernel.org>
---
drivers/block/loop.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1ca70f735b5bc..0cdbc60037642 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1110,7 +1110,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return error;
}
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
@@ -1139,8 +1139,13 @@ static void __loop_clr_fd(struct loop_device *lo)
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
blk_queue_write_cache(lo->lo_queue, false, false);
- /* freeze request queue during the transition */
- blk_mq_freeze_queue(lo->lo_queue);
+ /*
+ * Freeze the request queue when unbinding on a live file descriptor and
+ * thus an open device. When called from ->release we are guaranteed
+ * that there is no I/O in progress already.
+ */
+ if (!release)
+ blk_mq_freeze_queue(lo->lo_queue);
destroy_workqueue(lo->workqueue);
loop_free_idle_workers(lo, true);
@@ -1163,7 +1168,8 @@ static void __loop_clr_fd(struct loop_device *lo)
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
mapping_set_gfp_mask(filp->f_mapping, gfp);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (!release)
+ blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1201,7 +1207,7 @@ static void loop_rundown_workfn(struct work_struct *work)
struct block_device *bdev = lo->lo_device;
struct gendisk *disk = lo->lo_disk;
- __loop_clr_fd(lo);
+ __loop_clr_fd(lo, true);
kobject_put(&bdev->bd_device.kobj);
module_put(disk->fops->owner);
loop_rundown_completed(lo);
@@ -1247,7 +1253,7 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
- __loop_clr_fd(lo);
+ __loop_clr_fd(lo, false);
loop_rundown_completed(lo);
return 0;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 8/8] loop: make autoclear operation synchronous again
2022-01-28 13:00 yet another approach to fix loop autoclear for xfstets xfs/049 v2 Christoph Hellwig
` (6 preceding siblings ...)
2022-01-28 13:00 ` [PATCH 7/8] loop: only freeze the queue in __loop_clr_fd when needed Christoph Hellwig
@ 2022-01-28 13:00 ` Christoph Hellwig
7 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-01-28 13:00 UTC (permalink / raw)
To: Jens Axboe
Cc: Tetsuo Handa, Jan Kara, linux-block, Ming Lei, Tetsuo Handa,
kernel test robot, Darrick J . Wong, syzbot
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
The kernel test robot is reporting that xfstest can fail at
umount ext2 on xfs
umount xfs
sequence, for commit 322c4293ecc58110 ("loop: make autoclear operation
asynchronous") broke what commit ("loop: Make explicit loop device
destruction lazy") wanted to achieve.
Although we cannot guarantee that nobody is holding a reference when
"umount xfs" is called, we should try to close a race window opened
by asynchronous autoclear operation.
It turned out that there is no need to call flush_workqueue() from
__loop_clr_fd(), for blk_mq_freeze_queue() blocks until all pending
"struct work_struct" are processed by loop_process_work().
Revert commit 322c4293ecc58110 ("loop: make autoclear operation
asynchronous"), and simply defer calling destroy_workqueue() till
loop_remove() after ensuring that the worker rbtree and repaing
timer are kept alive while the loop device exists.
Since a loop device is likely reused after once created thanks to
ioctl(LOOP_CTL_GET_FREE), being unable to destroy corresponding WQ
when ioctl(LOOP_CLR_FD) is called should be an acceptable waste.
Reported-by: kernel test robot <oliver.sang@intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
[hch: rebased, keep the work rbtree and timer around longer]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: syzbot <syzbot+643e4ce4b6ad1347d372@syzkaller.appspotmail.com>
---
drivers/block/loop.c | 71 ++++++++++++++------------------------------
drivers/block/loop.h | 1 -
2 files changed, 23 insertions(+), 49 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0cdbc60037642..68d01166f7c38 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1038,10 +1038,10 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
- lo->workqueue = alloc_workqueue("loop%d",
- WQ_UNBOUND | WQ_FREEZABLE,
- 0,
- lo->lo_number);
+ if (!lo->workqueue)
+ lo->workqueue = alloc_workqueue("loop%d",
+ WQ_UNBOUND | WQ_FREEZABLE,
+ 0, lo->lo_number);
if (!lo->workqueue) {
error = -ENOMEM;
goto out_unlock;
@@ -1147,10 +1147,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
if (!release)
blk_mq_freeze_queue(lo->lo_queue);
- destroy_workqueue(lo->workqueue);
- loop_free_idle_workers(lo, true);
- del_timer_sync(&lo->timer);
-
spin_lock_irq(&lo->lo_lock);
filp = lo->lo_backing_file;
lo->lo_backing_file = NULL;
@@ -1176,9 +1172,11 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
int err;
- mutex_lock(&lo->lo_disk->open_mutex);
+ if (!release)
+ mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- mutex_unlock(&lo->lo_disk->open_mutex);
+ if (!release)
+ mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
@@ -1189,39 +1187,17 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART;
- fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
module_put(THIS_MODULE);
-}
-
-static void loop_rundown_workfn(struct work_struct *work)
-{
- struct loop_device *lo = container_of(work, struct loop_device,
- rundown_work);
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
- __loop_clr_fd(lo, true);
- kobject_put(&bdev->bd_device.kobj);
- module_put(disk->fops->owner);
- loop_rundown_completed(lo);
-}
-
-static void loop_schedule_rundown(struct loop_device *lo)
-{
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __module_get(disk->fops->owner);
- kobject_get(&bdev->bd_device.kobj);
- INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
- queue_work(system_long_wq, &lo->rundown_work);
+ /*
+ * Need not hold lo_mutex to fput backing file. Calling fput holding
+ * lo_mutex triggers a circular lock dependency possibility warning as
+ * fput can take open_mutex which is usually taken before lo_mutex.
+ */
+ fput(filp);
}
static int loop_clr_fd(struct loop_device *lo)
@@ -1254,7 +1230,6 @@ static int loop_clr_fd(struct loop_device *lo)
mutex_unlock(&lo->lo_mutex);
__loop_clr_fd(lo, false);
- loop_rundown_completed(lo);
return 0;
}
@@ -1761,20 +1736,14 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
return;
mutex_lock(&lo->lo_mutex);
- if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
- if (lo->lo_state != Lo_bound)
- goto out_unlock;
+ if (lo->lo_state == Lo_bound &&
+ (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
- /*
- * In autoclear mode, stop the loop thread
- * and remove configuration after last close.
- */
- loop_schedule_rundown(lo);
+ __loop_clr_fd(lo, true);
return;
}
-out_unlock:
mutex_unlock(&lo->lo_mutex);
}
@@ -2064,6 +2033,12 @@ static void loop_remove(struct loop_device *lo)
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
+
+ if (lo->workqueue)
+ destroy_workqueue(lo->workqueue);
+ loop_free_idle_workers(lo, true);
+ del_timer_sync(&lo->timer);
+
/* There is no route which can find this loop device. */
mutex_destroy(&lo->lo_mutex);
kfree(lo);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc0259..082d4b6bfc6a6 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
- struct work_struct rundown_work;
};
struct loop_cmd {
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread