From: Christoph Hellwig <hch@lst.de>
To: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
Hillf Danton <hdanton@sina.com>,
Pavel Tatashin <pasha.tatashin@soleen.com>,
Tyler Hicks <tyhicks@linux.microsoft.com>,
linux-block <linux-block@vger.kernel.org>
Subject: Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
Date: Fri, 27 Aug 2021 20:43:02 +0200 [thread overview]
Message-ID: <20210827184302.GA29967@lst.de> (raw)
In-Reply-To: <2642808b-a7d0-28ff-f288-0f4eabc562f7@i-love.sakura.ne.jp>
On Fri, Aug 27, 2021 at 01:03:45AM +0900, Tetsuo Handa wrote:
>
> loop_unregister_transfer() which is called from cleanup_cryptoloop()
> currently lacks serialization between kfree() from loop_remove() from
> loop_control_remove() and mutex_lock() from unregister_transfer_cb().
> We can use refcount and loop_idr_spinlock for serialization between
> these functions.
So before we start complicating things for loop_release_xfer - how
do you actually reproduce loop_unregister_transfer finding a loop
device with a transfer set? AFAICS loop_unregister_transfer is only
called form exit_cryptoloop, which can only be called when
cryptoloop has a zero reference count. But as long as a transfer
is registered an extra refcount is held on its owner.
> @@ -2313,20 +2320,20 @@ static int loop_add(int i)
> goto out;
> lo->lo_state = Lo_unbound;
>
> - err = mutex_lock_killable(&loop_ctl_mutex);
> - if (err)
> - goto out_free_dev;
> -
> /* allocate id, if @id >= 0, we're requesting that specific id */
> + idr_preload(GFP_KERNEL);
> + spin_lock(&loop_idr_spinlock);
> if (i >= 0) {
> - err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
> + err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC);
> if (err == -ENOSPC)
> err = -EEXIST;
> } else {
> - err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
> + err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC);
> }
> + spin_unlock(&loop_idr_spinlock);
> + idr_preload_end();
Can you explain why the mutex is switched to a spinlock? I could not
find any caller that can't block, so there doesn't seem to be a real
need for a spinlock, while a spinlock requires extra work and GFP_ATOMIC
allocations here. Dropping the _killable probably makes some sense,
but seems like a separate cleanup.
> + if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) {
> + spin_unlock(&loop_idr_spinlock);
> + return -ENODEV;
> }
> + spin_unlock(&loop_idr_spinlock);
> + refcount_dec(&lo->idr_visible);
> + /*
> + * Try to wait for concurrent callers (they should complete shortly due to
> + * lo->lo_state == Lo_deleting) operating on this loop device, in order to
> + * help that subsequent loop_add() will not to fail with -EEXIST.
> + * Note that this is best effort.
> + */
> + for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++)
> + schedule_timeout_killable(1);
> + ret = 0;
This dance looks pretty strange to me. I think just making idr_visible
an atomic_t and using atomic_cmpxchg with just 0 and 1 as valid versions
will make this much simpler, as it avoids the need to deal with a > 1
count, and it also serializes multiple removal calls.
I quickly hacked this up as a slight variant of your patch, and it's been
running the syzbot reproducer you pointed me to for quite while now:
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf4..69ced1feb18d5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2113,28 +2113,29 @@ int loop_register_transfer(struct loop_func_table *funcs)
return 0;
}
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
- struct loop_device *lo = ptr;
- struct loop_func_table *xfer = data;
-
- mutex_lock(&lo->lo_mutex);
- if (lo->lo_encryption == xfer)
- loop_release_xfer(lo);
- mutex_unlock(&lo->lo_mutex);
- return 0;
-}
-
int loop_unregister_transfer(int number)
{
unsigned int n = number;
struct loop_func_table *xfer;
+ struct loop_device *lo;
+ int id;
if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
return -EINVAL;
xfer_funcs[n] = NULL;
- idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
+
+ /*
+ * loop_unregister_transfer is only called from cryptoloop module
+ * unload. Given that each loop device that has a transfer enabled
+ * hold a reference to the module implementing it we should never
+ * get here with a transfer that is set.
+ */
+ mutex_lock(&loop_ctl_mutex);
+ idr_for_each_entry(&loop_index_idr, lo, id)
+ WARN_ON_ONCE(lo->lo_encryption == xfer);
+ mutex_unlock(&loop_ctl_mutex);
+
return 0;
}
@@ -2325,8 +2326,9 @@ static int loop_add(int i)
} else {
err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
}
+ mutex_unlock(&loop_ctl_mutex);
if (err < 0)
- goto out_unlock;
+ goto out_free_dev;
i = err;
err = -ENOMEM;
@@ -2392,15 +2394,17 @@ static int loop_add(int i)
disk->private_data = lo;
disk->queue = lo->lo_queue;
sprintf(disk->disk_name, "loop%d", i);
+ /* Make this loop device reachable from pathname. */
add_disk(disk);
- mutex_unlock(&loop_ctl_mutex);
+ /* Show this loop device. */
+ atomic_set(&lo->idr_visible, 1);
return i;
out_cleanup_tags:
blk_mq_free_tag_set(&lo->tag_set);
out_free_idr:
+ mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, i);
-out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_free_dev:
kfree(lo);
@@ -2410,9 +2414,14 @@ static int loop_add(int i)
static void loop_remove(struct loop_device *lo)
{
+ /* Make this loop device unreachable from pathname. */
del_gendisk(lo->lo_disk);
blk_cleanup_disk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
+ mutex_lock(&loop_ctl_mutex);
+ idr_remove(&loop_index_idr, lo->lo_number);
+ mutex_unlock(&loop_ctl_mutex);
+ /* There is no route which can find this loop device. */
mutex_destroy(&lo->lo_mutex);
kfree(lo);
}
@@ -2440,29 +2449,40 @@ static int loop_control_remove(int idx)
if (ret)
return ret;
+ /*
+ * Identify the loop device to remove. Skip the device if it is owned by
+ * loop_remove()/loop_add() where it is not safe to access lo_mutex.
+ * The loop device is marked invisible even if we bail out of the
+ * removal, but the only other place checking the visibility is the
+ * LOOP_CTL_GET_FREE ioctl, which checks the same flags as we do below,
+ * and which is fundamentally racy anyway.
+ */
lo = idr_find(&loop_index_idr, idx);
- if (!lo) {
- ret = -ENODEV;
- goto out_unlock_ctrl;
+ if (!lo || atomic_cmpxchg(&lo->idr_visible, 1, 0) == 0) {
+ mutex_unlock(&loop_ctl_mutex);
+ return -ENODEV;
}
+ mutex_unlock(&loop_ctl_mutex);
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
- goto out_unlock_ctrl;
+ goto mark_visible;
if (lo->lo_state != Lo_unbound ||
atomic_read(&lo->lo_refcnt) > 0) {
mutex_unlock(&lo->lo_mutex);
ret = -EBUSY;
- goto out_unlock_ctrl;
+ goto mark_visible;
}
+ /* Mark this loop device no longer open()-able. */
lo->lo_state = Lo_deleting;
mutex_unlock(&lo->lo_mutex);
- idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
-out_unlock_ctrl:
- mutex_unlock(&loop_ctl_mutex);
- return ret;
+ return 0;
+
+mark_visible:
+ atomic_inc(&lo->idr_visible);
+ return -EBUSY;
}
static int loop_control_get_free(int idx)
@@ -2474,7 +2494,8 @@ static int loop_control_get_free(int idx)
if (ret)
return ret;
idr_for_each_entry(&loop_index_idr, lo, id) {
- if (lo->lo_state == Lo_unbound)
+ if (atomic_read(&lo->idr_visible) &&
+ lo->lo_state == Lo_unbound)
goto found;
}
mutex_unlock(&loop_ctl_mutex);
@@ -2590,10 +2611,12 @@ static void __exit loop_exit(void)
unregister_blkdev(LOOP_MAJOR, "loop");
misc_deregister(&loop_misc);
- mutex_lock(&loop_ctl_mutex);
+ /*
+ * There is no need to use loop_ctl_mutex here, for nobody else can
+ * access loop_index_idr when this module is unloading.
+ */
idr_for_each_entry(&loop_index_idr, lo, id)
loop_remove(lo);
- mutex_unlock(&loop_ctl_mutex);
idr_destroy(&loop_index_idr);
}
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63ac..1ec5135da54a7 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
struct mutex lo_mutex;
+ atomic_t idr_visible; /* a bool in reality */
};
struct loop_cmd {
next prev parent reply other threads:[~2021-08-27 18:43 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-26 16:03 [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock Tetsuo Handa
2021-08-27 18:43 ` Christoph Hellwig [this message]
2021-08-28 1:10 ` Tetsuo Handa
2021-08-28 2:22 ` Tetsuo Handa
2021-08-28 7:18 ` Christoph Hellwig
2021-08-28 13:50 ` Tetsuo Handa
2021-08-29 1:22 ` [PATCH v2] loop: reduce the loop_ctl_mutex scope Tetsuo Handa
2021-08-29 13:47 ` [PATCH v3] " Tetsuo Handa
2021-08-30 7:13 ` Christoph Hellwig
2021-09-01 5:36 ` Christoph Hellwig
2021-09-01 5:47 ` Tetsuo Handa
2021-09-01 6:10 ` Christoph Hellwig
2021-09-02 0:07 ` [PATCH v3 (repost)] " Tetsuo Handa
2021-09-04 4:16 ` [PATCH v3] " Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210827184302.GA29967@lst.de \
--to=hch@lst.de \
--cc=axboe@kernel.dk \
--cc=hdanton@sina.com \
--cc=linux-block@vger.kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=penguin-kernel@i-love.sakura.ne.jp \
--cc=tyhicks@linux.microsoft.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).