* [PATCH] btrfs: qgroups, fix rescan worker running races
@ 2020-02-06 8:17 Qu Wenruo
2020-02-06 15:54 ` Josef Bacik
0 siblings, 1 reply; 2+ messages in thread
From: Qu Wenruo @ 2020-02-06 8:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Jeff Mahoney
[BUG]
There are some reports of btrfs waiting forever during unmount, with
the following call trace:
INFO: task umount:4631 blocked for more than 491 seconds.
Tainted: G X 5.3.8-2-default #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
umount D 0 4631 3337 0x00000000
Call Trace:
([<00000000174adf7a>] __schedule+0x342/0x748)
[<00000000174ae3ca>] schedule+0x4a/0xd8
[<00000000174b1f08>] schedule_timeout+0x218/0x420
[<00000000174af10c>] wait_for_common+0x104/0x1d8
[<000003ff804d6994>] btrfs_qgroup_wait_for_completion+0x84/0xb0 [btrfs]
[<000003ff8044a616>] close_ctree+0x4e/0x380 [btrfs]
[<0000000016fa3136>] generic_shutdown_super+0x8e/0x158
[<0000000016fa34d6>] kill_anon_super+0x26/0x40
[<000003ff8041ba88>] btrfs_kill_super+0x28/0xc8 [btrfs]
[<0000000016fa39f8>] deactivate_locked_super+0x68/0x98
[<0000000016fcb198>] cleanup_mnt+0xc0/0x140
[<0000000016d6a846>] task_work_run+0xc6/0x110
[<0000000016d04f76>] do_notify_resume+0xae/0xb8
[<00000000174b30ae>] system_call+0xe2/0x2c8
[CAUSE]
The problem can happen like this:
Qgroup ioctl thread | Unmount thread
----------------------------------------+-----------------------------------
Fs has QGROUP_STATUS_RESCAN bit set |
And is mounted RO |
|
open_ctree() |
|- btrfs_read_qgroup_config() |
| |- qgroup_rescan_init() |
| |- qgroup_rescan_running = true; |
|- btrfs_qgroup_rescan_resume() |
| |- rescan work queued |
| but not yet executing |
-- open_ctree() returned |
| close_ctree()
| |- btrfs_qgroup_wait_for_completion()
| |- running == true;
| |- wait_for_completion();
|
btrfs_qgroup_rescan_worker() |
Which is expected to be run here, |
Since the rescan worker has not yet been executed, no one will wake up
btrfs_qgroup_wait_for_completion().
[FIX]
This patch will introduce a new status (qgroup_rescan_queued) to ensure
above race won't happen.
Now the lifespan of qgroup enable/rescan looks like this:
qgroup_rescan_init() --
|- qgroup_rescan_queued = true; | Section A
qgroup_rescan_running is still false |
--
btrfs_qgroup_rescan_worker() |
|- qgroup_rescan_queued = false; |
|- qgroup_rescan_running = true; | Section B
--
No cross section can happen since qgroup_rescan_* are all protected by
qgroup_rescan_lock.
In section A, btrfs_qgroup_wait_for_completion() will exit as rescan is
not running.
In section B, btrfs_qgroup_wait_for_completion() will wait for the
current rescan to finish.
So that no race can happen now.
Fixes: 8d9eddad194 (Btrfs: fix qgroup rescan worker initialization)
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
[Move the queued = false to btrfs_qgroup_rescan_worker, commit message
update]
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
The original version still has a window, because the running bit is still
set before the rescan worker starts running; thus it only reduces the
window rather than eliminating it.
---
fs/btrfs/ctree.h | 2 ++
fs/btrfs/ioctl.c | 4 +++-
fs/btrfs/qgroup.c | 15 +++++++--------
3 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54efb21c2727..d3bf4b62df83 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -847,6 +847,8 @@ struct btrfs_fs_info {
struct btrfs_workqueue *qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
struct btrfs_work qgroup_rescan_work;
+ /* qgroup rescan worker queued, but not yet executed */
+ bool qgroup_rescan_queued; /* protected by qgroup_rescan_lock */
bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
/* filesystem state */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 18e328ce4b54..505a36196fb9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4963,10 +4963,12 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
if (!qsa)
return -ENOMEM;
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ if (fs_info->qgroup_rescan_queued || fs_info->qgroup_rescan_running) {
qsa->flags = 1;
qsa->progress = fs_info->qgroup_rescan_progress.objectid;
}
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
if (copy_to_user(arg, qsa, sizeof(*qsa)))
ret = -EFAULT;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d4282e12f2a6..1ee057cc2125 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2458,7 +2458,7 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
}
mutex_lock(&fs_info->qgroup_rescan_lock);
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_queued || fs_info->qgroup_rescan_running) {
if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
mutex_unlock(&fs_info->qgroup_rescan_lock);
ret = 0;
@@ -3144,6 +3144,11 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
path->search_commit_root = 1;
path->skip_locking = 1;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_queued = false;
+ fs_info->qgroup_rescan_running = true;
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
err = 0;
while (!err && !btrfs_fs_closing(fs_info)) {
trans = btrfs_start_transaction(fs_info->fs_root, 0);
@@ -3246,7 +3251,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
}
mutex_lock(&fs_info->qgroup_rescan_lock);
- spin_lock(&fs_info->qgroup_lock);
if (init_flags) {
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
@@ -3261,7 +3265,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
}
if (ret) {
- spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
return ret;
}
@@ -3272,9 +3275,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
- fs_info->qgroup_rescan_running = true;
-
- spin_unlock(&fs_info->qgroup_lock);
+ fs_info->qgroup_rescan_queued = true;
mutex_unlock(&fs_info->qgroup_rescan_lock);
btrfs_init_work(&fs_info->qgroup_rescan_work,
@@ -3348,9 +3349,7 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
int ret = 0;
mutex_lock(&fs_info->qgroup_rescan_lock);
- spin_lock(&fs_info->qgroup_lock);
running = fs_info->qgroup_rescan_running;
- spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
if (!running)
--
2.25.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] btrfs: qgroups, fix rescan worker running races
2020-02-06 8:17 [PATCH] btrfs: qgroups, fix rescan worker running races Qu Wenruo
@ 2020-02-06 15:54 ` Josef Bacik
0 siblings, 0 replies; 2+ messages in thread
From: Josef Bacik @ 2020-02-06 15:54 UTC (permalink / raw)
To: Qu Wenruo, linux-btrfs; +Cc: Jeff Mahoney
On 2/6/20 3:17 AM, Qu Wenruo wrote:
> [BUG]
> There are some reports about btrfs wait forever to unmount itself, with
> the following call trace:
> INFO: task umount:4631 blocked for more than 491 seconds.
> Tainted: G X 5.3.8-2-default #1
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> umount D 0 4631 3337 0x00000000
> Call Trace:
> ([<00000000174adf7a>] __schedule+0x342/0x748)
> [<00000000174ae3ca>] schedule+0x4a/0xd8
> [<00000000174b1f08>] schedule_timeout+0x218/0x420
> [<00000000174af10c>] wait_for_common+0x104/0x1d8
> [<000003ff804d6994>] btrfs_qgroup_wait_for_completion+0x84/0xb0 [btrfs]
> [<000003ff8044a616>] close_ctree+0x4e/0x380 [btrfs]
> [<0000000016fa3136>] generic_shutdown_super+0x8e/0x158
> [<0000000016fa34d6>] kill_anon_super+0x26/0x40
> [<000003ff8041ba88>] btrfs_kill_super+0x28/0xc8 [btrfs]
> [<0000000016fa39f8>] deactivate_locked_super+0x68/0x98
> [<0000000016fcb198>] cleanup_mnt+0xc0/0x140
> [<0000000016d6a846>] task_work_run+0xc6/0x110
> [<0000000016d04f76>] do_notify_resume+0xae/0xb8
> [<00000000174b30ae>] system_call+0xe2/0x2c8
>
> [CAUSE]
> The problem can happen like this:
>
> Qgroup ioctl thread | Unmount thread
> ----------------------------------------+-----------------------------------
> Fs has QGROUP_STATUS_RESCAN bit set |
> And is mounted RO |
> |
> open_ctree() |
> |- btrfs_read_qgroup_config() |
> | |- qgroup_rescan_init() |
> | |- qgroup_rescan_running = true; |
> |- btrfs_qgroup_rescan_resume() |
> | |- rescan work queued |
> | but not yet executing |
> -- open_ctree() returned |
> | close_ctree()
> | |- btrfs_qgroup_wait_for_completion()
> | |- running == true;
> | |- wait_for_completion();
> |
> btrfs_qgroup_rescan_worker() |
> Which is expected to be run here, |
>
> Since rescan worker is not yet executed , no one will wake up
> btrfs_qgroup_wait_for_completion().
>
> [FIX]
> This patch will introduce a new status (qgroup_rescan_queued) to ensure
> above race won't happen.
>
> Now the lifespan of qgroup enable/rescan looks like this:
>
> qgroup_rescan_init() --
> |- qgroup_rescan_queued = true; | Section A
> qgroup_rescan_running is still false |
> --
> btrfs_qgroup_rescan_worker() |
> |- qgroup_rescan_queued = false; |
> |- qgroup_rescan_running = true; | Section B
> --
>
> No cross section can happen since qgroup_rescan_* are all protected by
> qgroup_rescan_lock.
>
> In section A, btrfs_qgroup_wait_for_completion() will exit as rescan is
> not running.
> In section B, btrfs_qgroup_wait_for_completion() will fail current
> rescan to finish.
>
> So that no race can happen now.
>
> Fixes: 8d9eddad194 (Btrfs: fix qgroup rescan worker initialization)
> Signed-off-by: Jeff Mahoney <jeffm@suse.com>
> [Move the queued = false to btrfs_qgroup_rescan_worker, commit message
> update]
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> The original version still has a window as that running bit is still set
> before rescan worker get running, thus it only reduces the window, not
> eliminate it.
> ---
> fs/btrfs/ctree.h | 2 ++
> fs/btrfs/ioctl.c | 4 +++-
> fs/btrfs/qgroup.c | 15 +++++++--------
> 3 files changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 54efb21c2727..d3bf4b62df83 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -847,6 +847,8 @@ struct btrfs_fs_info {
> struct btrfs_workqueue *qgroup_rescan_workers;
> struct completion qgroup_rescan_completion;
> struct btrfs_work qgroup_rescan_work;
> + /* qgroup rescan worker queued, but not yet executed */
> + bool qgroup_rescan_queued; /* protected by qgroup_rescan_lock */
> bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
>
> /* filesystem state */
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 18e328ce4b54..505a36196fb9 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -4963,10 +4963,12 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
> if (!qsa)
> return -ENOMEM;
>
> - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> + if (fs_info->qgroup_rescan_queued || fs_info->qgroup_rescan_running) {
> qsa->flags = 1;
> qsa->progress = fs_info->qgroup_rescan_progress.objectid;
> }
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
>
> if (copy_to_user(arg, qsa, sizeof(*qsa)))
> ret = -EFAULT;
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index d4282e12f2a6..1ee057cc2125 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -2458,7 +2458,7 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
> }
>
> mutex_lock(&fs_info->qgroup_rescan_lock);
> - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> + if (fs_info->qgroup_rescan_queued || fs_info->qgroup_rescan_running) {
> if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
> mutex_unlock(&fs_info->qgroup_rescan_lock);
> ret = 0;
> @@ -3144,6 +3144,11 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> path->search_commit_root = 1;
> path->skip_locking = 1;
>
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> + fs_info->qgroup_rescan_queued = false;
> + fs_info->qgroup_rescan_running = true;
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> err = 0;
> while (!err && !btrfs_fs_closing(fs_info)) {
> trans = btrfs_start_transaction(fs_info->fs_root, 0);
> @@ -3246,7 +3251,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
> }
>
> mutex_lock(&fs_info->qgroup_rescan_lock);
> - spin_lock(&fs_info->qgroup_lock);
>
> if (init_flags) {
> if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> @@ -3261,7 +3265,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
> }
>
> if (ret) {
> - spin_unlock(&fs_info->qgroup_lock);
> mutex_unlock(&fs_info->qgroup_rescan_lock);
> return ret;
> }
> @@ -3272,9 +3275,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
> sizeof(fs_info->qgroup_rescan_progress));
> fs_info->qgroup_rescan_progress.objectid = progress_objectid;
> init_completion(&fs_info->qgroup_rescan_completion);
> - fs_info->qgroup_rescan_running = true;
> -
> - spin_unlock(&fs_info->qgroup_lock);
I had to go look at the code to figure out why you were doing this. You are
adding a flag and changing the locking rules, I'd rather you do the locking
separately. Do the change to add qgroup_rescan_queued, and then a followup
patch dropping the spin lock because everything is now protected solely by the
qgroup_rescan_lock. Thanks,
Josef
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-02-06 15:54 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-06 8:17 [PATCH] btrfs: qgroups, fix rescan worker running races Qu Wenruo
2020-02-06 15:54 ` Josef Bacik
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).