From: Coly Li <colyli@suse.de>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, linux-bcache@vger.kernel.org,
Mingzhe Zou <mingzhe.zou@easystack.cn>,
stable@vger.kernel.org, Coly Li <colyli@suse.de>
Subject: [PATCH 06/10] bcache: fixup lock c->root error
Date: Mon, 20 Nov 2023 13:24:59 +0800 [thread overview]
Message-ID: <20231120052503.6122-7-colyli@suse.de> (raw)
In-Reply-To: <20231120052503.6122-1-colyli@suse.de>
From: Mingzhe Zou <mingzhe.zou@easystack.cn>
We had a problem with io hung because it was waiting for c->root to
release the lock.
crash> cache_set.root -l cache_set.list ffffa03fde4c0050
root = 0xffff802ef454c800
crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
[ffff802ef454c858] struct rw_semaphore lock;
crash> struct rw_semaphore ffff802ef454c858
struct rw_semaphore {
count = {
counter = -4294967297
},
wait_list = {
next = 0xffff00006786fc28,
prev = 0xffff00005d0efac8
},
wait_lock = {
raw_lock = {
{
val = {
counter = 0
},
{
locked = 0 '\000',
pending = 0 '\000'
},
{
locked_pending = 0,
tail = 0
}
}
}
},
osq = {
tail = {
counter = 0
}
},
owner = 0xffffa03fdc586603
}
The "counter = -4294967297" means that lock count is -1 and a write lock
is being attempted. Then, we found that there is a btree with a counter
of 1 in btree_cache_freeable.
crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
[ffffa03fde4c1140] struct list_head btree_cache;
[ffffa03fde4c1150] struct list_head btree_cache_freeable;
[ffffa03fde4c1160] struct list_head btree_cache_freed;
[ffffa03fde4c1170] unsigned int btree_cache_used;
[ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
[ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
crash> list -H ffffa03fde4c1140|wc -l
973
crash> list -H ffffa03fde4c1150|wc -l
1123
crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
btree_cache_used = 2097
crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^ lock = {" > btree_cache.txt
crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^ lock = {" > btree_cache_freeable.txt
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
/var/crash/127.0.0.1-2023-08-04-16:40:28
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
counter = 1
We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
(1). Thread X has locked c->root(A) write.
(2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
(3). Thread X bch_btree_set_root() changes c->root from A to B.
(4). Thread X releases the lock(c->root A).
(5). Thread Y successfully locks c->root(A).
(6). Thread Y releases the lock(c->root B).
down_write locked ---(1)----------------------┐
| |
| down_read waiting ---(2)----┐ |
| | ┌-------------┐ ┌-------------┐
bch_btree_set_root ===(3)========>> | c->root A | | c->root B |
| | └-------------┘ └-------------┘
up_write ---(4)---------------------┘ | |
| | |
down_read locked ---(5)-----------┘ |
| |
up_read ---(6)-----------------------------┘
Since c->root may change, the correct steps to lock c->root should be
the same as bch_root_usage(), compare after locking.
static unsigned int bch_root_usage(struct cache_set *c)
{
unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
goto lock_root;
do {
rw_unlock(false, b);
lock_root:
b = c->root;
rw_lock(false, b, b->level);
} while (b != c->root);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
bytes += bkey_bytes(k);
rw_unlock(false, b);
return (bytes * 100) / btree_bytes(c);
}
Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Cc: stable@vger.kernel.org
Signed-off-by: Coly Li <colyli@suse.de>
---
drivers/md/bcache/writeback.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 77fb72ac6b81..a1d760916246 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void)
void bch_sectors_dirty_init(struct bcache_device *d)
{
int i;
+ struct btree *b = NULL;
struct bkey *k = NULL;
struct btree_iter iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;
+retry_lock:
+ b = c->root;
+ rw_lock(0, b, b->level);
+ if (b != c->root) {
+ rw_unlock(0, b);
+ goto retry_lock;
+ }
+
/* Just count root keys if no leaf node */
- rw_lock(0, c->root, c->root->level);
if (c->root->level == 0) {
bch_btree_op_init(&op.op, -1);
op.inode = d->id;
@@ -997,7 +1005,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
sectors_dirty_init_fn(&op.op, c->root, k);
}
- rw_unlock(0, c->root);
+ rw_unlock(0, b);
return;
}
@@ -1033,7 +1041,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
out:
/* Must wait for all threads to stop. */
wait_event(state.wait, atomic_read(&state.started) == 0);
- rw_unlock(0, c->root);
+ rw_unlock(0, b);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
--
2.35.3
next prev parent reply other threads:[~2023-11-20 5:44 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-20 5:24 [PATCH 00/10] bcache-next 20231120 Coly Li
2023-11-20 5:24 ` [PATCH 01/10] bcache: avoid oversize memory allocation by small stripe_size Coly Li
2023-11-20 5:24 ` [PATCH 02/10] bcache: check return value from btree_node_alloc_replacement() Coly Li
2023-11-20 5:24 ` [PATCH 03/10] bcache: remove redundant assignment to variable cur_idx Coly Li
2023-11-20 5:24 ` [PATCH 04/10] bcache: prevent potential division by zero error Coly Li
2023-11-20 5:24 ` [PATCH 05/10] bcache: fixup init dirty data errors Coly Li
2023-11-20 5:24 ` Coly Li [this message]
2023-11-20 5:25 ` [PATCH 07/10] bcache: fixup multi-threaded bch_sectors_dirty_init() wake-up race Coly Li
2023-11-20 5:25 ` [PATCH 08/10] bcache: replace a mistaken IS_ERR() by IS_ERR_OR_NULL() in btree_gc_coalesce() Coly Li
2023-11-20 5:25 ` [PATCH 09/10] bcache: add code comments for bch_btree_node_get() and __bch_btree_node_alloc() Coly Li
2023-11-20 5:25 ` [PATCH 10/10] bcache: avoid NULL checking to c->root in run_cache_set() Coly Li
2023-11-20 16:18 ` [PATCH 00/10] bcache-next 20231120 Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231120052503.6122-7-colyli@suse.de \
--to=colyli@suse.de \
--cc=axboe@kernel.dk \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=mingzhe.zou@easystack.cn \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).