All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] bcache: fix deadlock in bcache_allocator()
@ 2019-07-10  9:31 Andrea Righi
  2019-07-10 15:11 ` Coly Li
  0 siblings, 1 reply; 5+ messages in thread
From: Andrea Righi @ 2019-07-10  9:31 UTC (permalink / raw)
  To: Coly Li, Kent Overstreet; +Cc: linux-bcache, linux-kernel

bcache_allocator() can call the following:

 bch_allocator_thread()
  -> bch_prio_write()
     -> bch_bucket_alloc()
        -> wait on &ca->set->bucket_wait

But the wake up event on bucket_wait is supposed to come from
bch_allocator_thread() itself => deadlock:

 [ 242.888435] INFO: task bcache_allocato:9015 blocked for more than 120 seconds.
 [ 242.893786] Not tainted 4.20.0-042000rc3-generic #201811182231
 [ 242.896669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 [ 242.900428] bcache_allocato D 0 9015 2 0x80000000
 [ 242.900434] Call Trace:
 [ 242.900448] __schedule+0x2a2/0x880
 [ 242.900455] ? __schedule+0x2aa/0x880
 [ 242.900462] schedule+0x2c/0x80
 [ 242.900480] bch_bucket_alloc+0x19d/0x380 [bcache]
 [ 242.900503] ? wait_woken+0x80/0x80
 [ 242.900519] bch_prio_write+0x190/0x340 [bcache]
 [ 242.900530] bch_allocator_thread+0x482/0xd10 [bcache]
 [ 242.900535] kthread+0x120/0x140
 [ 242.900546] ? bch_invalidate_one_bucket+0x80/0x80 [bcache]
 [ 242.900549] ? kthread_park+0x90/0x90
 [ 242.900554] ret_from_fork+0x35/0x40

Fix by making the call to bch_prio_write() non-blocking, so that
bch_allocator_thread() never waits on itself.

Moreover, make sure to wake up the garbage collector thread when
bch_prio_write() is failing to allocate buckets.

BugLink: https://bugs.launchpad.net/bugs/1784665
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
---
 drivers/md/bcache/alloc.c  |  6 +++++-
 drivers/md/bcache/bcache.h |  2 +-
 drivers/md/bcache/super.c  | 13 +++++++++----
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index f8986effcb50..0797587600c7 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -377,7 +377,11 @@ static int bch_allocator_thread(void *arg)
 			if (!fifo_full(&ca->free_inc))
 				goto retry_invalidate;
 
-			bch_prio_write(ca);
+			if (bch_prio_write(ca, false) < 0) {
+				ca->invalidate_needs_gc = 1;
+				wake_up_gc(ca->set);
+				goto retry_invalidate;
+			}
 		}
 	}
 out:
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index fdf75352e16a..dc5106b21260 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -979,7 +979,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
 __printf(2, 3)
 bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
 
-void bch_prio_write(struct cache *ca);
+int bch_prio_write(struct cache *ca, bool wait);
 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
 
 extern struct workqueue_struct *bcache_wq;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1b63ac876169..6598b457df1a 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -525,7 +525,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
 	closure_sync(cl);
 }
 
-void bch_prio_write(struct cache *ca)
+int bch_prio_write(struct cache *ca, bool wait)
 {
 	int i;
 	struct bucket *b;
@@ -560,8 +560,12 @@ void bch_prio_write(struct cache *ca)
 		p->magic	= pset_magic(&ca->sb);
 		p->csum		= bch_crc64(&p->magic, bucket_bytes(ca) - 8);
 
-		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
-		BUG_ON(bucket == -1);
+		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
+		if (bucket == -1) {
+			if (!wait)
+				return -ENOMEM;
+			BUG_ON(1);
+		}
 
 		mutex_unlock(&ca->set->bucket_lock);
 		prio_io(ca, bucket, REQ_OP_WRITE, 0);
@@ -589,6 +593,7 @@ void bch_prio_write(struct cache *ca)
 
 		ca->prio_last_buckets[i] = ca->prio_buckets[i];
 	}
+	return 0;
 }
 
 static void prio_read(struct cache *ca, uint64_t bucket)
@@ -1903,7 +1908,7 @@ static int run_cache_set(struct cache_set *c)
 
 		mutex_lock(&c->bucket_lock);
 		for_each_cache(ca, c, i)
-			bch_prio_write(ca);
+			bch_prio_write(ca, true);
 		mutex_unlock(&c->bucket_lock);
 
 		err = "cannot allocate new UUID bucket";
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] bcache: fix deadlock in bcache_allocator()
  2019-07-10  9:31 [PATCH] bcache: fix deadlock in bcache_allocator() Andrea Righi
@ 2019-07-10 15:11 ` Coly Li
  2019-07-10 15:46   ` Andrea Righi
  0 siblings, 1 reply; 5+ messages in thread
From: Coly Li @ 2019-07-10 15:11 UTC (permalink / raw)
  To: Andrea Righi; +Cc: Kent Overstreet, linux-bcache, linux-kernel

On 2019/7/10 5:31 下午, Andrea Righi wrote:
> bcache_allocator() can call the following:
> 
>  bch_allocator_thread()
>   -> bch_prio_write()
>      -> bch_bucket_alloc()
>         -> wait on &ca->set->bucket_wait
> 
> But the wake up event on bucket_wait is supposed to come from
> bch_allocator_thread() itself => deadlock:
> 
>  [ 242.888435] INFO: task bcache_allocato:9015 blocked for more than 120 seconds.
>  [ 242.893786] Not tainted 4.20.0-042000rc3-generic #201811182231
>  [ 242.896669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>  [ 242.900428] bcache_allocato D 0 9015 2 0x80000000
>  [ 242.900434] Call Trace:
>  [ 242.900448] __schedule+0x2a2/0x880
>  [ 242.900455] ? __schedule+0x2aa/0x880
>  [ 242.900462] schedule+0x2c/0x80
>  [ 242.900480] bch_bucket_alloc+0x19d/0x380 [bcache]
>  [ 242.900503] ? wait_woken+0x80/0x80
>  [ 242.900519] bch_prio_write+0x190/0x340 [bcache]
>  [ 242.900530] bch_allocator_thread+0x482/0xd10 [bcache]
>  [ 242.900535] kthread+0x120/0x140
>  [ 242.900546] ? bch_invalidate_one_bucket+0x80/0x80 [bcache]
>  [ 242.900549] ? kthread_park+0x90/0x90
>  [ 242.900554] ret_from_fork+0x35/0x40
> 
> Fix by making the call to bch_prio_write() non-blocking, so that
> bch_allocator_thread() never waits on itself.
> 
> Moreover, make sure to wake up the garbage collector thread when
> bch_prio_write() is failing to allocate buckets.
> 
> BugLink: https://bugs.launchpad.net/bugs/1784665
> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>

Hi Andrea,

From the BugLink, it seems several critical bcache fixes are missing.
Could you please to try current 5.3-rc kernel, and try whether such
problem exists or not ?

For this patch itself, it looks good except that I am not sure whether
invoking garbage collection is a proper method. Because bch_prio_write()
is called right after garbage collection gets done, jump back to
retry_invalidate: again may just hide a non-space long time waiting
condition.

Could you please give me some hint, on how to reproduce such hang
timeout situation. If I am lucky to reproduce such problem on 5.3-rc
kernel, it may be very helpful to understand what exact problem your
patch fixes.

Thanks in advance.

Coly Li


> ---
>  drivers/md/bcache/alloc.c  |  6 +++++-
>  drivers/md/bcache/bcache.h |  2 +-
>  drivers/md/bcache/super.c  | 13 +++++++++----
>  3 files changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> index f8986effcb50..0797587600c7 100644
> --- a/drivers/md/bcache/alloc.c
> +++ b/drivers/md/bcache/alloc.c
> @@ -377,7 +377,11 @@ static int bch_allocator_thread(void *arg)
>  			if (!fifo_full(&ca->free_inc))
>  				goto retry_invalidate;
>  
> -			bch_prio_write(ca);
> +			if (bch_prio_write(ca, false) < 0) {
> +				ca->invalidate_needs_gc = 1;
> +				wake_up_gc(ca->set);
> +				goto retry_invalidate;
> +			}
>  		}
>  	}
>  out:
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index fdf75352e16a..dc5106b21260 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -979,7 +979,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
>  __printf(2, 3)
>  bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
>  
> -void bch_prio_write(struct cache *ca);
> +int bch_prio_write(struct cache *ca, bool wait);
>  void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
>  
>  extern struct workqueue_struct *bcache_wq;
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 1b63ac876169..6598b457df1a 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -525,7 +525,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
>  	closure_sync(cl);
>  }
>  
> -void bch_prio_write(struct cache *ca)
> +int bch_prio_write(struct cache *ca, bool wait)
>  {
>  	int i;
>  	struct bucket *b;
> @@ -560,8 +560,12 @@ void bch_prio_write(struct cache *ca)
>  		p->magic	= pset_magic(&ca->sb);
>  		p->csum		= bch_crc64(&p->magic, bucket_bytes(ca) - 8);
>  
> -		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
> -		BUG_ON(bucket == -1);
> +		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
> +		if (bucket == -1) {
> +			if (!wait)
> +				return -ENOMEM;
> +			BUG_ON(1);
> +		}
>  
>  		mutex_unlock(&ca->set->bucket_lock);
>  		prio_io(ca, bucket, REQ_OP_WRITE, 0);
> @@ -589,6 +593,7 @@ void bch_prio_write(struct cache *ca)
>  
>  		ca->prio_last_buckets[i] = ca->prio_buckets[i];
>  	}
> +	return 0;
>  }
>  
>  static void prio_read(struct cache *ca, uint64_t bucket)
> @@ -1903,7 +1908,7 @@ static int run_cache_set(struct cache_set *c)
>  
>  		mutex_lock(&c->bucket_lock);
>  		for_each_cache(ca, c, i)
> -			bch_prio_write(ca);
> +			bch_prio_write(ca, true);
>  		mutex_unlock(&c->bucket_lock);
>  
>  		err = "cannot allocate new UUID bucket";
> 


-- 

Coly Li

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] bcache: fix deadlock in bcache_allocator()
  2019-07-10 15:11 ` Coly Li
@ 2019-07-10 15:46   ` Andrea Righi
  2019-07-10 15:57     ` Coly Li
  2019-08-06  9:12     ` Andrea Righi
  0 siblings, 2 replies; 5+ messages in thread
From: Andrea Righi @ 2019-07-10 15:46 UTC (permalink / raw)
  To: Coly Li; +Cc: Kent Overstreet, linux-bcache, linux-kernel

On Wed, Jul 10, 2019 at 11:11:37PM +0800, Coly Li wrote:
> On 2019/7/10 5:31 下午, Andrea Righi wrote:
> > bcache_allocator() can call the following:
> > 
> >  bch_allocator_thread()
> >   -> bch_prio_write()
> >      -> bch_bucket_alloc()
> >         -> wait on &ca->set->bucket_wait
> > 
> > But the wake up event on bucket_wait is supposed to come from
> > bch_allocator_thread() itself => deadlock:
> > 
> >  [ 242.888435] INFO: task bcache_allocato:9015 blocked for more than 120 seconds.
> >  [ 242.893786] Not tainted 4.20.0-042000rc3-generic #201811182231
> >  [ 242.896669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> >  [ 242.900428] bcache_allocato D 0 9015 2 0x80000000
> >  [ 242.900434] Call Trace:
> >  [ 242.900448] __schedule+0x2a2/0x880
> >  [ 242.900455] ? __schedule+0x2aa/0x880
> >  [ 242.900462] schedule+0x2c/0x80
> >  [ 242.900480] bch_bucket_alloc+0x19d/0x380 [bcache]
> >  [ 242.900503] ? wait_woken+0x80/0x80
> >  [ 242.900519] bch_prio_write+0x190/0x340 [bcache]
> >  [ 242.900530] bch_allocator_thread+0x482/0xd10 [bcache]
> >  [ 242.900535] kthread+0x120/0x140
> >  [ 242.900546] ? bch_invalidate_one_bucket+0x80/0x80 [bcache]
> >  [ 242.900549] ? kthread_park+0x90/0x90
> >  [ 242.900554] ret_from_fork+0x35/0x40
> > 
> > Fix by making the call to bch_prio_write() non-blocking, so that
> > bch_allocator_thread() never waits on itself.
> > 
> > Moreover, make sure to wake up the garbage collector thread when
> > bch_prio_write() is failing to allocate buckets.
> > 
> > BugLink: https://bugs.launchpad.net/bugs/1784665
> > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> 
> Hi Andrea,
> 

Hi Coly,

> From the BugLink, it seems several critical bcache fixes are missing.
> Could you please to try current 5.3-rc kernel, and try whether such
> problem exists or not ?

Sure, I'll do a test with the latest 5.3-rc kernel. I just wanna mention
that I've been able to reproduce this problem after backporting all the
fixes (even those from linux-next), but I agree that testing 5.3-rc is a
better idea (I may have introduced bugs while backporting stuff).

> 
> For this patch itself, it looks good except that I am not sure whether
> invoking garbage collection is a proper method. Because bch_prio_write()
> is called right after garbage collection gets done, jump back to
> retry_invalidate: again may just hide a non-space long time waiting
> condition.

Honestly I was thinking the same, but if I don't call the garbage
collector bch_allocator_thread() gets stuck forever (or for a very very
long time) in the retry_invalidate loop...

> 
> Could you please give me some hint, on how to reproduce such hang
> timeout situation. If I am lucky to reproduce such problem on 5.3-rc
> kernel, it may be very helpful to understand what exact problem your
> patch fixes.

Fortunately I have a reproducer, here's the script that I'm using:

---
#!/bin/bash -x
#
# Reproducer script: stops any existing bcache devices, wipes /dev/vdc and
# /dev/vdd, then recreates a bcache setup (make-bcache -C on /dev/vdd,
# make-bcache -B on /dev/vdc2) and formats the resulting devices with ext4.
#
# WARNING: destructive -- wipefs/parted/mkfs are run unconditionally on
# /dev/vdc and /dev/vdd.

# sysfs paths of the bcache block device and of the cache set directory
# (the cache set path is a glob pattern).
BACKING=/sys/class/block/bcache0
CACHE=/sys/fs/bcache/*-*-*
# Keep writing "1" to the stop files until the sysfs entries are gone.
while true; do
    echo "1" | tee ${BACKING}/bcache/stop
    echo "1" | tee ${CACHE}/stop
    udevadm settle
    # NOTE(review): "${CACHE}" is quoted here, so the glob is not expanded and
    # the literal pattern string is tested -- confirm this is intended.
    [ ! -e "${BACKING}" -a ! -e "${CACHE}" ] && break
    sleep 1
done
# Erase all signatures from the partitions and whole disks, then ask the
# kernel to re-read the partition tables.
wipefs --all --force /dev/vdc2
wipefs --all --force /dev/vdc1
wipefs --all --force /dev/vdc
wipefs --all --force /dev/vdd
blockdev --rereadpt /dev/vdc
blockdev --rereadpt /dev/vdd
udevadm settle

# create ext4 fs over bcache
# Partition /dev/vdc with an msdos label: vdc1 (sectors 2048-2047999) and
# vdc2 (sectors 2048000-20922367); abort the script if parted fails.
parted /dev/vdc --script mklabel msdos || exit 1
udevadm settle --exit-if-exists=/dev/vdc
parted /dev/vdc --script mkpart primary 2048s 2047999s || exit 1
udevadm settle --exit-if-exists=/dev/vdc1
parted /dev/vdc --script mkpart primary 2048000s 20922367s || exit 1
udevadm settle --exit-if-exists=/dev/vdc2
make-bcache -C /dev/vdd || exit 1
# Poll until an entry whose name contains '-' shows up under /sys/fs/bcache;
# its name is saved in CSET for the attach step below.
while true; do
    udevadm settle
    CSET=`ls /sys/fs/bcache | grep -- -`
    [ -n "$CSET" ] && break;
    sleep 1
done
make-bcache -B /dev/vdc2 || exit 1
# Poll until the bcache0 sysfs entry exists.
while true; do
    udevadm settle
    [ -e "${BACKING}" ] && break
    sleep 1;
done
# Write the cache set name into the backing device's attach file.
echo $CSET | tee ${BACKING}/bcache/attach
udevadm settle --exit-if-exists=/dev/bcache0
bcache-super-show /dev/vdc2
udevadm settle
# Format vdc1 and bcache0 with ext4 using fixed labels and UUIDs; only the
# bcache0 mkfs failure aborts the script.
mkfs.ext4 -F -L boot-fs -U e9f00d20-95a0-11e8-82a2-525400123401 /dev/vdc1
udevadm settle
mkfs.ext4 -F -L root-fs -U e9f00d21-95a0-11e8-82a2-525400123401 /dev/bcache0 || exit 1
blkid
---

I just run this as root in a busy loop (something like
`while :; do ./test.sh; done`) on a kvm instance with two extra disks
(in addition to the root disk).

The extra disks are created as following:

 qemu-img create -f qcow2 disk1.qcow 10G
 qemu-img create -f qcow2 disk2.qcow 2G

I'm using these particular sizes, but I think we can reproduce the same
problem also using different sizes.

Thanks,
-Andrea

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] bcache: fix deadlock in bcache_allocator()
  2019-07-10 15:46   ` Andrea Righi
@ 2019-07-10 15:57     ` Coly Li
  2019-08-06  9:12     ` Andrea Righi
  1 sibling, 0 replies; 5+ messages in thread
From: Coly Li @ 2019-07-10 15:57 UTC (permalink / raw)
  To: Andrea Righi; +Cc: Kent Overstreet, linux-bcache, linux-kernel

On 2019/7/10 11:46 下午, Andrea Righi wrote:
> On Wed, Jul 10, 2019 at 11:11:37PM +0800, Coly Li wrote:
>> On 2019/7/10 5:31 下午, Andrea Righi wrote:
>>> bcache_allocator() can call the following:
>>>
>>>  bch_allocator_thread()
>>>   -> bch_prio_write()
>>>      -> bch_bucket_alloc()
>>>         -> wait on &ca->set->bucket_wait
>>>
>>> But the wake up event on bucket_wait is supposed to come from
>>> bch_allocator_thread() itself => deadlock:
>>>
>>>  [ 242.888435] INFO: task bcache_allocato:9015 blocked for more than 120 seconds.
>>>  [ 242.893786] Not tainted 4.20.0-042000rc3-generic #201811182231
>>>  [ 242.896669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>>>  [ 242.900428] bcache_allocato D 0 9015 2 0x80000000
>>>  [ 242.900434] Call Trace:
>>>  [ 242.900448] __schedule+0x2a2/0x880
>>>  [ 242.900455] ? __schedule+0x2aa/0x880
>>>  [ 242.900462] schedule+0x2c/0x80
>>>  [ 242.900480] bch_bucket_alloc+0x19d/0x380 [bcache]
>>>  [ 242.900503] ? wait_woken+0x80/0x80
>>>  [ 242.900519] bch_prio_write+0x190/0x340 [bcache]
>>>  [ 242.900530] bch_allocator_thread+0x482/0xd10 [bcache]
>>>  [ 242.900535] kthread+0x120/0x140
>>>  [ 242.900546] ? bch_invalidate_one_bucket+0x80/0x80 [bcache]
>>>  [ 242.900549] ? kthread_park+0x90/0x90
>>>  [ 242.900554] ret_from_fork+0x35/0x40
>>>
>>> Fix by making the call to bch_prio_write() non-blocking, so that
>>> bch_allocator_thread() never waits on itself.
>>>
>>> Moreover, make sure to wake up the garbage collector thread when
>>> bch_prio_write() is failing to allocate buckets.
>>>
>>> BugLink: https://bugs.launchpad.net/bugs/1784665
>>> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
>>
>> Hi Andrea,
>>
> 
> Hi Coly,
> 

Hi Andrea,

>> From the BugLink, it seems several critical bcache fixes are missing.
>> Could you please to try current 5.3-rc kernel, and try whether such
>> problem exists or not ?
> 
> Sure, I'll do a test with the latest 5.3-rc kernel. I just wanna mention
> that I've been able to reproduce this problem after backporting all the
> fixes (even those from linux-next), but I agree that testing 5.3-rc is a
> better idea (I may have introduced bugs while backporting stuff).
> 

Did you also backport the patches which were just merged into 5.3-rc?
There are some fixes for deadlocking problems.

>>
>> For this patch itself, it looks good except that I am not sure whether
>> invoking garbage collection is a proper method. Because bch_prio_write()
>> is called right after garbage collection gets done, jump back to
>> retry_invalidate: again may just hide a non-space long time waiting
>> condition.
> 
> Honestly I was thinking the same, but if I don't call the garbage
> collector bch_allocator_thread() gets stuck forever (or for a very very
> long time) in the retry_invalidate loop...
> 
>>
>> Could you please give me some hint, on how to reproduce such hang
>> timeout situation. If I am lucky to reproduce such problem on 5.3-rc
>> kernel, it may be very helpful to understand what exact problem your
>> patch fixes.
> 
> Fortunately I have a reproducer, here's the script that I'm using:
> 

Great! Let me try this script, thank you very much :-)

Coly Li



> ---
> #!/bin/bash -x
> 
> BACKING=/sys/class/block/bcache0
> CACHE=/sys/fs/bcache/*-*-*
> while true; do
>     echo "1" | tee ${BACKING}/bcache/stop
>     echo "1" | tee ${CACHE}/stop
>     udevadm settle
>     [ ! -e "${BACKING}" -a ! -e "${CACHE}" ] && break
>     sleep 1
> done
> wipefs --all --force /dev/vdc2
> wipefs --all --force /dev/vdc1
> wipefs --all --force /dev/vdc
> wipefs --all --force /dev/vdd
> blockdev --rereadpt /dev/vdc
> blockdev --rereadpt /dev/vdd
> udevadm settle
> 
> # create ext4 fs over bcache
> parted /dev/vdc --script mklabel msdos || exit 1
> udevadm settle --exit-if-exists=/dev/vdc
> parted /dev/vdc --script mkpart primary 2048s 2047999s || exit 1
> udevadm settle --exit-if-exists=/dev/vdc1
> parted /dev/vdc --script mkpart primary 2048000s 20922367s || exit 1
> udevadm settle --exit-if-exists=/dev/vdc2
> make-bcache -C /dev/vdd || exit 1
> while true; do
>     udevadm settle
>     CSET=`ls /sys/fs/bcache | grep -- -`
>     [ -n "$CSET" ] && break;
>     sleep 1
> done
> make-bcache -B /dev/vdc2 || exit 1
> while true; do
>     udevadm settle
>     [ -e "${BACKING}" ] && break
>     sleep 1;
> done
> echo $CSET | tee ${BACKING}/bcache/attach
> udevadm settle --exit-if-exists=/dev/bcache0
> bcache-super-show /dev/vdc2
> udevadm settle
> mkfs.ext4 -F -L boot-fs -U e9f00d20-95a0-11e8-82a2-525400123401 /dev/vdc1
> udevadm settle
> mkfs.ext4 -F -L root-fs -U e9f00d21-95a0-11e8-82a2-525400123401 /dev/bcache0 || exit 1
> blkid
> ---
> 
> I just run this as root in a busy loop (something like
> `while :; do ./test.sh; done`) on a kvm instance with two extra disks
> (in addition to the root disk).
> 
> The extra disks are created as following:
> 
>  qemu-img create -f qcow2 disk1.qcow 10G
>  qemu-img create -f qcow2 disk2.qcow 2G
> 
> I'm using these particular sizes, but I think we can reproduce the same
> problem also using different sizes.
> 
> Thanks,
> -Andrea
> 


-- 

Coly Li

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] bcache: fix deadlock in bcache_allocator()
  2019-07-10 15:46   ` Andrea Righi
  2019-07-10 15:57     ` Coly Li
@ 2019-08-06  9:12     ` Andrea Righi
  1 sibling, 0 replies; 5+ messages in thread
From: Andrea Righi @ 2019-08-06  9:12 UTC (permalink / raw)
  To: Coly Li; +Cc: Kent Overstreet, linux-bcache, linux-kernel

On Wed, Jul 10, 2019 at 05:46:56PM +0200, Andrea Righi wrote:
> On Wed, Jul 10, 2019 at 11:11:37PM +0800, Coly Li wrote:
> > On 2019/7/10 5:31 下午, Andrea Righi wrote:
> > > bcache_allocator() can call the following:
> > > 
> > >  bch_allocator_thread()
> > >   -> bch_prio_write()
> > >      -> bch_bucket_alloc()
> > >         -> wait on &ca->set->bucket_wait
> > > 
> > > But the wake up event on bucket_wait is supposed to come from
> > > bch_allocator_thread() itself => deadlock:
> > > 
> > >  [ 242.888435] INFO: task bcache_allocato:9015 blocked for more than 120 seconds.
> > >  [ 242.893786] Not tainted 4.20.0-042000rc3-generic #201811182231
> > >  [ 242.896669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > >  [ 242.900428] bcache_allocato D 0 9015 2 0x80000000
> > >  [ 242.900434] Call Trace:
> > >  [ 242.900448] __schedule+0x2a2/0x880
> > >  [ 242.900455] ? __schedule+0x2aa/0x880
> > >  [ 242.900462] schedule+0x2c/0x80
> > >  [ 242.900480] bch_bucket_alloc+0x19d/0x380 [bcache]
> > >  [ 242.900503] ? wait_woken+0x80/0x80
> > >  [ 242.900519] bch_prio_write+0x190/0x340 [bcache]
> > >  [ 242.900530] bch_allocator_thread+0x482/0xd10 [bcache]
> > >  [ 242.900535] kthread+0x120/0x140
> > >  [ 242.900546] ? bch_invalidate_one_bucket+0x80/0x80 [bcache]
> > >  [ 242.900549] ? kthread_park+0x90/0x90
> > >  [ 242.900554] ret_from_fork+0x35/0x40
> > > 
> > > Fix by making the call to bch_prio_write() non-blocking, so that
> > > bch_allocator_thread() never waits on itself.
> > > 
> > > Moreover, make sure to wake up the garbage collector thread when
> > > bch_prio_write() is failing to allocate buckets.
> > > 
> > > BugLink: https://bugs.launchpad.net/bugs/1784665
> > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > 
> > Hi Andrea,
> > 
> 
> Hi Coly,
> 
> > From the BugLink, it seems several critical bcache fixes are missing.
> > Could you please to try current 5.3-rc kernel, and try whether such
> > problem exists or not ?
> 
> Sure, I'll do a test with the latest 5.3-rc kernel. I just wanna mention
> that I've been able to reproduce this problem after backporting all the
> fixes (even those from linux-next), but I agree that testing 5.3-rc is a
> better idea (I may have introduced bugs while backporting stuff).

Finally I've been able to do a test with the latest 5.3.0-rc3 vanilla
kernel (from today's Linus git) and I confirm that I can reproduce the
same deadlock issue:

[ 1158.490744] INFO: task bcache_allocato:15861 blocked for more than 120 seconds.
[ 1158.495929]       Not tainted 5.3.0-050300rc3-generic #201908042232
[ 1158.500653] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1158.504413] bcache_allocato D    0 15861      2 0x80004000
[ 1158.504419] Call Trace:
[ 1158.504429]  __schedule+0x2a8/0x670
[ 1158.504432]  schedule+0x2d/0x90
[ 1158.504448]  bch_bucket_alloc+0xe5/0x370 [bcache]
[ 1158.504453]  ? wait_woken+0x80/0x80
[ 1158.504466]  bch_prio_write+0x1dc/0x390 [bcache]
[ 1158.504476]  bch_allocator_thread+0x233/0x490 [bcache]
[ 1158.504491]  kthread+0x121/0x140
[ 1158.504503]  ? invalidate_buckets+0x890/0x890 [bcache]
[ 1158.504506]  ? kthread_park+0xb0/0xb0
[ 1158.504510]  ret_from_fork+0x35/0x40

[ 1158.473567] INFO: task python3:13282 blocked for more than 120 seconds.
[ 1158.479846]       Not tainted 5.3.0-050300rc3-generic #201908042232
[ 1158.484503] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1158.490237] python3         D    0 13282  13274 0x00004000
[ 1158.490246] Call Trace:
[ 1158.490347]  __schedule+0x2a8/0x670
[ 1158.490360]  ? __switch_to_asm+0x40/0x70
[ 1158.490365]  schedule+0x2d/0x90
[ 1158.490433]  bch_bucket_alloc+0xe5/0x370 [bcache]
[ 1158.490468]  ? wait_woken+0x80/0x80
[ 1158.490484]  __bch_bucket_alloc_set+0x10d/0x160 [bcache]
[ 1158.490497]  bch_bucket_alloc_set+0x4e/0x70 [bcache]
[ 1158.490519]  __uuid_write+0x61/0x180 [bcache]
[ 1158.490538]  ? __write_super+0x154/0x190 [bcache]
[ 1158.490556]  bch_uuid_write+0x16/0x40 [bcache]
[ 1158.490573]  __cached_dev_store+0x668/0x8c0 [bcache]
[ 1158.490592]  bch_cached_dev_store+0x46/0x110 [bcache]
[ 1158.490623]  sysfs_kf_write+0x3c/0x50
[ 1158.490631]  kernfs_fop_write+0x125/0x1a0
[ 1158.490648]  __vfs_write+0x1b/0x40
[ 1158.490654]  vfs_write+0xb1/0x1a0
[ 1158.490658]  ksys_write+0xa7/0xe0
[ 1158.490663]  __x64_sys_write+0x1a/0x20
[ 1158.490675]  do_syscall_64+0x5a/0x130
[ 1158.490685]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

A better reproducer has been posted here:
https://launchpadlibrarian.net/435523192/curtin-nvme.sh

(see https://bugs.launchpad.net/curtin/+bug/1796292 for more details)

With this new reproducer script it is very easy to hit the deadlock.

I've slightly modified my original patch and with that applied it seems
that I can't trigger any problem. I'm not sure if my patch is actually
the right thing to do, but it seems to prevent the deadlock from
happening.

I'll send a v2 soon.

-Andrea

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-08-06  9:12 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-10  9:31 [PATCH] bcache: fix deadlock in bcache_allocator() Andrea Righi
2019-07-10 15:11 ` Coly Li
2019-07-10 15:46   ` Andrea Righi
2019-07-10 15:57     ` Coly Li
2019-08-06  9:12     ` Andrea Righi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.