All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
@ 2017-01-23 13:20 colyli
  2017-01-23 14:16 ` Greg KH
  0 siblings, 1 reply; 9+ messages in thread
From: colyli @ 2017-01-23 13:20 UTC (permalink / raw)
  To: stable; +Cc: Kent Overstreet

Hi stable maintainers,

This patch is from Kent, upstream commit ID is be628be09563.
Olav Reinert <seroton10@gmail.com> reports a kerenl crash from
bcache (boo#1021260) and Oliver Nuekum points out this patch fixes the problem. 

I send this patch to stable@kernel.vger.org, hope this patch can be taken care
in stable kernels.

Thanks in advance.

Coly Li

Here I attach the original patch, just FYI.
---
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Wed, 26 Oct 2016 20:31:17 -0700
Subject: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
---
 drivers/md/bcache/bcache.h  |  4 ++--
 drivers/md/bcache/btree.c   | 39 ++++++++++++++++++++-------------------
 drivers/md/bcache/btree.h   |  3 +--
 drivers/md/bcache/request.c |  4 +---
 drivers/md/bcache/super.c   |  2 ++
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6b420a5..c3ea03c 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -425,7 +425,7 @@ struct cache {
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
 	 * cpu
 	 */
-	unsigned		invalidate_needs_gc:1;
+	unsigned		invalidate_needs_gc;
 
 	bool			discard; /* Get rid of? */
 
@@ -593,8 +593,8 @@ struct cache_set {
 
 	/* Counts how many sectors bio_insert has added to the cache */
 	atomic_t		sectors_to_gc;
+	wait_queue_head_t	gc_wait;
 
-	wait_queue_head_t	moving_gc_wait;
 	struct keybuf		moving_gc_keys;
 	/* Number of moving GC bios in flight */
 	struct semaphore	moving_in_flight;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 6fdd8e2..a43eedd 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1757,32 +1757,34 @@ static void bch_btree_gc(struct cache_set *c)
 	bch_moving_gc(c);
 }
 
-static int bch_gc_thread(void *arg)
+static bool gc_should_run(struct cache_set *c)
 {
-	struct cache_set *c = arg;
 	struct cache *ca;
 	unsigned i;
 
-	while (1) {
-again:
-		bch_btree_gc(c);
+	for_each_cache(ca, c, i)
+		if (ca->invalidate_needs_gc)
+			return true;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (kthread_should_stop())
-			break;
+	if (atomic_read(&c->sectors_to_gc) < 0)
+		return true;
 
-		mutex_lock(&c->bucket_lock);
+	return false;
+}
 
-		for_each_cache(ca, c, i)
-			if (ca->invalidate_needs_gc) {
-				mutex_unlock(&c->bucket_lock);
-				set_current_state(TASK_RUNNING);
-				goto again;
-			}
+static int bch_gc_thread(void *arg)
+{
+	struct cache_set *c = arg;
 
-		mutex_unlock(&c->bucket_lock);
+	while (1) {
+		wait_event_interruptible(c->gc_wait,
+			   kthread_should_stop() || gc_should_run(c));
 
-		schedule();
+		if (kthread_should_stop())
+			break;
+
+		set_gc_sectors(c);
+		bch_btree_gc(c);
 	}
 
 	return 0;
@@ -1790,11 +1792,10 @@ static int bch_gc_thread(void *arg)
 
 int bch_gc_thread_start(struct cache_set *c)
 {
-	c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
+	c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
 	if (IS_ERR(c->gc_thread))
 		return PTR_ERR(c->gc_thread);
 
-	set_task_state(c->gc_thread, TASK_INTERRUPTIBLE);
 	return 0;
 }
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 5c391fa..9b80417 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -260,8 +260,7 @@ void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
 
 static inline void wake_up_gc(struct cache_set *c)
 {
-	if (c->gc_thread)
-		wake_up_process(c->gc_thread);
+	wake_up(&c->gc_wait);
 }
 
 #define MAP_DONE	0
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index f49c541..76d2087 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -196,10 +196,8 @@ static void bch_data_insert_start(struct closure *cl)
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 	struct bio *bio = op->bio, *n;
 
-	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
-		set_gc_sectors(op->c);
+	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
 		wake_up_gc(op->c);
-	}
 
 	if (op->bypass)
 		return bch_data_invalidate(cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 2fb5bfe..b33dd3b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1489,6 +1489,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	mutex_init(&c->bucket_lock);
 	init_waitqueue_head(&c->btree_cache_wait);
 	init_waitqueue_head(&c->bucket_wait);
+	init_waitqueue_head(&c->gc_wait);
 	sema_init(&c->uuid_write_mutex, 1);
 
 	spin_lock_init(&c->btree_gc_time.lock);
@@ -1548,6 +1549,7 @@ static void run_cache_set(struct cache_set *c)
 
 	for_each_cache(ca, c, i)
 		c->nbuckets += ca->sb.nbuckets;
+	set_gc_sectors(c);
 
 	if (CACHE_SYNC(&c->sb)) {
 		LIST_HEAD(journal);
-- 
2.6.6


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-01-23 13:20 [PATCH] bcache: Make gc wakeup sane, remove set_task_state() colyli
@ 2017-01-23 14:16 ` Greg KH
  2017-01-23 14:45   ` Coly Li
  0 siblings, 1 reply; 9+ messages in thread
From: Greg KH @ 2017-01-23 14:16 UTC (permalink / raw)
  To: colyli; +Cc: stable, Kent Overstreet

On Mon, Jan 23, 2017 at 09:20:12PM +0800, colyli@suse.de wrote:
> Hi stable maintainers,
> 
> This patch is from Kent, upstream commit ID is be628be09563.
> Olav Reinert <seroton10@gmail.com> reports a kerenl crash from
> bcache (boo#1021260) and Oliver Nuekum points out this patch fixes the problem. 

"boo"?

> I send this patch to stable@kernel.vger.org, hope this patch can be taken care
> in stable kernels.
> 
> Thanks in advance.
> 
> Coly Li
> 
> Here I attach the original patch, just FYI.
> ---
> From: Kent Overstreet <kent.overstreet@gmail.com>
> Date: Wed, 26 Oct 2016 20:31:17 -0700
> Subject: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>

No changelog text?  Worst short changelog description ever?

This gives me no context of what is going on here.  Why does this fix a
bug?  What kernel(s) should it be backported to?

I need some more help here please.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-01-23 14:16 ` Greg KH
@ 2017-01-23 14:45   ` Coly Li
  2017-01-23 14:54     ` Greg KH
  0 siblings, 1 reply; 9+ messages in thread
From: Coly Li @ 2017-01-23 14:45 UTC (permalink / raw)
  To: Greg KH; +Cc: stable, Kent Overstreet

On 2017/1/23 下午10:16, Greg KH wrote:
> On Mon, Jan 23, 2017 at 09:20:12PM +0800, colyli@suse.de wrote:
>> Hi stable maintainers,
>>
>> This patch is from Kent, upstream commit ID is be628be09563.
>> Olav Reinert <seroton10@gmail.com> reports a kerenl crash from
>> bcache (boo#1021260) and Oliver Nuekum points out this patch fixes the problem. 
> 
> "boo"?
> 

Hi Greg,


"boo" is abbreviation of bugzilla.opensuse.org, I paste the original bug
report here,
==== start of bug report ==========
I have starting seeing errors like the one quoted below in the system
log. It occurs infrequently, but quite regularly, about 1-3 times a
week, on a server running 24x7.

Around the time it began, I started running a beta version of Leap 42.2,
upgraded from 42.1. Also, I enabled the "discard" option (SSD TRIM) on
the bcache cache about 3-6 months ago. I believe one of those two events
caused the bug to appear.

Not sure what other info is useful, please ask for whatever you need.


Oct 10 00:00:02 blackbox kernel: ------------[ cut here ]------------
Oct 10 00:00:02 blackbox kernel: WARNING: CPU: 4 PID: 1269 at
../kernel/sched/core.c:7891 __might_sleep+0x76/0x80()
Oct 10 00:00:02 blackbox kernel: do not call blocking ops when
!TASK_RUNNING; state=1 set at [<ffffffffa09e2325>]
bch_gc_thread+0x25/0x100 [
Oct 10 00:00:02 blackbox kernel: Modules linked in: vhost_net vhost
macvtap macvlan fuse ebt_arp ebt_ip ebtable_nat ebtable_filter ebtables
Oct 10 00:00:02 blackbox kernel:  mxm_wmi
Oct 10 00:00:02 blackbox kernel:  bcache aesni_intel raid1
snd_hda_codec_realtek aes_x86_64 lrw snd_hda_codec_generic gf128mul
md_mod glue_h
Oct 10 00:00:02 blackbox kernel:
Oct 10 00:00:02 blackbox kernel: CPU: 4 PID: 1269 Comm: bcache_gc Not
tainted 4.4.21-2-default #1
Oct 10 00:00:02 blackbox kernel: Hardware name: To be filled by O.E.M.
To be filled by O.E.M./M5A99X EVO R2.0, BIOS 2301 01/06/2014
Oct 10 00:00:02 blackbox kernel:  0000000000000000 ffffffff81326967
ffff8800b605be10 ffffffff81a5e431
Oct 10 00:00:02 blackbox kernel:  ffffffff8107e7d1 ffffffff81a5f54f
ffff8800b605be60 0000000000000061
Oct 10 00:00:02 blackbox kernel:  0000000000000000
Oct 10 00:00:02 blackbox kernel:  0000000000000000 ffffffff8107e84c
ffffffff81a4ef88
Oct 10 00:00:02 blackbox kernel: Call Trace:
Oct 10 00:00:02 blackbox kernel:  [<ffffffff81019e69>] dump_trace+0x59/0x320
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8101a22a>]
show_stack_log_lvl+0xfa/0x180
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8101afd1>] show_stack+0x21/0x40
Oct 10 00:00:02 blackbox kernel:  [<ffffffff81326967>] dump_stack+0x5c/0x85
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8107e7d1>]
warn_slowpath_common+0x81/0xb0
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8107e84c>]
warn_slowpath_fmt+0x4c/0x50
Oct 10 00:00:02 blackbox kernel:  [<ffffffff810a3026>]
__might_sleep+0x76/0x80
Oct 10 00:00:02 blackbox kernel:  [<ffffffff81605cac>] mutex_lock+0x1c/0x38
Oct 10 00:00:02 blackbox kernel:  [<ffffffffa09e2365>]
bch_gc_thread+0x65/0x100 [bcache]
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8109d268>] kthread+0xc8/0xe0
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8160828f>]
ret_from_fork+0x3f/0x70
Oct 10 00:00:02 blackbox kernel: DWARF2 unwinder stuck at
ret_from_fork+0x3f/0x70
Oct 10 00:00:02 blackbox kernel:
Oct 10 00:00:02 blackbox kernel: Leftover inexact backtrace:
Oct 10 00:00:02 blackbox kernel:  [<ffffffff8109d1a0>] ?
kthread_park+0x50/0x50
Oct 10 00:00:02 blackbox kernel: ---[ end trace c63abcb6c473e79b ]---
==== end of bug report ==========


# journalctl|grep "blocking ops"
Oct 10 00:00:02 blackbox kernel: do not call blocking ops when
!TASK_RUNNING; state=1 set at [<ffffffffa09e2325>]
bch_gc_thread+0x25/0x100 [bcache]
[snip repeated lines]



>> I send this patch to stable@kernel.vger.org, hope this patch can be taken care
>> in stable kernels.
>>
>> Thanks in advance.
>>
>> Coly Li
>>
>> Here I attach the original patch, just FYI.
>> ---
>> From: Kent Overstreet <kent.overstreet@gmail.com>
>> Date: Wed, 26 Oct 2016 20:31:17 -0700
>> Subject: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
>>
>> Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
> 
> No changelog text?  Worst short changelog description ever?

There is no change log from original patch, I am not the author, and it
is in upstream already. So I think I am not the right person to change
its commit log.

This is the first time I encounter this situation, that send a patch to
stable which is not from me. I guess Kent does not notice that this
patch indeed fixes a kernel oops. But it does fix a bug report for Leap
42.2 and SLE12-SP2.
> 
> This gives me no context of what is going on here.  Why does this fix a
> bug?  What kernel(s) should it be backported to?
> 

The bug is reported on Linux 4.4 based kernel, so at least all kernels
since Linux 4.4 should have the fix. Maybe Kent can provide more
accurate suggestion.

Thanks.

Coly



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-01-23 14:45   ` Coly Li
@ 2017-01-23 14:54     ` Greg KH
  2017-02-20 12:31       ` Greg KH
  0 siblings, 1 reply; 9+ messages in thread
From: Greg KH @ 2017-01-23 14:54 UTC (permalink / raw)
  To: Coly Li; +Cc: stable, Kent Overstreet

On Mon, Jan 23, 2017 at 10:45:47PM +0800, Coly Li wrote:
> On 2017/1/23 下午10:16, Greg KH wrote:
> > On Mon, Jan 23, 2017 at 09:20:12PM +0800, colyli@suse.de wrote:
> >> Hi stable maintainers,
> >>
> >> This patch is from Kent, upstream commit ID is be628be09563.
> >> Olav Reinert <seroton10@gmail.com> reports a kerenl crash from
> >> bcache (boo#1021260) and Oliver Nuekum points out this patch fixes the problem. 
> > 
> > "boo"?
> > 
> 
> Hi Greg,
> 
> 
> "boo" is abbreviation of bugzilla.opensuse.org, I paste the original bug
> report here,
> ==== start of bug report ==========
> I have starting seeing errors like the one quoted below in the system
> log. It occurs infrequently, but quite regularly, about 1-3 times a
> week, on a server running 24x7.
> 
> Around the time it began, I started running a beta version of Leap 42.2,
> upgraded from 42.1. Also, I enabled the "discard" option (SSD TRIM) on
> the bcache cache about 3-6 months ago. I believe one of those two events
> caused the bug to appear.
> 
> Not sure what other info is useful, please ask for whatever you need.
> 
> 
> Oct 10 00:00:02 blackbox kernel: ------------[ cut here ]------------
> Oct 10 00:00:02 blackbox kernel: WARNING: CPU: 4 PID: 1269 at
> ../kernel/sched/core.c:7891 __might_sleep+0x76/0x80()
> Oct 10 00:00:02 blackbox kernel: do not call blocking ops when
> !TASK_RUNNING; state=1 set at [<ffffffffa09e2325>]
> bch_gc_thread+0x25/0x100 [
> Oct 10 00:00:02 blackbox kernel: Modules linked in: vhost_net vhost
> macvtap macvlan fuse ebt_arp ebt_ip ebtable_nat ebtable_filter ebtables
> Oct 10 00:00:02 blackbox kernel:  mxm_wmi
> Oct 10 00:00:02 blackbox kernel:  bcache aesni_intel raid1
> snd_hda_codec_realtek aes_x86_64 lrw snd_hda_codec_generic gf128mul
> md_mod glue_h
> Oct 10 00:00:02 blackbox kernel:
> Oct 10 00:00:02 blackbox kernel: CPU: 4 PID: 1269 Comm: bcache_gc Not
> tainted 4.4.21-2-default #1
> Oct 10 00:00:02 blackbox kernel: Hardware name: To be filled by O.E.M.
> To be filled by O.E.M./M5A99X EVO R2.0, BIOS 2301 01/06/2014
> Oct 10 00:00:02 blackbox kernel:  0000000000000000 ffffffff81326967
> ffff8800b605be10 ffffffff81a5e431
> Oct 10 00:00:02 blackbox kernel:  ffffffff8107e7d1 ffffffff81a5f54f
> ffff8800b605be60 0000000000000061
> Oct 10 00:00:02 blackbox kernel:  0000000000000000
> Oct 10 00:00:02 blackbox kernel:  0000000000000000 ffffffff8107e84c
> ffffffff81a4ef88
> Oct 10 00:00:02 blackbox kernel: Call Trace:
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff81019e69>] dump_trace+0x59/0x320
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8101a22a>]
> show_stack_log_lvl+0xfa/0x180
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8101afd1>] show_stack+0x21/0x40
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff81326967>] dump_stack+0x5c/0x85
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8107e7d1>]
> warn_slowpath_common+0x81/0xb0
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8107e84c>]
> warn_slowpath_fmt+0x4c/0x50
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff810a3026>]
> __might_sleep+0x76/0x80
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff81605cac>] mutex_lock+0x1c/0x38
> Oct 10 00:00:02 blackbox kernel:  [<ffffffffa09e2365>]
> bch_gc_thread+0x65/0x100 [bcache]
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8109d268>] kthread+0xc8/0xe0
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8160828f>]
> ret_from_fork+0x3f/0x70
> Oct 10 00:00:02 blackbox kernel: DWARF2 unwinder stuck at
> ret_from_fork+0x3f/0x70
> Oct 10 00:00:02 blackbox kernel:
> Oct 10 00:00:02 blackbox kernel: Leftover inexact backtrace:
> Oct 10 00:00:02 blackbox kernel:  [<ffffffff8109d1a0>] ?
> kthread_park+0x50/0x50
> Oct 10 00:00:02 blackbox kernel: ---[ end trace c63abcb6c473e79b ]---
> ==== end of bug report ==========
> 
> 
> # journalctl|grep "blocking ops"
> Oct 10 00:00:02 blackbox kernel: do not call blocking ops when
> !TASK_RUNNING; state=1 set at [<ffffffffa09e2325>]
> bch_gc_thread+0x25/0x100 [bcache]
> [snip repeated lines]
> 
> 
> 
> >> I send this patch to stable@kernel.vger.org, hope this patch can be taken care
> >> in stable kernels.
> >>
> >> Thanks in advance.
> >>
> >> Coly Li
> >>
> >> Here I attach the original patch, just FYI.
> >> ---
> >> From: Kent Overstreet <kent.overstreet@gmail.com>
> >> Date: Wed, 26 Oct 2016 20:31:17 -0700
> >> Subject: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
> >>
> >> Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
> > 
> > No changelog text?  Worst short changelog description ever?
> 
> There is no change log from original patch, I am not the author, and it
> is in upstream already. So I think I am not the right person to change
> its commit log.

Oh, I didn't mean to complain to you, my complain was to Kent.

Kent, please go read the section, "The canonical patch format" in the
Documentation/SubmittingPatches file for how to do this properly.

> This is the first time I encounter this situation, that send a patch to
> stable which is not from me. I guess Kent does not notice that this
> patch indeed fixes a kernel oops. But it does fix a bug report for Leap
> 42.2 and SLE12-SP2.
> > 
> > This gives me no context of what is going on here.  Why does this fix a
> > bug?  What kernel(s) should it be backported to?
> > 
> 
> The bug is reported on Linux 4.4 based kernel, so at least all kernels
> since Linux 4.4 should have the fix. Maybe Kent can provide more
> accurate suggestion.

Kent, any hints?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-01-23 14:54     ` Greg KH
@ 2017-02-20 12:31       ` Greg KH
  2017-02-20 13:12         ` Kent Overstreet
  0 siblings, 1 reply; 9+ messages in thread
From: Greg KH @ 2017-02-20 12:31 UTC (permalink / raw)
  To: Coly Li; +Cc: stable, Kent Overstreet

On Mon, Jan 23, 2017 at 03:54:07PM +0100, Greg KH wrote:
> On Mon, Jan 23, 2017 at 10:45:47PM +0800, Coly Li wrote:
> > On 2017/1/23 下午10:16, Greg KH wrote:
> > > On Mon, Jan 23, 2017 at 09:20:12PM +0800, colyli@suse.de wrote:
> > >> Hi stable maintainers,
> > >>
> > >> This patch is from Kent, upstream commit ID is be628be09563.
> > >> Olav Reinert <seroton10@gmail.com> reports a kerenl crash from
> > >> bcache (boo#1021260) and Oliver Nuekum points out this patch fixes the problem. 
> > > 
> > > "boo"?
> > > 
> > 
> > Hi Greg,
> > 
> > 
> > "boo" is abbreviation of bugzilla.opensuse.org, I paste the original bug
> > report here,
> > ==== start of bug report ==========
> > I have starting seeing errors like the one quoted below in the system
> > log. It occurs infrequently, but quite regularly, about 1-3 times a
> > week, on a server running 24x7.
> > 
> > Around the time it began, I started running a beta version of Leap 42.2,
> > upgraded from 42.1. Also, I enabled the "discard" option (SSD TRIM) on
> > the bcache cache about 3-6 months ago. I believe one of those two events
> > caused the bug to appear.
> > 
> > Not sure what other info is useful, please ask for whatever you need.
> > 
> > 
> > Oct 10 00:00:02 blackbox kernel: ------------[ cut here ]------------
> > Oct 10 00:00:02 blackbox kernel: WARNING: CPU: 4 PID: 1269 at
> > ../kernel/sched/core.c:7891 __might_sleep+0x76/0x80()
> > Oct 10 00:00:02 blackbox kernel: do not call blocking ops when
> > !TASK_RUNNING; state=1 set at [<ffffffffa09e2325>]
> > bch_gc_thread+0x25/0x100 [
> > Oct 10 00:00:02 blackbox kernel: Modules linked in: vhost_net vhost
> > macvtap macvlan fuse ebt_arp ebt_ip ebtable_nat ebtable_filter ebtables
> > Oct 10 00:00:02 blackbox kernel:  mxm_wmi
> > Oct 10 00:00:02 blackbox kernel:  bcache aesni_intel raid1
> > snd_hda_codec_realtek aes_x86_64 lrw snd_hda_codec_generic gf128mul
> > md_mod glue_h
> > Oct 10 00:00:02 blackbox kernel:
> > Oct 10 00:00:02 blackbox kernel: CPU: 4 PID: 1269 Comm: bcache_gc Not
> > tainted 4.4.21-2-default #1
> > Oct 10 00:00:02 blackbox kernel: Hardware name: To be filled by O.E.M.
> > To be filled by O.E.M./M5A99X EVO R2.0, BIOS 2301 01/06/2014
> > Oct 10 00:00:02 blackbox kernel:  0000000000000000 ffffffff81326967
> > ffff8800b605be10 ffffffff81a5e431
> > Oct 10 00:00:02 blackbox kernel:  ffffffff8107e7d1 ffffffff81a5f54f
> > ffff8800b605be60 0000000000000061
> > Oct 10 00:00:02 blackbox kernel:  0000000000000000
> > Oct 10 00:00:02 blackbox kernel:  0000000000000000 ffffffff8107e84c
> > ffffffff81a4ef88
> > Oct 10 00:00:02 blackbox kernel: Call Trace:
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff81019e69>] dump_trace+0x59/0x320
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8101a22a>]
> > show_stack_log_lvl+0xfa/0x180
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8101afd1>] show_stack+0x21/0x40
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff81326967>] dump_stack+0x5c/0x85
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8107e7d1>]
> > warn_slowpath_common+0x81/0xb0
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8107e84c>]
> > warn_slowpath_fmt+0x4c/0x50
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff810a3026>]
> > __might_sleep+0x76/0x80
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff81605cac>] mutex_lock+0x1c/0x38
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffffa09e2365>]
> > bch_gc_thread+0x65/0x100 [bcache]
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8109d268>] kthread+0xc8/0xe0
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8160828f>]
> > ret_from_fork+0x3f/0x70
> > Oct 10 00:00:02 blackbox kernel: DWARF2 unwinder stuck at
> > ret_from_fork+0x3f/0x70
> > Oct 10 00:00:02 blackbox kernel:
> > Oct 10 00:00:02 blackbox kernel: Leftover inexact backtrace:
> > Oct 10 00:00:02 blackbox kernel:  [<ffffffff8109d1a0>] ?
> > kthread_park+0x50/0x50
> > Oct 10 00:00:02 blackbox kernel: ---[ end trace c63abcb6c473e79b ]---
> > ==== end of bug report ==========
> > 
> > 
> > # journalctl|grep "blocking ops"
> > Oct 10 00:00:02 blackbox kernel: do not call blocking ops when
> > !TASK_RUNNING; state=1 set at [<ffffffffa09e2325>]
> > bch_gc_thread+0x25/0x100 [bcache]
> > [snip repeated lines]
> > 
> > 
> > 
> > >> I send this patch to stable@kernel.vger.org, hope this patch can be taken care
> > >> in stable kernels.
> > >>
> > >> Thanks in advance.
> > >>
> > >> Coly Li
> > >>
> > >> Here I attach the original patch, just FYI.
> > >> ---
> > >> From: Kent Overstreet <kent.overstreet@gmail.com>
> > >> Date: Wed, 26 Oct 2016 20:31:17 -0700
> > >> Subject: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
> > >>
> > >> Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
> > > 
> > > No changelog text?  Worst short changelog description ever?
> > 
> > There is no change log from original patch, I am not the author, and it
> > is in upstream already. So I think I am not the right person to change
> > its commit log.
> 
> Oh, I didn't mean to complain to you, my complain was to Kent.
> 
> Kent, please go read the section, "The canonical patch format" in the
> Documentation/SubmittingPatches file for how to do this properly.
> 
> > This is the first time I encounter this situation, that send a patch to
> > stable which is not from me. I guess Kent does not notice that this
> > patch indeed fixes a kernel oops. But it does fix a bug report for Leap
> > 42.2 and SLE12-SP2.
> > > 
> > > This gives me no context of what is going on here.  Why does this fix a
> > > bug?  What kernel(s) should it be backported to?
> > > 
> > 
> > The bug is reported on Linux 4.4 based kernel, so at least all kernels
> > since Linux 4.4 should have the fix. Maybe Kent can provide more
> > accurate suggestion.
> 
> Kent, any hints?

Without a response from the maintainer, I can't apply this...

greg k-h

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-02-20 12:31       ` Greg KH
@ 2017-02-20 13:12         ` Kent Overstreet
  2017-02-20 14:06           ` Greg KH
  0 siblings, 1 reply; 9+ messages in thread
From: Kent Overstreet @ 2017-02-20 13:12 UTC (permalink / raw)
  To: Greg KH; +Cc: Coly Li, stable

On Mon, Feb 20, 2017 at 01:31:37PM +0100, Greg KH wrote:
> On Mon, Jan 23, 2017 at 03:54:07PM +0100, Greg KH wrote:
> > Kent, any hints?
> 
> Without a response from the maintainer, I can't apply this...
> 
> greg k-h

Sorry I missed this - yes, this patch should be safe to apply and it does fix
that crash. The relevant code hasn't been changed in ages, 4.4 is definitely
fine.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-02-20 13:12         ` Kent Overstreet
@ 2017-02-20 14:06           ` Greg KH
  2017-02-20 14:36             ` Coly Li
  0 siblings, 1 reply; 9+ messages in thread
From: Greg KH @ 2017-02-20 14:06 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: Coly Li, stable

On Mon, Feb 20, 2017 at 04:12:58AM -0900, Kent Overstreet wrote:
> On Mon, Feb 20, 2017 at 01:31:37PM +0100, Greg KH wrote:
> > On Mon, Jan 23, 2017 at 03:54:07PM +0100, Greg KH wrote:
> > > Kent, any hints?
> > 
> > Without a response from the maintainer, I can't apply this...
> > 
> > greg k-h
> 
> Sorry I missed this - yes, this patch should be safe to apply and it does fix
> that crash. The relevant code hasn't been changed in ages, 4.4 is definitely
> fine.

Ok, I've queued this up for 4.9, but for 4.4 it does not apply.  Coly,
can you provide a working backport for 4.4-stable?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-02-20 14:06           ` Greg KH
@ 2017-02-20 14:36             ` Coly Li
  2017-02-20 15:19               ` Greg KH
  0 siblings, 1 reply; 9+ messages in thread
From: Coly Li @ 2017-02-20 14:36 UTC (permalink / raw)
  To: Greg KH; +Cc: Kent Overstreet, stable

[-- Attachment #1: Type: text/plain, Size: 932 bytes --]

On 2017/2/20 下午10:06, Greg KH wrote:
> On Mon, Feb 20, 2017 at 04:12:58AM -0900, Kent Overstreet wrote:
>> On Mon, Feb 20, 2017 at 01:31:37PM +0100, Greg KH wrote:
>>> On Mon, Jan 23, 2017 at 03:54:07PM +0100, Greg KH wrote:
>>>> Kent, any hints?
>>>
>>> Without a response from the maintainer, I can't apply this...
>>>
>>> greg k-h
>>
>> Sorry I missed this - yes, this patch should be safe to apply and it does fix
>> that crash. The relevant code hasn't been changed in ages, 4.4 is definitely
>> fine.
> 
> Ok, I've queued this up for 4.9, but for 4.4 it does not apply.  Coly,
> can you provide a working backport for 4.4-stable?

Greg,

It is because the 'commit 29e6c57cc78e ("bcache: bch_gc_thread() is not
freezable")' remove a "try_to_freeze()" in bch_gc_thread(), which
happens in v4.7.

I just rebase Kent's fix to v4.4 kernel, solve the conflict. Could you
please check and try the attached patch ?

Thanks.

Coly

[-- Attachment #2: 0001-bcache-Make-gc-wakeup-sane-remove-set_task_state.patch --]
[-- Type: text/plain, Size: 4404 bytes --]

Subject: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
---
 drivers/md/bcache/bcache.h  |  4 ++--
 drivers/md/bcache/btree.c   | 40 ++++++++++++++++++++--------------------
 drivers/md/bcache/btree.h   |  3 +--
 drivers/md/bcache/request.c |  4 +---
 drivers/md/bcache/super.c   |  2 ++
 5 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6b420a5..c3ea03c 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -425,7 +425,7 @@ struct cache {
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
 	 * cpu
 	 */
-	unsigned		invalidate_needs_gc:1;
+	unsigned		invalidate_needs_gc;
 
 	bool			discard; /* Get rid of? */
 
@@ -593,8 +593,8 @@ struct cache_set {
 
 	/* Counts how many sectors bio_insert has added to the cache */
 	atomic_t		sectors_to_gc;
+	wait_queue_head_t	gc_wait;
 
-	wait_queue_head_t	moving_gc_wait;
 	struct keybuf		moving_gc_keys;
 	/* Number of moving GC bios in flight */
 	struct semaphore	moving_in_flight;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 83392f8..b5eccb5 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1761,33 +1761,34 @@ static void bch_btree_gc(struct cache_set *c)
 	bch_moving_gc(c);
 }
 
-static int bch_gc_thread(void *arg)
+static bool gc_should_run(struct cache_set *c)
 {
-	struct cache_set *c = arg;
 	struct cache *ca;
 	unsigned i;
 
-	while (1) {
-again:
-		bch_btree_gc(c);
+	for_each_cache(ca, c, i)
+		if (ca->invalidate_needs_gc)
+			return true;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (kthread_should_stop())
-			break;
+	if (atomic_read(&c->sectors_to_gc) < 0)
+		return true;
 
-		mutex_lock(&c->bucket_lock);
+	return false;
+}
 
-		for_each_cache(ca, c, i)
-			if (ca->invalidate_needs_gc) {
-				mutex_unlock(&c->bucket_lock);
-				set_current_state(TASK_RUNNING);
-				goto again;
-			}
+static int bch_gc_thread(void *arg)
+{
+	struct cache_set *c = arg;
 
-		mutex_unlock(&c->bucket_lock);
+	while (1) {
+		wait_event_interruptible(c->gc_wait,
+			   kthread_should_stop() || gc_should_run(c));
 
-		try_to_freeze();
-		schedule();
+		if (kthread_should_stop())
+			break;
+
+		set_gc_sectors(c);
+		bch_btree_gc(c);
 	}
 
 	return 0;
@@ -1795,11 +1796,10 @@ again:
 
 int bch_gc_thread_start(struct cache_set *c)
 {
-	c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
+	c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
 	if (IS_ERR(c->gc_thread))
 		return PTR_ERR(c->gc_thread);
 
-	set_task_state(c->gc_thread, TASK_INTERRUPTIBLE);
 	return 0;
 }
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 5c391fa..9b80417 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -260,8 +260,7 @@ void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
 
 static inline void wake_up_gc(struct cache_set *c)
 {
-	if (c->gc_thread)
-		wake_up_process(c->gc_thread);
+	wake_up(&c->gc_wait);
 }
 
 #define MAP_DONE	0
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 25fa844..2410df1 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -196,10 +196,8 @@ static void bch_data_insert_start(struct closure *cl)
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 	struct bio *bio = op->bio, *n;
 
-	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
-		set_gc_sectors(op->c);
+	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
 		wake_up_gc(op->c);
-	}
 
 	if (op->bypass)
 		return bch_data_invalidate(cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 679a093..81fef23 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1474,6 +1474,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	mutex_init(&c->bucket_lock);
 	init_waitqueue_head(&c->btree_cache_wait);
 	init_waitqueue_head(&c->bucket_wait);
+	init_waitqueue_head(&c->gc_wait);
 	sema_init(&c->uuid_write_mutex, 1);
 
 	spin_lock_init(&c->btree_gc_time.lock);
@@ -1532,6 +1533,7 @@ static void run_cache_set(struct cache_set *c)
 
 	for_each_cache(ca, c, i)
 		c->nbuckets += ca->sb.nbuckets;
+	set_gc_sectors(c);
 
 	if (CACHE_SYNC(&c->sb)) {
 		LIST_HEAD(journal);
-- 
2.10.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] bcache: Make gc wakeup sane, remove set_task_state()
  2017-02-20 14:36             ` Coly Li
@ 2017-02-20 15:19               ` Greg KH
  0 siblings, 0 replies; 9+ messages in thread
From: Greg KH @ 2017-02-20 15:19 UTC (permalink / raw)
  To: Coly Li; +Cc: Kent Overstreet, stable

On Mon, Feb 20, 2017 at 10:36:13PM +0800, Coly Li wrote:
> On 2017/2/20 下午10:06, Greg KH wrote:
> > On Mon, Feb 20, 2017 at 04:12:58AM -0900, Kent Overstreet wrote:
> >> On Mon, Feb 20, 2017 at 01:31:37PM +0100, Greg KH wrote:
> >>> On Mon, Jan 23, 2017 at 03:54:07PM +0100, Greg KH wrote:
> >>>> Kent, any hints?
> >>>
> >>> Without a response from the maintainer, I can't apply this...
> >>>
> >>> greg k-h
> >>
> >> Sorry I missed this - yes, this patch should be safe to apply and it does fix
> >> that crash. The relevant code hasn't been changed in ages, 4.4 is definitely
> >> fine.
> > 
> > Ok, I've queued this up for 4.9, but for 4.4 it does not apply.  Coly,
> > can you provide a working backport for 4.4-stable?
> 
> Greg,
> 
> It is because the 'commit 29e6c57cc78e ("bcache: bch_gc_thread() is not
> freezable")' remove a "try_to_freeze()" in bch_gc_thread(), which
> happens in v4.7.
> 
> I just rebase Kent's fix to v4.4 kernel, solve the conflict. Could you
> please check and try the attached patch ?

That worked, thanks!

greg k-h

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-02-20 15:20 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-23 13:20 [PATCH] bcache: Make gc wakeup sane, remove set_task_state() colyli
2017-01-23 14:16 ` Greg KH
2017-01-23 14:45   ` Coly Li
2017-01-23 14:54     ` Greg KH
2017-02-20 12:31       ` Greg KH
2017-02-20 13:12         ` Kent Overstreet
2017-02-20 14:06           ` Greg KH
2017-02-20 14:36             ` Coly Li
2017-02-20 15:19               ` Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.