* [patch 2/3] dm-writecache: convert wait queue to wake_up_process
@ 2018-06-06 15:31 Mikulas Patocka
  2018-06-07 15:48 ` [patch 2/3 v2] " Mikulas Patocka
  0 siblings, 1 reply; 8+ messages in thread
From: Mikulas Patocka @ 2018-06-06 15:31 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Mikulas Patocka

[-- Attachment #1: dm-writecache-wake_up_process.patch --]
[-- Type: text/plain, Size: 3889 bytes --]

If there's just one process that can wait on a queue, we can use
wake_up_process. According to Linus, it is safe to call wake_up_process
on a process even if the process may be doing something else.
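
Condensed from the diff below, the resulting single-waiter pattern looks roughly
like this sketch; the struct and function names (ctx, item, consumer_thread,
producer_add) are placeholders, not dm-writecache code:

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

struct item {
	struct list_head entry;
};

struct ctx {
	raw_spinlock_t list_lock;
	struct list_head list;
	struct task_struct *thread;
};

/* consumer: the only task that ever sleeps waiting for the list */
static int consumer_thread(void *data)
{
	struct ctx *c = data;

	while (1) {
		raw_spin_lock_irq(&c->list_lock);
		if (!list_empty(&c->list)) {
			LIST_HEAD(local);

			list_splice_init(&c->list, &local);
			raw_spin_unlock_irq(&c->list_lock);
			/* process the entries now on "local" */
			continue;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		raw_spin_unlock_irq(&c->list_lock);
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			break;
		}
		schedule();
	}
	return 0;
}

/* producer: wake the consumer only on the empty -> non-empty transition */
static void producer_add(struct ctx *c, struct item *item)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&c->list_lock, flags);
	if (list_empty(&c->list))
		wake_up_process(c->thread);	/* safe even if the thread is not sleeping */
	list_add_tail(&item->entry, &c->list);
	raw_spin_unlock_irqrestore(&c->list_lock, flags);
}

Because both sides hold list_lock, a wake-up issued between the consumer's
emptiness check and its schedule() merely sets the task back to TASK_RUNNING,
so no wake-up is lost.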

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-writecache.c |   32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-05 22:54:49.000000000 +0200
+++ linux-2.6/drivers/md/dm-writecache.c	2018-06-05 23:02:00.000000000 +0200
@@ -10,7 +10,6 @@
 #include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-#include <linux/swait.h>
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
 #include <linux/dax.h>
@@ -168,7 +167,7 @@ struct dm_writecache {
 
 	struct dm_io_client *dm_io;
 
-	struct swait_queue_head endio_thread_wait;
+	raw_spinlock_t endio_list_lock;
 	struct list_head endio_list;
 	struct task_struct *endio_thread;
 
@@ -1273,10 +1272,11 @@ static void writecache_writeback_endio(s
 	struct dm_writecache *wc = wb->wc;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
+	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
+	if (unlikely(list_empty(&wc->endio_list)))
+		wake_up_process(wc->endio_thread);
 	list_add_tail(&wb->endio_entry, &wc->endio_list);
-	swake_up_locked(&wc->endio_thread_wait);
-	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
+	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
 }
 
 static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
@@ -1286,10 +1286,11 @@ static void writecache_copy_endio(int re
 
 	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
 
-	raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+	raw_spin_lock_irq(&wc->endio_list_lock);
+	if (unlikely(list_empty(&wc->endio_list)))
+		wake_up_process(wc->endio_thread);
 	list_add_tail(&c->endio_entry, &wc->endio_list);
-	swake_up_locked(&wc->endio_thread_wait);
-	raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+	raw_spin_unlock_irq(&wc->endio_list_lock);
 }
 
 static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
@@ -1364,33 +1365,30 @@ static int writecache_endio_thread(void
 	struct dm_writecache *wc = data;
 
 	while (1) {
-		DECLARE_SWAITQUEUE(wait);
 		struct list_head list;
 
-		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+		raw_spin_lock_irq(&wc->endio_list_lock);
 continue_locked:
 		if (!list_empty(&wc->endio_list))
 			goto pop_from_list;
 		set_current_state(TASK_INTERRUPTIBLE);
-		__prepare_to_swait(&wc->endio_thread_wait, &wait);
-		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+		raw_spin_unlock_irq(&wc->endio_list_lock);
 
 		if (unlikely(kthread_should_stop())) {
-			finish_swait(&wc->endio_thread_wait, &wait);
+			set_current_state(TASK_RUNNING);
 			break;
 		}
 
 		schedule();
 
-		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
-		__finish_swait(&wc->endio_thread_wait, &wait);
+		raw_spin_lock_irq(&wc->endio_list_lock);
 		goto continue_locked;
 
 pop_from_list:
 		list = wc->endio_list;
 		list.next->prev = list.prev->next = &list;
 		INIT_LIST_HEAD(&wc->endio_list);
-		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+		raw_spin_unlock_irq(&wc->endio_list_lock);
 
 		if (!WC_MODE_FUA(wc))
 			writecache_disk_flush(wc, wc->dev);
@@ -1848,7 +1846,7 @@ static int writecache_ctr(struct dm_targ
 	INIT_WORK(&wc->writeback_work, writecache_writeback);
 	INIT_WORK(&wc->flush_work, writecache_flush_work);
 
-	init_swait_queue_head(&wc->endio_thread_wait);
+	raw_spin_lock_init(&wc->endio_list_lock);
 	INIT_LIST_HEAD(&wc->endio_list);
 	wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio");
 	if (IS_ERR(wc->endio_thread)) {

* [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-06 15:31 [patch 2/3] dm-writecache: convert wait queue to wake_up_process Mikulas Patocka
@ 2018-06-07 15:48 ` Mikulas Patocka
  2018-06-08 15:13   ` Mike Snitzer
  0 siblings, 1 reply; 8+ messages in thread
From: Mikulas Patocka @ 2018-06-07 15:48 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel

This is the second version of this patch - it also removes the label
continue_locked, because it is no longer needed. I forgot to refresh the
patch before sending it, so I sent an older version.


From: Mikulas Patocka <mpatocka@redhat.com>
Subject: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process

If there's just one process that can wait on a queue, we can use
wake_up_process. According to Linus, it is safe to call wake_up_process
on a process even if the process may be doing something else.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-writecache.c |   34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-05 22:54:49.000000000 +0200
+++ linux-2.6/drivers/md/dm-writecache.c	2018-06-07 17:44:11.000000000 +0200
@@ -10,7 +10,6 @@
 #include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-#include <linux/swait.h>
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
 #include <linux/dax.h>
@@ -168,7 +167,7 @@ struct dm_writecache {
 
 	struct dm_io_client *dm_io;
 
-	struct swait_queue_head endio_thread_wait;
+	raw_spinlock_t endio_list_lock;
 	struct list_head endio_list;
 	struct task_struct *endio_thread;
 
@@ -1273,10 +1272,11 @@ static void writecache_writeback_endio(s
 	struct dm_writecache *wc = wb->wc;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
+	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
+	if (unlikely(list_empty(&wc->endio_list)))
+		wake_up_process(wc->endio_thread);
 	list_add_tail(&wb->endio_entry, &wc->endio_list);
-	swake_up_locked(&wc->endio_thread_wait);
-	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
+	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
 }
 
 static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
@@ -1286,10 +1286,11 @@ static void writecache_copy_endio(int re
 
 	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
 
-	raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+	raw_spin_lock_irq(&wc->endio_list_lock);
+	if (unlikely(list_empty(&wc->endio_list)))
+		wake_up_process(wc->endio_thread);
 	list_add_tail(&c->endio_entry, &wc->endio_list);
-	swake_up_locked(&wc->endio_thread_wait);
-	raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+	raw_spin_unlock_irq(&wc->endio_list_lock);
 }
 
 static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
@@ -1364,33 +1365,28 @@ static int writecache_endio_thread(void
 	struct dm_writecache *wc = data;
 
 	while (1) {
-		DECLARE_SWAITQUEUE(wait);
 		struct list_head list;
 
-		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
-continue_locked:
+		raw_spin_lock_irq(&wc->endio_list_lock);
 		if (!list_empty(&wc->endio_list))
 			goto pop_from_list;
 		set_current_state(TASK_INTERRUPTIBLE);
-		__prepare_to_swait(&wc->endio_thread_wait, &wait);
-		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+		raw_spin_unlock_irq(&wc->endio_list_lock);
 
 		if (unlikely(kthread_should_stop())) {
-			finish_swait(&wc->endio_thread_wait, &wait);
+			set_current_state(TASK_RUNNING);
 			break;
 		}
 
 		schedule();
 
-		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
-		__finish_swait(&wc->endio_thread_wait, &wait);
-		goto continue_locked;
+		continue;
 
 pop_from_list:
 		list = wc->endio_list;
 		list.next->prev = list.prev->next = &list;
 		INIT_LIST_HEAD(&wc->endio_list);
-		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+		raw_spin_unlock_irq(&wc->endio_list_lock);
 
 		if (!WC_MODE_FUA(wc))
 			writecache_disk_flush(wc, wc->dev);
@@ -1848,7 +1844,7 @@ static int writecache_ctr(struct dm_targ
 	INIT_WORK(&wc->writeback_work, writecache_writeback);
 	INIT_WORK(&wc->flush_work, writecache_flush_work);
 
-	init_swait_queue_head(&wc->endio_thread_wait);
+	raw_spin_lock_init(&wc->endio_list_lock);
 	INIT_LIST_HEAD(&wc->endio_list);
 	wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio");
 	if (IS_ERR(wc->endio_thread)) {

* Re: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-07 15:48 ` [patch 2/3 v2] " Mikulas Patocka
@ 2018-06-08 15:13   ` Mike Snitzer
  2018-06-08 15:30     ` Mike Snitzer
  2018-06-08 20:59     ` Mikulas Patocka
  0 siblings, 2 replies; 8+ messages in thread
From: Mike Snitzer @ 2018-06-08 15:13 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel

On Thu, Jun 07 2018 at 11:48am -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> This is the second version of this patch - it also removes the label
> continue_locked, because it is no longer needed. I forgot to refresh the
> patch before sending it, so I sent an older version.
> 
> 
> From: Mikulas Patocka <mpatocka@redhat.com>
> Subject: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
> 
> If there's just one process that can wait on a queue, we can use
> wake_up_process. According to Linus, it is safe to call wake_up_process
> on a process even if the process may be doing something else.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> 
> ---
>  drivers/md/dm-writecache.c |   34 +++++++++++++++-------------------
>  1 file changed, 15 insertions(+), 19 deletions(-)
> 
> Index: linux-2.6/drivers/md/dm-writecache.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-05 22:54:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-writecache.c	2018-06-07 17:44:11.000000000 +0200
> @@ -1273,10 +1272,11 @@ static void writecache_writeback_endio(s
>  	struct dm_writecache *wc = wb->wc;
>  	unsigned long flags;
>  
> -	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
> +	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> +	if (unlikely(list_empty(&wc->endio_list)))
> +		wake_up_process(wc->endio_thread);
>  	list_add_tail(&wb->endio_entry, &wc->endio_list);
> -	swake_up_locked(&wc->endio_thread_wait);
> -	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
> +	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
>  }

I'm not following the logic you're using for the above pattern of using
wake_up_process if the list is empty.. seems unintuitive.

Given you add to the list (be it endio here, or flush elsewhere), why
not just add to the list and then always wake_up_process()?

Mike

* Re: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-08 15:13   ` Mike Snitzer
@ 2018-06-08 15:30     ` Mike Snitzer
  2018-06-08 21:06       ` Mikulas Patocka
  2018-06-08 20:59     ` Mikulas Patocka
  1 sibling, 1 reply; 8+ messages in thread
From: Mike Snitzer @ 2018-06-08 15:30 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel

On Fri, Jun 08 2018 at 11:13P -0400,
Mike Snitzer <snitzer@redhat.com> wrote:

> On Thu, Jun 07 2018 at 11:48am -0400,
> Mikulas Patocka <mpatocka@redhat.com> wrote:
> 
> > This is the second version of this patch - it also removes the label
> > continue_locked, because it is no longer needed. I forgot to refresh the
> > patch before sending it, so I sent an older version.
> > 
> > 
> > From: Mikulas Patocka <mpatocka@redhat.com>
> > Subject: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
> > 
> > If there's just one process that can wait on a queue, we can use
> > wake_up_process. According to Linus, it is safe to call wake_up_process
> > on a process even if the process may be doing something else.
> > 
> > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > 
> > ---
> >  drivers/md/dm-writecache.c |   34 +++++++++++++++-------------------
> >  1 file changed, 15 insertions(+), 19 deletions(-)
> > 
> > Index: linux-2.6/drivers/md/dm-writecache.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-05 22:54:49.000000000 +0200
> > +++ linux-2.6/drivers/md/dm-writecache.c	2018-06-07 17:44:11.000000000 +0200
> > @@ -1273,10 +1272,11 @@ static void writecache_writeback_endio(s
> >  	struct dm_writecache *wc = wb->wc;
> >  	unsigned long flags;
> >  
> > -	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
> > +	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> > +	if (unlikely(list_empty(&wc->endio_list)))
> > +		wake_up_process(wc->endio_thread);
> >  	list_add_tail(&wb->endio_entry, &wc->endio_list);
> > -	swake_up_locked(&wc->endio_thread_wait);
> > -	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
> > +	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
> >  }
> 
> I'm not following the logic you're using for the above pattern of using
> wake_up_process if the list is empty.. seems unintuitive.
> 
> Given you add to the list (be it endio here, or flush elsewhere), why
> not just add to the list and then always wake_up_process()?

I'd prefer the following, so please help me understand why you aren't
doing it this way.  Thanks.

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 5961c7794ef3..17cd81ce6ec3 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1103,9 +1103,9 @@ static int writecache_flush_thread(void *data)
 
 static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
 {
-	if (bio_list_empty(&wc->flush_list))
-		wake_up_process(wc->flush_thread);
+	lockdep_assert_held(&wc->lock);
 	bio_list_add(&wc->flush_list, bio);
+	wake_up_process(wc->flush_thread);
 }
 
 static int writecache_map(struct dm_target *ti, struct bio *bio)
@@ -1295,10 +1295,9 @@ static void writecache_writeback_endio(struct bio *bio)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
-	if (unlikely(list_empty(&wc->endio_list)))
-		wake_up_process(wc->endio_thread);
 	list_add_tail(&wb->endio_entry, &wc->endio_list);
 	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
+	wake_up_process(wc->endio_thread);
 }
 
 static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
@@ -1309,10 +1308,9 @@ static void writecache_copy_endio(int read_err, unsigned long write_err, void *p
 	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
 
 	raw_spin_lock_irq(&wc->endio_list_lock);
-	if (unlikely(list_empty(&wc->endio_list)))
-		wake_up_process(wc->endio_thread);
 	list_add_tail(&c->endio_entry, &wc->endio_list);
 	raw_spin_unlock_irq(&wc->endio_list_lock);
+	wake_up_process(wc->endio_thread);
 }
 
 static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)

* Re: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-08 15:13   ` Mike Snitzer
  2018-06-08 15:30     ` Mike Snitzer
@ 2018-06-08 20:59     ` Mikulas Patocka
  2018-06-08 21:17       ` Mike Snitzer
  1 sibling, 1 reply; 8+ messages in thread
From: Mikulas Patocka @ 2018-06-08 20:59 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel



On Fri, 8 Jun 2018, Mike Snitzer wrote:

> On Thu, Jun 07 2018 at 11:48am -0400,
> Mikulas Patocka <mpatocka@redhat.com> wrote:
> 
> > This is the second version of this patch - it also removes the label
> > continue_locked, because it is no longer needed. I forgot to refresh the
> > patch before sending it, so I sent an older version.
> > 
> > 
> > From: Mikulas Patocka <mpatocka@redhat.com>
> > Subject: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
> > 
> > If there's just one process that can wait on a queue, we can use
> > wake_up_process. According to Linus, it is safe to call wake_up_process
> > on a process even if the process may be doing something else.
> > 
> > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > 
> > ---
> >  drivers/md/dm-writecache.c |   34 +++++++++++++++-------------------
> >  1 file changed, 15 insertions(+), 19 deletions(-)
> > 
> > Index: linux-2.6/drivers/md/dm-writecache.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-05 22:54:49.000000000 +0200
> > +++ linux-2.6/drivers/md/dm-writecache.c	2018-06-07 17:44:11.000000000 +0200
> > @@ -1273,10 +1272,11 @@ static void writecache_writeback_endio(s
> >  	struct dm_writecache *wc = wb->wc;
> >  	unsigned long flags;
> >  
> > -	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
> > +	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> > +	if (unlikely(list_empty(&wc->endio_list)))
> > +		wake_up_process(wc->endio_thread);
> >  	list_add_tail(&wb->endio_entry, &wc->endio_list);
> > -	swake_up_locked(&wc->endio_thread_wait);
> > -	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
> > +	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
> >  }
> 
> I'm not following the logic you're using for the above pattern of using
> wake_up_process if the list is empty.. seems unintuitive.
> 
> Given you add to the list (be it endio here, or flush elsewhere), why
> not just add to the list and then always wake_up_process()?
> 
> Mike

Because wake_up_process is costly (it takes a spinlock on the target
process). If multiple CPUs are simultaneously hammering on the same
spinlock, it degrades performance.

The process checks if the list is empty before going to sleep (and doesn't 
sleep if it is non-empty) - consequently, if the process goes to sleep, 
the list must have been empty.

So, we can wake the process up only once - when the list transitions from 
empty to non-empty - we don't have to wake it up with every item added to 
the list.


Originally, the code was like this:

raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
need_wake = list_empty(&wc->endio_list);
list_add_tail(&wb->endio_entry, &wc->endio_list);
if (need_wake)
	wake_up_process(wc->endio_thread);
raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);

However, because the target process takes the spinlock too, we can wake it 
up before we add the entry to the list - it doesn't matter here if we wake 
it before or after adding the entry to the list, because the target 
process will take the same spinlock when it is woken up.

Calling wake_up_process before list_add_tail results in slightly smaller 
code.

Mikulas

* Re: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-08 15:30     ` Mike Snitzer
@ 2018-06-08 21:06       ` Mikulas Patocka
  2018-06-08 21:18         ` Mike Snitzer
  0 siblings, 1 reply; 8+ messages in thread
From: Mikulas Patocka @ 2018-06-08 21:06 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel



On Fri, 8 Jun 2018, Mike Snitzer wrote:

> On Fri, Jun 08 2018 at 11:13P -0400,
> Mike Snitzer <snitzer@redhat.com> wrote:
> 
> I'd prefer the following, so please help me understand why you aren't
> doing it this way.  Thanks.
> 
> diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
> index 5961c7794ef3..17cd81ce6ec3 100644
> --- a/drivers/md/dm-writecache.c
> +++ b/drivers/md/dm-writecache.c
> @@ -1103,9 +1103,9 @@ static int writecache_flush_thread(void *data)
>  
>  static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
>  {
> -	if (bio_list_empty(&wc->flush_list))
> -		wake_up_process(wc->flush_thread);
> +	lockdep_assert_held(&wc->lock);
>  	bio_list_add(&wc->flush_list, bio);
> +	wake_up_process(wc->flush_thread);
>  }
>  
>  static int writecache_map(struct dm_target *ti, struct bio *bio)
> @@ -1295,10 +1295,9 @@ static void writecache_writeback_endio(struct bio *bio)
>  	unsigned long flags;
>  
>  	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> -	if (unlikely(list_empty(&wc->endio_list)))
> -		wake_up_process(wc->endio_thread);
>  	list_add_tail(&wb->endio_entry, &wc->endio_list);
>  	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
> +	wake_up_process(wc->endio_thread);
>  }
>  
>  static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
> @@ -1309,10 +1308,9 @@ static void writecache_copy_endio(int read_err, unsigned long write_err, void *p
>  	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
>  
>  	raw_spin_lock_irq(&wc->endio_list_lock);
> -	if (unlikely(list_empty(&wc->endio_list)))
> -		wake_up_process(wc->endio_thread);
>  	list_add_tail(&c->endio_entry, &wc->endio_list);
>  	raw_spin_unlock_irq(&wc->endio_list_lock);
> +	wake_up_process(wc->endio_thread);
>  }
>  
>  static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)

This is incorrect.

When you drop the spinlock, the endio thread may already have taken the item
(it may take it even before wake_up_process is called). Once the endio thread
has consumed all the items, the user may unload the device. When the user
unloads the device, wc is freed and wc->endio_thread points to a non-existent
process - you then dereference the freed "wc" structure and call
wake_up_process on a non-existent process, causing a crash.

wake_up_process must be called with the spinlock held to avoid this race condition.
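
Spelled out as a timeline (an illustrative interleaving, assuming the destructor
stops the endio thread and frees wc once the list has been drained, as described
above):

/*
 * endio callback (CPU A)                        endio thread + dtr (CPU B)
 *
 * raw_spin_lock_irq(&wc->endio_list_lock);
 * list_add_tail(..., &wc->endio_list);
 * raw_spin_unlock_irq(&wc->endio_list_lock);
 *                                               takes endio_list_lock, pops the
 *                                               entry, finishes the remaining work
 *                                               user unloads the target; the
 *                                               destructor stops the endio thread
 *                                               and frees wc
 * wake_up_process(wc->endio_thread);            <- reads freed memory and wakes a
 *                                               task that no longer exists
 */

With wake_up_process issued while endio_list_lock is still held, the endio thread
cannot pop the new entry (and so the teardown cannot proceed) until after the
wake-up has already happened, which closes the window.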

Mikulas

* Re: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-08 20:59     ` Mikulas Patocka
@ 2018-06-08 21:17       ` Mike Snitzer
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2018-06-08 21:17 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel

On Fri, Jun 08 2018 at  4:59pm -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> 
> 
> On Fri, 8 Jun 2018, Mike Snitzer wrote:
> 
> > On Thu, Jun 07 2018 at 11:48am -0400,
> > Mikulas Patocka <mpatocka@redhat.com> wrote:
> > 
> > > This is the second version of this patch - it also removes the label
> > > continue_locked, because it is no longer needed. I forgot to refresh the
> > > patch before sending it, so I sent an older version.
> > > 
> > > 
> > > From: Mikulas Patocka <mpatocka@redhat.com>
> > > Subject: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
> > > 
> > > If there's just one process that can wait on a queue, we can use
> > > wake_up_process. According to Linus, it is safe to call wake_up_process
> > > on a process even if the process may be doing something else.
> > > 
> > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > > 
> > > ---
> > >  drivers/md/dm-writecache.c |   34 +++++++++++++++-------------------
> > >  1 file changed, 15 insertions(+), 19 deletions(-)
> > > 
> > > Index: linux-2.6/drivers/md/dm-writecache.c
> > > ===================================================================
> > > --- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-05 22:54:49.000000000 +0200
> > > +++ linux-2.6/drivers/md/dm-writecache.c	2018-06-07 17:44:11.000000000 +0200
> > > @@ -1273,10 +1272,11 @@ static void writecache_writeback_endio(s
> > >  	struct dm_writecache *wc = wb->wc;
> > >  	unsigned long flags;
> > >  
> > > -	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
> > > +	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> > > +	if (unlikely(list_empty(&wc->endio_list)))
> > > +		wake_up_process(wc->endio_thread);
> > >  	list_add_tail(&wb->endio_entry, &wc->endio_list);
> > > -	swake_up_locked(&wc->endio_thread_wait);
> > > -	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
> > > +	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
> > >  }
> > 
> > I'm not following the logic you're using for the above pattern of using
> > wake_up_process if the list is empty.. seems unintuitive.
> > 
> > Given you add to the list (be it endio here, or flush elsewhere), why
> > not just add to the list and then always wake_up_process()?
> > 
> > Mike
> 
> Because wake_up_process is costly (it takes a spinlock on the process). If 
> multiple CPUs are simultaneously hammering on a spinlock, it degrades 
> performance.
> 
> The process checks if the list is empty before going to sleep (and doesn't 
> sleep if it is non-empty) - consequently, if the process goes to sleep, 
> the list must have been empty.
> 
> So, we can wake the process up only once - when the list transitions from 
> empty to non-empty - we don't have to wake it up with every item added to 
> the list.
> 
> 
> Originally, the code was like this:
> 
> raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> need_wake = list_empty(&wc->endio_list);
> list_add_tail(&wb->endio_entry, &wc->endio_list);
> if (need_wake)
> 	wake_up_process(wc->endio_thread);
> raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
> 
> However, because the target process takes the spinlock too, we can wake it 
> up before we add the entry to the list - it doesn't matter here if we wake 
> it before or after adding the entry to the list, because the target 
> process will take the same spinlock when it is woken up.
> 
> Calling wake_up_process before list_add_tail results in slightly smaller 
> code.

OK, thanks for explaining.

* Re: [patch 2/3 v2] dm-writecache: convert wait queue to wake_up_process
  2018-06-08 21:06       ` Mikulas Patocka
@ 2018-06-08 21:18         ` Mike Snitzer
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2018-06-08 21:18 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel

On Fri, Jun 08 2018 at  5:06pm -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> 
> 
> On Fri, 8 Jun 2018, Mike Snitzer wrote:
> 
> > On Fri, Jun 08 2018 at 11:13P -0400,
> > Mike Snitzer <snitzer@redhat.com> wrote:
> > 
> > I'd prefer the following, so please help me understand why you aren't
> > doing it this way.  Thanks.
> > 
> > diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
> > index 5961c7794ef3..17cd81ce6ec3 100644
> > --- a/drivers/md/dm-writecache.c
> > +++ b/drivers/md/dm-writecache.c
> > @@ -1103,9 +1103,9 @@ static int writecache_flush_thread(void *data)
> >  
> >  static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
> >  {
> > -	if (bio_list_empty(&wc->flush_list))
> > -		wake_up_process(wc->flush_thread);
> > +	lockdep_assert_held(&wc->lock);
> >  	bio_list_add(&wc->flush_list, bio);
> > +	wake_up_process(wc->flush_thread);
> >  }
> >  
> >  static int writecache_map(struct dm_target *ti, struct bio *bio)
> > @@ -1295,10 +1295,9 @@ static void writecache_writeback_endio(struct bio *bio)
> >  	unsigned long flags;
> >  
> >  	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
> > -	if (unlikely(list_empty(&wc->endio_list)))
> > -		wake_up_process(wc->endio_thread);
> >  	list_add_tail(&wb->endio_entry, &wc->endio_list);
> >  	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
> > +	wake_up_process(wc->endio_thread);
> >  }
> >  
> >  static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
> > @@ -1309,10 +1308,9 @@ static void writecache_copy_endio(int read_err, unsigned long write_err, void *p
> >  	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
> >  
> >  	raw_spin_lock_irq(&wc->endio_list_lock);
> > -	if (unlikely(list_empty(&wc->endio_list)))
> > -		wake_up_process(wc->endio_thread);
> >  	list_add_tail(&c->endio_entry, &wc->endio_list);
> >  	raw_spin_unlock_irq(&wc->endio_list_lock);
> > +	wake_up_process(wc->endio_thread);
> >  }
> >  
> >  static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
> 
> This is incorrect.
> 
> When you drop the spinlock, the endio thread may already have taken the item
> (it may take it even before wake_up_process is called). Once the endio thread
> has consumed all the items, the user may unload the device. When the user
> unloads the device, wc is freed and wc->endio_thread points to a non-existent
> process - you then dereference the freed "wc" structure and call
> wake_up_process on a non-existent process, causing a crash.
> 
> wake_up_process must be called with the spinlock held to avoid this race condition.

OK, I understand, thanks.
