All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg
@ 2010-10-19 12:57 Tejun Heo
  2010-10-19 12:58 ` [PATCH 2/2] scsi: don't use execute_in_process_context() Tejun Heo
                   ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Tejun Heo @ 2010-10-19 12:57 UTC (permalink / raw)
  To: Linux SCSI List, James Bottomley, FUJITA Tomonori, lkml

execute_work usage in sg is bogus.  execute_in_process_context() is
never used and ew is used purely as wrapper around work_struct which
is superfluous.  Use work_struct directly.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/scsi/sg.c |   16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

Index: work/drivers/scsi/sg.c
===================================================================
--- work.orig/drivers/scsi/sg.c
+++ work/drivers/scsi/sg.c
@@ -137,7 +137,7 @@ typedef struct sg_request {	/* SG_MAX_QU
 	volatile char done;	/* 0->before bh, 1->before read, 2->read */
 	struct request *rq;
 	struct bio *bio;
-	struct execute_work ew;
+	struct work_struct work;
 } Sg_request;

 typedef struct sg_fd {		/* holds the state of a file descriptor */
@@ -160,7 +160,7 @@ typedef struct sg_fd {		/* holds the sta
 	char keep_orphan;	/* 0 -> drop orphan (def), 1 -> keep for read() */
 	char mmap_called;	/* 0 -> mmap() never called on this fd */
 	struct kref f_ref;
-	struct execute_work ew;
+	struct work_struct work;
 } Sg_fd;

 typedef struct sg_device { /* holds the state of each scsi generic device */
@@ -1246,7 +1246,7 @@ sg_mmap(struct file *filp, struct vm_are

 static void sg_rq_end_io_usercontext(struct work_struct *work)
 {
-	struct sg_request *srp = container_of(work, struct sg_request, ew.work);
+	struct sg_request *srp = container_of(work, struct sg_request, work);
 	struct sg_fd *sfp = srp->parentfp;

 	sg_finish_rem_req(srp);
@@ -1333,8 +1333,8 @@ static void sg_rq_end_io(struct request
 		kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
 		kref_put(&sfp->f_ref, sg_remove_sfp);
 	} else {
-		INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext);
-		schedule_work(&srp->ew.work);
+		INIT_WORK(&srp->work, sg_rq_end_io_usercontext);
+		schedule_work(&srp->work);
 	}
 }

@@ -2086,7 +2086,7 @@ sg_add_sfp(Sg_device * sdp, int dev)

 static void sg_remove_sfp_usercontext(struct work_struct *work)
 {
-	struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work);
+	struct sg_fd *sfp = container_of(work, struct sg_fd, work);
 	struct sg_device *sdp = sfp->parentdp;

 	/* Cleanup any responses which were never read(). */
@@ -2123,8 +2123,8 @@ static void sg_remove_sfp(struct kref *k
 	write_unlock_irqrestore(&sg_index_lock, iflags);
 	wake_up_interruptible(&sdp->o_excl_wait);

-	INIT_WORK(&sfp->ew.work, sg_remove_sfp_usercontext);
-	schedule_work(&sfp->ew.work);
+	INIT_WORK(&sfp->work, sg_remove_sfp_usercontext);
+	schedule_work(&sfp->work);
 }

 static int

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-10-19 12:57 [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg Tejun Heo
@ 2010-10-19 12:58 ` Tejun Heo
  2010-10-22 10:03   ` FUJITA Tomonori
  2010-12-12 22:48   ` James Bottomley
  2010-10-20 14:29 ` [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg FUJITA Tomonori
  2010-10-20 19:56 ` Douglas Gilbert
  2 siblings, 2 replies; 25+ messages in thread
From: Tejun Heo @ 2010-10-19 12:58 UTC (permalink / raw)
  To: Linux SCSI List, James Bottomley, FUJITA Tomonori, lkml

SCSI is the only subsystem which uses execute_in_process_context().
With the recent workqueue updates, unconditionally using work wouldn't
cause deadlocks around execution resources and the two places where
SCSI uses them are cold paths where using work unconditionally
wouldn't make any difference.  Drop execute_in_process_context() and
use work directly.

* scsi_device->ew is replaced with release_work.  scsi_target->ew is
  replaced with reap_work.

* Both works are initialized with the respective release/reap function
  during device/target init.  scsi_target_reap_usercontext() is moved
  upwards to avoid needing forward declaration.

* scsi_alloc_target() now explicitly flushes the reap_work of the
  found dying target before putting it instead of depending on
  flush_scheduled_work().

This is in preparation of deprecation of flush_scheduled_work() and
execute_in_process_context().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/scsi/scsi_scan.c   |   26 +++++++++++++-------------
 drivers/scsi/scsi_sysfs.c  |    8 +++++---
 include/scsi/scsi_device.h |    4 ++--
 3 files changed, 20 insertions(+), 18 deletions(-)

Index: work/drivers/scsi/scsi_scan.c
===================================================================
--- work.orig/drivers/scsi/scsi_scan.c
+++ work/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct d
 }
 EXPORT_SYMBOL(scsi_is_target_device);

+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+	struct scsi_target *starget =
+		container_of(work, struct scsi_target, reap_work);
+
+	transport_remove_device(&starget->dev);
+	device_del(&starget->dev);
+	scsi_target_destroy(starget);
+}
+
 static struct scsi_target *__scsi_find_target(struct device *parent,
 					      int channel, uint id)
 {
@@ -429,6 +439,7 @@ static struct scsi_target *scsi_alloc_ta
 	starget->state = STARGET_CREATED;
 	starget->scsi_level = SCSI_2;
 	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+	INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
  retry:
 	spin_lock_irqsave(shost->host_lock, flags);

@@ -464,21 +475,11 @@ static struct scsi_target *scsi_alloc_ta
 	}
 	/* Unfortunately, we found a dying target; need to
 	 * wait until it's dead before we can get a new one */
+	flush_work(&found_target->reap_work);
 	put_device(&found_target->dev);
-	flush_scheduled_work();
 	goto retry;
 }

-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
-	struct scsi_target *starget =
-		container_of(work, struct scsi_target, ew.work);
-
-	transport_remove_device(&starget->dev);
-	device_del(&starget->dev);
-	scsi_target_destroy(starget);
-}
-
 /**
  * scsi_target_reap - check to see if target is in use and destroy if not
  * @starget: target to be checked
@@ -509,8 +510,7 @@ void scsi_target_reap(struct scsi_target
 	if (state == STARGET_CREATED)
 		scsi_target_destroy(starget);
 	else
-		execute_in_process_context(scsi_target_reap_usercontext,
-					   &starget->ew);
+		schedule_work(&starget->reap_work);
 }

 /**
Index: work/drivers/scsi/scsi_sysfs.c
===================================================================
--- work.orig/drivers/scsi/scsi_sysfs.c
+++ work/drivers/scsi/scsi_sysfs.c
@@ -298,7 +298,7 @@ static void scsi_device_dev_release_user
 	struct list_head *this, *tmp;
 	unsigned long flags;

-	sdev = container_of(work, struct scsi_device, ew.work);
+	sdev = container_of(work, struct scsi_device, release_work);

 	parent = sdev->sdev_gendev.parent;
 	starget = to_scsi_target(parent);
@@ -341,8 +341,8 @@ static void scsi_device_dev_release_user
 static void scsi_device_dev_release(struct device *dev)
 {
 	struct scsi_device *sdp = to_scsi_device(dev);
-	execute_in_process_context(scsi_device_dev_release_usercontext,
-				   &sdp->ew);
+
+	schedule_work(&sdp->release_work);
 }

 static struct class sdev_class = {
@@ -1066,6 +1066,8 @@ void scsi_sysfs_device_initialize(struct
 	dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
 		     sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
 	sdev->scsi_level = starget->scsi_level;
+	INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
 	transport_setup_device(&sdev->sdev_gendev);
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_add_tail(&sdev->same_target_siblings, &starget->devices);
Index: work/include/scsi/scsi_device.h
===================================================================
--- work.orig/include/scsi/scsi_device.h
+++ work/include/scsi/scsi_device.h
@@ -166,7 +166,7 @@ struct scsi_device {
 	struct device		sdev_gendev,
 				sdev_dev;

-	struct execute_work	ew; /* used to get process context on put */
+	struct work_struct	release_work; /* for process context on put */

 	struct scsi_dh_data	*scsi_dh_data;
 	enum scsi_device_state sdev_state;
@@ -256,7 +256,7 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED	3

 	char			scsi_level;
-	struct execute_work	ew;
+	struct work_struct	reap_work;
 	enum scsi_target_state	state;
 	void 			*hostdata; /* available to low-level driver */
 	unsigned long		starget_data[0]; /* for the transport */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg
  2010-10-19 12:57 [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg Tejun Heo
  2010-10-19 12:58 ` [PATCH 2/2] scsi: don't use execute_in_process_context() Tejun Heo
@ 2010-10-20 14:29 ` FUJITA Tomonori
  2010-10-20 19:56 ` Douglas Gilbert
  2 siblings, 0 replies; 25+ messages in thread
From: FUJITA Tomonori @ 2010-10-20 14:29 UTC (permalink / raw)
  To: tj; +Cc: linux-scsi, James.Bottomley, fujita.tomonori, linux-kernel

On Tue, 19 Oct 2010 14:57:36 +0200
Tejun Heo <tj@kernel.org> wrote:

> execute_work usage in sg is bogus.  execute_in_process_context() is
> never used and ew is used purely as wrapper around work_struct which
> is superfluous.  Use work_struct directly.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
>  drivers/scsi/sg.c |   16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)

Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg
  2010-10-19 12:57 [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg Tejun Heo
  2010-10-19 12:58 ` [PATCH 2/2] scsi: don't use execute_in_process_context() Tejun Heo
  2010-10-20 14:29 ` [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg FUJITA Tomonori
@ 2010-10-20 19:56 ` Douglas Gilbert
  2 siblings, 0 replies; 25+ messages in thread
From: Douglas Gilbert @ 2010-10-20 19:56 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, James Bottomley, FUJITA Tomonori, lkml

On 10-10-19 02:57 PM, Tejun Heo wrote:
> execute_work usage in sg is bogus.  execute_in_process_context() is
> never used and ew is used purely as wrapper around work_struct which
> is superfluous.  Use work_struct directly.
>
> Signed-off-by: Tejun Heo<tj@kernel.org>

Acked-by: Douglas Gilbert <dgilbert@interlog.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-10-19 12:58 ` [PATCH 2/2] scsi: don't use execute_in_process_context() Tejun Heo
@ 2010-10-22 10:03   ` FUJITA Tomonori
  2010-12-12 22:48   ` James Bottomley
  1 sibling, 0 replies; 25+ messages in thread
From: FUJITA Tomonori @ 2010-10-22 10:03 UTC (permalink / raw)
  To: tj; +Cc: linux-scsi, James.Bottomley, fujita.tomonori, linux-kernel

On Tue, 19 Oct 2010 14:58:04 +0200
Tejun Heo <tj@kernel.org> wrote:

> SCSI is the only subsystem which uses execute_in_process_context().
> With the recent workqueue updates, unconditionally using work wouldn't
> cause deadlocks around execution resources and the two places where
> SCSI uses them are cold paths where using work unconditionally
> wouldn't make any difference.  Drop execute_in_process_context() and
> use work directly.
> 
> * scsi_device->ew is replaced with release_work.  scsi_target->ew is
>   replaced with reap_work.
> 
> * Both works are initialized with the respective release/reap function
>   during device/target init.  scsi_target_reap_usercontext() is moved
>   upwards to avoid needing forward declaration.
> 
> * scsi_alloc_target() now explicitly flushes the reap_work of the
>   found dying target before putting it instead of depending on
>   flush_scheduled_work().
> 
> This is in preparation of deprecation of flush_scheduled_work() and
> execute_in_process_context().
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
>  drivers/scsi/scsi_scan.c   |   26 +++++++++++++-------------
>  drivers/scsi/scsi_sysfs.c  |    8 +++++---
>  include/scsi/scsi_device.h |    4 ++--
>  3 files changed, 20 insertions(+), 18 deletions(-)

Looks fine to me.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-10-19 12:58 ` [PATCH 2/2] scsi: don't use execute_in_process_context() Tejun Heo
  2010-10-22 10:03   ` FUJITA Tomonori
@ 2010-12-12 22:48   ` James Bottomley
  2010-12-14  9:53     ` Tejun Heo
  1 sibling, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-12 22:48 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Tue, 2010-10-19 at 14:58 +0200, Tejun Heo wrote:
> SCSI is the only subsystem which uses execute_in_process_context().
> With the recent workqueue updates, unconditionally using work wouldn't
> cause deadlocks around execution resources and the two places where
> SCSI uses them are cold paths where using work unconditionally
> wouldn't make any difference.  Drop execute_in_process_context() and
> use work directly.

Sorry, managed to lose this until the ping.

The analysis above isn't quite correct, I'm afraid.  We use the
execute_in_process_context() not to avoid deadlocks, but to acquire
process context if we don't have it because the API allows calling from
sites at interrupt context.  The point of using
execute_in_process_context() is that we actually want to make use of the
user context if we have one ... there's no point using a workqueue in
that case, because there's nothing to be gained (except to slow
everything down).  We have no ordering constraints (the traditional
reason for using workqueues) so this is purely about context.

Alan Stern recently did an analysis (at least for the target reap) that
says we actually have no interrupt context call sites, so abandoning the
execute_in_process_context() for a direct call in that case might make
sense.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-12 22:48   ` James Bottomley
@ 2010-12-14  9:53     ` Tejun Heo
  2010-12-14 14:09       ` James Bottomley
  0 siblings, 1 reply; 25+ messages in thread
From: Tejun Heo @ 2010-12-14  9:53 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello, James.

On 12/12/2010 11:48 PM, James Bottomley wrote:
> The analysis above isn't quite correct, I'm afraid.  We use the
> execute_in_process_context() not to avoid deadlocks, but to acquire
> process context if we don't have it because the API allows calling from
> sites at interrupt context.  The point of using
> execute_in_process_context() is that we actually want to make use of the
> user context if we have one ... there's no point using a workqueue in
> that case, because there's nothing to be gained (except to slow
> everything down).  We have no ordering constraints (the traditional
> reason for using workqueues) so this is purely about context.

Sure, what I tried to say was that the change couldn't introduce
deadlock no matter how it was used.  Sure execute_in_process_context()
would be slightly more efficient, but it currently is used a few times
only in quite cold paths where optimization isn't necessary at all and
the API is somewhat incomplete in that it doesn't provide ordering or
synchronization APIs.

So, unless there's a compelling reason, let's remove it.  It has been
there for quite some time now and hasn't grown any other users.  There
wouldn't be any noticeable difference for the current users either.
If you really like to keep it in the current users, let's move it into
SCSI.  I don't see much reason to keep it as a part of generic
workqueue API in its current form.

Thank you.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-14  9:53     ` Tejun Heo
@ 2010-12-14 14:09       ` James Bottomley
  2010-12-14 14:19         ` Tejun Heo
  0 siblings, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-14 14:09 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Tue, 2010-12-14 at 10:53 +0100, Tejun Heo wrote:
> Hello, James.
> 
> On 12/12/2010 11:48 PM, James Bottomley wrote:
> > The analysis above isn't quite correct, I'm afraid.  We use the
> > execute_in_process_context() not to avoid deadlocks, but to acquire
> > process context if we don't have it because the API allows calling from
> > sites at interrupt context.  The point of using
> > execute_in_process_context() is that we actually want to make use of the
> > user context if we have one ... there's no point using a workqueue in
> > that case, because there's nothing to be gained (except to slow
> > everything down).  We have no ordering constraints (the traditional
> > reason for using workqueues) so this is purely about context.
> 
> Sure, what I tried to say was that the change couldn't introduce
> deadlock no matter how it was used.  Sure execute_in_process_context()
> would be slightly more efficient, but it currently is used a few times
> only in quite cold paths where optimization isn't necessary at all and
> the API is somewhat incomplete in that it doesn't provide ordering or
> synchronization APIs.

That's the point ... it's purely for operations which require user
context which may not have it.  There's no synchronisation by design
(it's a simple API).

> So, unless there's a compelling reason, let's remove it.

The open coding of if (in_atomic()) { do workqueue stuff } else
{ execute function } is rather bug prone (most people tend to do
in_interrupt()).  It's better to encapsulate it in an API.

>   It has been
> there for quite some time now and hasn't grown any other users.  There
> wouldn't be any noticeable difference for the current users either.
> If you really like to keep it in the current users, let's move it into
> SCSI.  I don't see much reason to keep it as a part of generic
> workqueue API in its current form.

It was in SCSI ... I got told to make it generic.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-14 14:09       ` James Bottomley
@ 2010-12-14 14:19         ` Tejun Heo
  2010-12-14 14:26           ` James Bottomley
  0 siblings, 1 reply; 25+ messages in thread
From: Tejun Heo @ 2010-12-14 14:19 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello, James.

On 12/14/2010 03:09 PM, James Bottomley wrote:
> That's the point ... it's purely for operations which require user
> context which may not have it.  There's no synchronisation by design
> (it's a simple API).

Well, the problem is that you do require proper synchronization
anyway; otherwise, there is no way to make sure that the work is
finished before the SCSI module is about to be unloaded.  Currently,
the code uses flush_scheduled_work() for this, which is going away
because the latency can grow arbitrarily large and the behavior is
dependent on completely unrelated work items.  So, either we need to
add a separate flush interface for ew's, flush the work item inside
ew's or schedule them to a dedicated workqueue.

>> So, unless there's a compelling reason, let's remove it.
> 
> The open coding of if (in_atomic()) { do workqueue stuff } else
> { execute function } is rather bug prone (most people tend to do
> in_interrupt()).  It's better to encapsulate it in an API.

Compelling reason for it to exist.  Why not just use work when you
need execution context and the caller might or might not have one?

> It was in SCSI ... I got told to make it generic.

Heh, yeah, that would feel quite silly.  Sorry about that.  :-)

But, really, let's just remove it.  At this point, we either need to
fortify the interface or remove it and given the current usage, I
think we're better off with the latter.  If any pressing need arises,
we can always add a proper API with all the necessary bells and
whistles later.

Thank you.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-14 14:19         ` Tejun Heo
@ 2010-12-14 14:26           ` James Bottomley
  2010-12-14 14:33             ` Tejun Heo
  0 siblings, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-14 14:26 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Tue, 2010-12-14 at 15:19 +0100, Tejun Heo wrote:
> Hello, James.
> 
> On 12/14/2010 03:09 PM, James Bottomley wrote:
> > That's the point ... it's purely for operations which require user
> > context which may not have it.  There's no synchronisation by design
> > (it's a simple API).
> 
> Well, the problem is that you do require proper synchornization
> anyway; otherwise, there is no way to make sure that the work is
> finished before the SCSI module is about to be unloaded.  Currently,
> the code uses flush_scheduled_work() for this, which is going away
> because the latency can grow arbitrarily large and the behavior is
> dependent on completely unrelated work items.  So, either we need to
> add a separate flush interface for ew's, flush the work item inside
> ew's or schedule them to a dedicated workqueue.

Depends what you're doing about the flush problem.  The synchronisation
is inherent in the use (we're holding a reference to the module within
the executed code).  The flush is to try to speed things up so the user
doesn't get annoyed during rmmod.  We don't need a sync, just an
accelerator.

> >> So, unless there's a compelling reason, let's remove it.
> > 
> > The open coding of if (in_atomic()) { do workqueue stuff } else
> > { execute function } is rather bug prone (most people tend to do
> > in_interrupt()).  It's better to encapsulate it in an API.
> 
> Compelling reason for it to exist.  Why not just use work when you
> need execution context and the caller might or might not have one?

Because it's completely lame to have user context and not use it.

> > It was in SCSI ... I got told to make it generic.
> 
> Heh, yeah, that would feel quite silly.  Sorry about that.  :-)
> 
> But, really, let's just remove it.  At this point, we either need to
> fortify the interface or remove it and given the current usage, I
> think we're better off with the latter.

I really don't think the open coding is a good idea.  It's complex and
error prone; exactly the type of thing that should be in an API.

>   If any pressing need arises,
> we can always add a proper API with all the necessary bells and
> whistles later.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-14 14:26           ` James Bottomley
@ 2010-12-14 14:33             ` Tejun Heo
  2010-12-15  3:04               ` James Bottomley
  0 siblings, 1 reply; 25+ messages in thread
From: Tejun Heo @ 2010-12-14 14:33 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello,

On 12/14/2010 03:26 PM, James Bottomley wrote:
> Depends what you're doing about the flush problem.  The synchronisation
> is inherent in the use (we're holding a reference to the module within
> the executed code).  The flush is to try to speed things up so the user
> doesn't get annoyed during rmmod.  We don't need a sync, just an
> accelerator.

Hmmm, I'm confused.  How does it drop the reference then?  Something
outside of the callback should wait for its completion and drop the
reference as otherwise nothing can guarantee that the module doesn't
go away between the reference drop and the actual completion of the
callback.

>> Compelling reason for it to exist.  Why not just use work when you
>> need execution context and the caller might or might not have one?
> 
> Because it's completely lame to have user context and not use it.

It may be lame but I think it's better than having an optimization
interface which is incomplete and, more importantly, unnecessary.

>> But, really, let's just remove it.  At this point, we either need to
>> fortify the interface or remove it and given the current usage, I
>> think we're better off with the latter.
> 
> I really don't think the open coding is a good idea.  It's complex and
> error prone; exactly the type of thing that should be in an API.

Yeah, just schedule work like everyone else.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-14 14:33             ` Tejun Heo
@ 2010-12-15  3:04               ` James Bottomley
  2010-12-15 15:47                 ` Tejun Heo
  0 siblings, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-15  3:04 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Tue, 2010-12-14 at 15:33 +0100, Tejun Heo wrote:
> Hello,
> 
> On 12/14/2010 03:26 PM, James Bottomley wrote:
> > Depends what you're doing about the flush problem.  The synchronisation
> > is inherent in the use (we're holding a reference to the module within
> > the executed code).  The flush is to try to speed things up so the user
> > doesn't get annoyed during rmmod.  We don't need a sync, just an
> > accelerator.
> 
> Hmmm, I'm confused.  How does it drop the reference then?

Um, the same way it does in your new code: inside the executed function.

>   Something
> outside of the callback should wait for its completion and drop the
> reference as otherwise nothing can guarantee that the modules doesn't
> go away between the reference drop and the actual completion of the
> callback.

Well, if that's an actual problem, your patch doesn't solve it.  In both
cases the work structure is part of the object that will be released.
The way it should happen is that workqueues dequeue the work (so now no
refs) and execute the callback with the data, so the callback is OK to
free the work structure.  As long as it still does that, there's no
problem in either case.

> >> Compelling reason for it to exist.  Why not just use work when you
> >> need execution context and the caller might or might not have one?
> > 
> > Because it's completely lame to have user context and not use it.
> 
> It may be lame but I think it's better than having an optimization
> interface which is incomplete and, more importantly, unnecessary.

But you're thinking of it as a workqueue issue ... it isn't, it's an API
which says "just make sure I have user context".  The workqueue is just
the implementation detail.

> >> But, really, let's just remove it.  At this point, we either need to
> >> fortify the interface or remove it and given the current usage, I
> >> think we're better off with the latter.
> > 
> > I really don't think the open coding is a good idea.  It's complex and
> > error prone; exactly the type of thing that should be in an API.
> 
> Yeah, just schedule work like everyone else.

As I said: the required open coding then becomes error prone.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15  3:04               ` James Bottomley
@ 2010-12-15 15:47                 ` Tejun Heo
  2010-12-15 15:54                   ` James Bottomley
  0 siblings, 1 reply; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 15:47 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello James,

On 12/15/2010 04:04 AM, James Bottomley wrote:
>> Hmmm, I'm confused.  How does it drop the reference then?
> 
> Um, the same way it does in your new code: inside the executed function.

Okay, it wouldn't work that way.  They both are broken then.  It's
basically like trying put the last module reference from inside the
module.

>> Something outside of the callback should wait for its completion
>> and drop the reference as otherwise nothing can guarantee that the
>> module doesn't go away between the reference drop and the actual
>> completion of the callback.
> 
> Well, if that's an actual problem, your patch doesn't solve it.  In both
> cases the work structure is part of the object that will be released.
> The way it should happen is that workqueues dequeue the work (so now no
> refs) and execute the callback with the data, so the callback is OK to
> free the work structure.  As long as it still does that, there's no
> problem in either case.

The workqueue code doesn't know anything about the specific work.  It
can't do that.  The work should be flushed from outside.

>>>> Compelling reason for it to exist.  Why not just use work when you
>>>> need execution context and the caller might or might not have one?
>>>
>>> Because it's completely lame to have user context and not use it.
>>
>> It may be lame but I think it's better than having an optimization
>> interface which is incomplete and, more importantly, unnecessary.
> 
> But you're thinking of it as a workqueue issue ... it isn't, it's an API
> which says "just make sure I have user context".  The workqueue is just
> the implementation detail.

Sure, yes, I understand what you're saying, but like everything else
it is a matter of tradeoff.

* It currently is incomplete in that it doesn't have a proper
  synchronization interface, and it isn't true that it's a simple
  interface which doesn't require synchronization.  No, it's not any
  simpler than directly using a workqueue.  How could it be?  It
  conditionally uses workqueue.  It needs the same level of
  synchronization.

* Which is all fine and dandy if it's something which brings actual
  benefits other than the perceived conceptual difference or avoidance
  of the feeling of lameness, but it doesn't.  At least not in the
  current users.  Even if you simply schedule work for the current
  users, nobody would notice.

* It existed for quite some time but failed to grow any new user.  It
  may be conceptually different but apparently there aren't many
  people looking for it.

The logical conclusion is to remove it and convert to work items in the
current few users and use the provided synchronization constructs to
fix the unlikely but still existing race condition.

>>> I really don't think the open coding is a good idea.  It's complex and
>>> error prone; exactly the type of thing that should be in an API.
>>
>> Yeah, just schedule work like everyone else.
> 
> As I said: the required open coding then becomes error prone.

No, don't open code atomicity test.  That's an unnecessary
optimization.  Schedule work directly whether you have context or not.
It just doesn't matter and isn't more complex than the current code.

One way or the other, the current code is racy.  The module can go
away while the work is still running.  We'll have to add sync
interface for ew's, which conceptually is fine but is unnecessary with
the current code base.  Let's do it when it actually is necessary.

I can refresh the patchset so that the relevant works are properly
flushed before the release of the relevant data structures.  Would
that be agreeable?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 15:47                 ` Tejun Heo
@ 2010-12-15 15:54                   ` James Bottomley
  2010-12-15 16:00                     ` Tejun Heo
  0 siblings, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-15 15:54 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Wed, 2010-12-15 at 16:47 +0100, Tejun Heo wrote:
> One way or the other, the current code is racy.  The module can go
> away while the work is still running.  We'll have to add sync
> interface for ew's, which conceptually is fine but is unnecessary with
> the current code base.  Let's do it when it actually is necessary.

OK, ignoring the bickering over API, this is what I don't get.

The executed function releases the parent reference as its last call.
That will cause the freeing of the embedded work item and a cascade
release of all the parents.  If there's no more references, that will
result in a final put of the module semaphore and rmmod will then
proceed.  What is racy about that?  All the work structures and
references have been freed before the module gets removed.  Nothing
blocks the execution thread in the function, so it exits long before the
code path gets zeroed.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 15:54                   ` James Bottomley
@ 2010-12-15 16:00                     ` Tejun Heo
  2010-12-15 17:22                       ` James Bottomley
  0 siblings, 1 reply; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 16:00 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello,

On 12/15/2010 04:54 PM, James Bottomley wrote:
> On Wed, 2010-12-15 at 16:47 +0100, Tejun Heo wrote:
>> One way or the other, the current code is racy.  The module can go
>> away while the work is still running.  We'll have to add sync
>> interface for ew's, which conceptually is fine but is unnecessary with
>> the current code base.  Let's do it when it actually is necessary.
> 
> OK, ignoring the bickering over API, this is what I don't get.
> 
> The executed function releases the parent reference as its last call.
> That will cause the freeing of the embedded work item and a cascade
> release of all the parents.  If there's no more references, that will
> result in a final put of the module semaphore and rmmod will then
> proceed.  What is racy about that?  All the work structures and
> references have been freed before the module gets removed.  Nothing
> blocks the execution thread in the function, so it exits long before the
> code path gets zeroed.

Because the final put and return aren't atomic against module
unloading.  The worker can get preempted inbetween and the module can
be unloaded beneath it.  When the worker is scheduled back, its text,
which was inside the module, is gone.

To make that working, it either has to do the final put from the code
outside of the module (in another module or built-in) or the module
unloading should guarantee that the work item has finished executing
before proceeding with unload, which can only be done by flushing it
from outside the work itself.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 16:00                     ` Tejun Heo
@ 2010-12-15 17:22                       ` James Bottomley
  2010-12-15 19:05                         ` Tejun Heo
  0 siblings, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-15 17:22 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Wed, 2010-12-15 at 17:00 +0100, Tejun Heo wrote:
> Hello,
> 
> On 12/15/2010 04:54 PM, James Bottomley wrote:
> > On Wed, 2010-12-15 at 16:47 +0100, Tejun Heo wrote:
> >> One way or the other, the current code is racy.  The module can go
> >> away while the work is still running.  We'll have to add sync
> >> interface for ew's, which conceptually is fine but is unnecessary with
> >> the current code base.  Let's do it when it actually is necessary.
> > 
> > OK, ignoring the bickering over API, this is what I don't get.
> > 
> > The executed function releases the parent reference as its last call.
> > That will cause the freeing of the embedded work item and a cascade
> > release of all the parents.  If there's no more references, that will
> > result in a final put of the module semaphore and rmmod will then
> > proceed.  What is racy about that?  All the work structures and
> > references have been freed before the module gets removed.  Nothing
> > blocks the execution thread in the function, so it exits long before the
> > code path gets zeroed.
> 
> Because the final put and return aren't atomic against module
> unloading.  The worker can get preempted inbetween and the module can
> be unloaded beneath it.  When the worker is scheduled back, its text,
> which was inside the module, is gone.
> 
> To make that working, it either has to do the final put from the code
> outside of the module (in another module or built-in) or the module
> unloading should guarantee that the work item has finished executing
> before proceeding with unload, which can only be done by flushing it
> from outside the work itself.

Hmm, I suppose the original coding didn't contemplate pre-emption.  This
should fix it then, I think (with no alteration to the callsites because
of the encapsulating API).  It does assume the function being executed
is local to the file doing the execution, which is true in all current
cases.

James

---

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0c0771f..1ebe4a1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -10,6 +10,7 @@
 #include <linux/bitops.h>
 #include <linux/lockdep.h>
 #include <linux/threads.h>
+#include <linux/module.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -101,6 +102,8 @@ static inline struct delayed_work *to_delayed_work(struct work_struct *work)
 
 struct execute_work {
 	struct work_struct work;
+	work_func_t fn;
+	struct module *module;
 };
 
 #ifdef CONFIG_LOCKDEP
@@ -353,7 +356,11 @@ extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 extern int schedule_on_each_cpu(work_func_t func);
 extern int keventd_up(void);
 
-int execute_in_process_context(work_func_t fn, struct execute_work *);
+int __execute_in_process_context(work_func_t fn, struct execute_work *,
+				 struct module *);
+
+#define execute_in_process_context(fn, ew) \
+	__execute_in_process_context(fn, ew, THIS_MODULE)
 
 extern bool flush_work(struct work_struct *work);
 extern bool flush_work_sync(struct work_struct *work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e785b0f..8f5d111 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2727,6 +2727,15 @@ void flush_scheduled_work(void)
 }
 EXPORT_SYMBOL(flush_scheduled_work);
 
+/* Wrapper to release the module in pinned kernel space */
+static void __execute_in_process_context_fn(struct work_struct *work)
+{
+	struct execute_work *ew = container_of(work, struct execute_work, work);
+
+	ew->fn(&ew->work);
+	module_put(ew->module);
+}
+
 /**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn:		the function to execute
@@ -2739,19 +2748,25 @@ EXPORT_SYMBOL(flush_scheduled_work);
  * Returns:	0 - function was executed
  *		1 - function was scheduled for execution
  */
-int execute_in_process_context(work_func_t fn, struct execute_work *ew)
+int __execute_in_process_context(work_func_t fn, struct execute_work *ew,
+				 struct module *module)
 {
 	if (!in_interrupt()) {
 		fn(&ew->work);
 		return 0;
 	}
 
-	INIT_WORK(&ew->work, fn);
+	/* This would mean the module is already dying and the function
+	 * must be completely unsafe to execute */
+	BUG_ON(!try_module_get(module));
+	ew->fn = fn;
+	ew->module = module;
+	INIT_WORK(&ew->work, __execute_in_process_context_fn);
 	schedule_work(&ew->work);
 
 	return 1;
 }
-EXPORT_SYMBOL_GPL(execute_in_process_context);
+EXPORT_SYMBOL_GPL(__execute_in_process_context);
 
 int keventd_up(void)
 {



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 17:22                       ` James Bottomley
@ 2010-12-15 19:05                         ` Tejun Heo
  2010-12-15 19:10                           ` James Bottomley
  0 siblings, 1 reply; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 19:05 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hey, James.

On 12/15/2010 06:22 PM, James Bottomley wrote:
> Hmm, I suppose the original coding didn't contemplate pre-emption.  This
> should fix it then, I think (with no alteration to the callsites because
> of the encapsulating API).  It does assume the function being executed
> is local to the file doing the execution, which is true in all current
> cases.

Yes, it would do, but we were already too far with the existing
implementation and I don't agree we need more when replacing it with
usual workqueue usage would remove the issue.  So, when we actually
need them, let's consider that or any other way to do it, please.
A core API with only a few users which can be easily replaced isn't
really worth keeping around.  Wouldn't you agree?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:05                         ` Tejun Heo
@ 2010-12-15 19:10                           ` James Bottomley
  2010-12-15 19:19                             ` Tejun Heo
  0 siblings, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-15 19:10 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Wed, 2010-12-15 at 20:05 +0100, Tejun Heo wrote:
> Hey, James.
> 
> On 12/15/2010 06:22 PM, James Bottomley wrote:
> > Hmm, I suppose the original coding didn't contemplate pre-emption.  This
> > should fix it then, I think (with no alteration to the callsites because
> > of the encapsulating API).  It does assume the function being executed
> > is local to the file doing the execution, which is true in all current
> > cases.
> 
> Yes, it would do, but we were already too far with the existing
> implementation and I don't agree we need more when replacing it with
> usual workqueue usage would remove the issue.  So, when we actually
> need them, let's consider that or any other way to do it, please.
> A core API with only a few users which can be easily replaced isn't
> really worth keeping around.  Wouldn't you agree?

Not really ... since the fix is small and obvious.  Plus now it can't be
moved into SCSI because I need the unremovable call chain.

Show me how you propose to fix it differently first, since we both agree
the initial attempt doesn't work, and we can take the discussion from
there.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:10                           ` James Bottomley
@ 2010-12-15 19:19                             ` Tejun Heo
  2010-12-15 19:33                               ` James Bottomley
  2010-12-15 19:34                               ` Tejun Heo
  0 siblings, 2 replies; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 19:19 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello,

On 12/15/2010 08:10 PM, James Bottomley wrote:
>> Yes, it would do, but we were already too far with the existing
>> implementation and I don't agree we need more when replacing it with
>> usual workqueue usage would remove the issue.  So, when we actually
>> need them, let's consider that or any other way to do it, please.
>> A core API with only a few users which can be easily replaced isn't
>> really worth keeping around.  Wouldn't you agree?
> 
> Not really ... since the fix is small and obvious.

IMHO, it's a bit too subtle to be a good API.  The callee is called
under different (locking) context depending on the callsite and I've
been already bitten enough times from implicit THIS_MODULEs.  Both
properties increase possibility of introducing problems which can be
quite difficult to detect and reproduce.

> Plus now it can't be moved into SCSI because I need the unremovable
> call chain.

Yes, with the proposed change, it cannot be moved to SCSI.

> Show me how you propose to fix it differently first, since we both agree
> the initial attempt doesn't work, and we can take the discussion from
> there.

Given that the structures containing the work items are dynamically
allocated, I would introduce a scsi_wq, unconditionally schedule
release works on them and flush them before unloading.  Please note
that workqueues no longer require dedicated threads, so it's quite
cheap.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:19                             ` Tejun Heo
@ 2010-12-15 19:33                               ` James Bottomley
  2010-12-15 19:42                                 ` Tejun Heo
  2010-12-15 19:34                               ` Tejun Heo
  1 sibling, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-15 19:33 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Wed, 2010-12-15 at 20:19 +0100, Tejun Heo wrote:
> Hello,
> 
> On 12/15/2010 08:10 PM, James Bottomley wrote:
> >> Yes, it would do, but we were already too far with the existing
> >> implementation and I don't agree we need more when replacing it with
> >> usual workqueue usage would remove the issue.  So, when we actually
> >> need them, let's consider that or any other way to do it, please.
> >> A core API with only a few users which can be easily replaced isn't
> >> really worth keeping around.  Wouldn't you agree?
> > 
> > Not really ... since the fix is small and obvious.
> 
> IMHO, it's a bit too subtle to be a good API.  The callee is called
> under different (locking) context depending on the callsite and I've
> been already bitten enough times from implicit THIS_MODULEs.  Both
> properties increase possibility of introducing problems which can be
> quite difficult to detect and reproduce.

Both have subtleties ... see below.

> > Plus now it can't be moved into SCSI because I need the unremovable
> > call chain.
> 
> Yes, with the proposed change, it cannot be moved to SCSI.
> 
> > Show me how you propose to fix it differently first, since we both agree
> > the initial attempt doesn't work, and we can take the discussion from
> > there.
> 
> Given that the structures containing the work items are dynamically
> allocated, I would introduce a scsi_wq, unconditionally schedule
> release works on them and flush them before unloading.  Please note
> that workqueues no longer require dedicated threads, so it's quite
> cheap.

A single flush won't quite work.  The target is a parent of the device,
both of which release methods have execute_in_process_context()
requirements.  What can happen here is that the last put of the device
will release the target (from the function).  If both are moved to
workqueues, a single flush could cause the execution of the device work,
which then queues up target work (and makes it still pending).  A double
flush will solve this (because I think our nesting level doesn't go
beyond 2) but it's a bit ugly ...

execute_in_process_context() doesn't have this problem because the first
call automatically executes the second inline (because it now has
context).

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:19                             ` Tejun Heo
  2010-12-15 19:33                               ` James Bottomley
@ 2010-12-15 19:34                               ` Tejun Heo
  1 sibling, 0 replies; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 19:34 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On 12/15/2010 08:19 PM, Tejun Heo wrote:
> Given that the structures containing the work items are dynamically
> allocated, I would introduce a scsi_wq, unconditionally schedule
> release works on them and flush them before unloading.  Please note
> that workqueues no longer require dedicated threads, so it's quite
> cheap.

So, something like the following.

---
 drivers/scsi/scsi.c        |   15 +++++++++++++--
 drivers/scsi/scsi_scan.c   |   26 +++++++++++++-------------
 drivers/scsi/scsi_sysfs.c  |    8 +++++---
 include/scsi/scsi_device.h |    6 ++++--
 4 files changed, 35 insertions(+), 20 deletions(-)

Index: work/drivers/scsi/scsi_scan.c
===================================================================
--- work.orig/drivers/scsi/scsi_scan.c
+++ work/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct d
 }
 EXPORT_SYMBOL(scsi_is_target_device);

+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+	struct scsi_target *starget =
+		container_of(work, struct scsi_target, reap_work);
+
+	transport_remove_device(&starget->dev);
+	device_del(&starget->dev);
+	scsi_target_destroy(starget);
+}
+
 static struct scsi_target *__scsi_find_target(struct device *parent,
 					      int channel, uint id)
 {
@@ -427,6 +437,7 @@ static struct scsi_target *scsi_alloc_ta
 	starget->state = STARGET_CREATED;
 	starget->scsi_level = SCSI_2;
 	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+	INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
  retry:
 	spin_lock_irqsave(shost->host_lock, flags);

@@ -462,21 +473,11 @@ static struct scsi_target *scsi_alloc_ta
 	}
 	/* Unfortunately, we found a dying target; need to
 	 * wait until it's dead before we can get a new one */
+	flush_work(&found_target->reap_work);
 	put_device(&found_target->dev);
-	flush_scheduled_work();
 	goto retry;
 }

-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
-	struct scsi_target *starget =
-		container_of(work, struct scsi_target, ew.work);
-
-	transport_remove_device(&starget->dev);
-	device_del(&starget->dev);
-	scsi_target_destroy(starget);
-}
-
 /**
  * scsi_target_reap - check to see if target is in use and destroy if not
  * @starget: target to be checked
@@ -507,8 +508,7 @@ void scsi_target_reap(struct scsi_target
 	if (state == STARGET_CREATED)
 		scsi_target_destroy(starget);
 	else
-		execute_in_process_context(scsi_target_reap_usercontext,
-					   &starget->ew);
+		queue_work(scsi_wq, &starget->reap_work);
 }

 /**
Index: work/drivers/scsi/scsi_sysfs.c
===================================================================
--- work.orig/drivers/scsi/scsi_sysfs.c
+++ work/drivers/scsi/scsi_sysfs.c
@@ -300,7 +300,7 @@ static void scsi_device_dev_release_user
 	struct list_head *this, *tmp;
 	unsigned long flags;

-	sdev = container_of(work, struct scsi_device, ew.work);
+	sdev = container_of(work, struct scsi_device, release_work);

 	parent = sdev->sdev_gendev.parent;
 	starget = to_scsi_target(parent);
@@ -343,8 +343,8 @@ static void scsi_device_dev_release_user
 static void scsi_device_dev_release(struct device *dev)
 {
 	struct scsi_device *sdp = to_scsi_device(dev);
-	execute_in_process_context(scsi_device_dev_release_usercontext,
-				   &sdp->ew);
+
+	queue_work(scsi_wq, &sdp->release_work);
 }

 static struct class sdev_class = {
@@ -1069,6 +1069,8 @@ void scsi_sysfs_device_initialize(struct
 	dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
 		     sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
 	sdev->scsi_level = starget->scsi_level;
+	INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
 	transport_setup_device(&sdev->sdev_gendev);
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_add_tail(&sdev->same_target_siblings, &starget->devices);
Index: work/include/scsi/scsi_device.h
===================================================================
--- work.orig/include/scsi/scsi_device.h
+++ work/include/scsi/scsi_device.h
@@ -168,7 +168,7 @@ struct scsi_device {
 	struct device		sdev_gendev,
 				sdev_dev;

-	struct execute_work	ew; /* used to get process context on put */
+	struct work_struct	release_work; /* for process context on put */

 	struct scsi_dh_data	*scsi_dh_data;
 	enum scsi_device_state sdev_state;
@@ -258,7 +258,7 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED	3

 	char			scsi_level;
-	struct execute_work	ew;
+	struct work_struct	reap_work;
 	enum scsi_target_state	state;
 	void 			*hostdata; /* available to low-level driver */
 	unsigned long		starget_data[0]; /* for the transport */
@@ -276,6 +276,8 @@ static inline struct scsi_target *scsi_t
 #define starget_printk(prefix, starget, fmt, a...)	\
 	dev_printk(prefix, &(starget)->dev, fmt, ##a)

+extern struct workqueue_struct *scsi_wq;
+
 extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
 		uint, uint, uint, void *hostdata);
 extern int scsi_add_device(struct Scsi_Host *host, uint channel,
Index: work/drivers/scsi/scsi.c
===================================================================
--- work.orig/drivers/scsi/scsi.c
+++ work/drivers/scsi/scsi.c
@@ -70,6 +70,11 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/scsi.h>

+/*
+ * Utility multithreaded workqueue for SCSI.
+ */
+struct workqueue_struct *scsi_wq;
+
 static void scsi_done(struct scsi_cmnd *cmd);

 /*
@@ -1306,11 +1311,14 @@ MODULE_PARM_DESC(scsi_logging_level, "a

 static int __init init_scsi(void)
 {
-	int error;
+	int error = -ENOMEM;

+	scsi_wq = alloc_workqueue("scsi", 0, 0);
+	if (!scsi_wq)
+		return error;
 	error = scsi_init_queue();
 	if (error)
-		return error;
+		goto cleanup_wq;
 	error = scsi_init_procfs();
 	if (error)
 		goto cleanup_queue;
@@ -1342,6 +1350,8 @@ cleanup_procfs:
 	scsi_exit_procfs();
 cleanup_queue:
 	scsi_exit_queue();
+cleanup_wq:
+	destroy_workqueue(scsi_wq);
 	printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n",
 	       -error);
 	return error;
@@ -1356,6 +1366,7 @@ static void __exit exit_scsi(void)
 	scsi_exit_devinfo();
 	scsi_exit_procfs();
 	scsi_exit_queue();
+	destroy_workqueue(scsi_wq);
 }

 subsys_initcall(init_scsi);

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:33                               ` James Bottomley
@ 2010-12-15 19:42                                 ` Tejun Heo
  2010-12-15 19:46                                   ` Tejun Heo
  2010-12-16 14:39                                   ` James Bottomley
  0 siblings, 2 replies; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 19:42 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On 12/15/2010 08:33 PM, James Bottomley wrote:
> A single flush won't quite work.  The target is a parent of the device,
> both of which release methods have execute_in_process_context()
> requirements.  What can happen here is that the last put of the device
> will release the target (from the function).  If both are moved to
> workqueues, a single flush could cause the execution of the device work,
> which then queues up target work (and makes it still pending).  A double
> flush will solve this (because I think our nesting level doesn't go
> beyond 2) but it's a bit ugly ...

Yeap, that's an interesting point actually.  I just sent the patch
but there is no explicit flush.  It's implied by destroy_work() and
it has been a bit bothering that destroy_work() could exit with
pending works if execution of the current one produces more.  I was
pondering making destroy_workqueue() actually drain all the scheduled
works and maybe trigger a warning if it seems to loop for too long.

But, anyways, I don't think that's gonna happen here.  If the last put
hasn't been executed the module reference wouldn't be zero, so module
unload can't initiate, right?

> execute_in_process_context() doesn't have this problem because the first
> call automatically executes the second inline (because it now has
> context).

Yes, it wouldn't have that problem but it becomes subtle to high
heavens.

I don't think the queue destroyed with pending works problem exists
here because of the module refcnts but I could be mistaken.  Either
way, I'll fix destroy_workqueue() such that it actually drains the
workqueue before destruction, which actually seems like the right
thing to do so that scsi doesn't have to worry about double flushing
or whatnot.  How does that sound?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:42                                 ` Tejun Heo
@ 2010-12-15 19:46                                   ` Tejun Heo
  2010-12-16 14:39                                   ` James Bottomley
  1 sibling, 0 replies; 25+ messages in thread
From: Tejun Heo @ 2010-12-15 19:46 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On 12/15/2010 08:42 PM, Tejun Heo wrote:
> Yeap, that's an interesting point actually.  I just sent the patch

Hmm... I remember sending but don't see it anywhere.  Where did it go?
Anyways, here it is again.

---
 drivers/scsi/scsi.c        |   15 +++++++++++++--
 drivers/scsi/scsi_scan.c   |   26 +++++++++++++-------------
 drivers/scsi/scsi_sysfs.c  |    8 +++++---
 include/scsi/scsi_device.h |    6 ++++--
 4 files changed, 35 insertions(+), 20 deletions(-)

Index: work/drivers/scsi/scsi_scan.c
===================================================================
--- work.orig/drivers/scsi/scsi_scan.c
+++ work/drivers/scsi/scsi_scan.c
@@ -362,6 +362,16 @@ int scsi_is_target_device(const struct d
 }
 EXPORT_SYMBOL(scsi_is_target_device);

+static void scsi_target_reap_usercontext(struct work_struct *work)
+{
+	struct scsi_target *starget =
+		container_of(work, struct scsi_target, reap_work);
+
+	transport_remove_device(&starget->dev);
+	device_del(&starget->dev);
+	scsi_target_destroy(starget);
+}
+
 static struct scsi_target *__scsi_find_target(struct device *parent,
 					      int channel, uint id)
 {
@@ -427,6 +437,7 @@ static struct scsi_target *scsi_alloc_ta
 	starget->state = STARGET_CREATED;
 	starget->scsi_level = SCSI_2;
 	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+	INIT_WORK(&starget->reap_work, scsi_target_reap_usercontext);
  retry:
 	spin_lock_irqsave(shost->host_lock, flags);

@@ -462,21 +473,11 @@ static struct scsi_target *scsi_alloc_ta
 	}
 	/* Unfortunately, we found a dying target; need to
 	 * wait until it's dead before we can get a new one */
+	flush_work(&found_target->reap_work);
 	put_device(&found_target->dev);
-	flush_scheduled_work();
 	goto retry;
 }

-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
-	struct scsi_target *starget =
-		container_of(work, struct scsi_target, ew.work);
-
-	transport_remove_device(&starget->dev);
-	device_del(&starget->dev);
-	scsi_target_destroy(starget);
-}
-
 /**
  * scsi_target_reap - check to see if target is in use and destroy if not
  * @starget: target to be checked
@@ -507,8 +508,7 @@ void scsi_target_reap(struct scsi_target
 	if (state == STARGET_CREATED)
 		scsi_target_destroy(starget);
 	else
-		execute_in_process_context(scsi_target_reap_usercontext,
-					   &starget->ew);
+		queue_work(scsi_wq, &starget->reap_work);
 }

 /**
Index: work/drivers/scsi/scsi_sysfs.c
===================================================================
--- work.orig/drivers/scsi/scsi_sysfs.c
+++ work/drivers/scsi/scsi_sysfs.c
@@ -300,7 +300,7 @@ static void scsi_device_dev_release_user
 	struct list_head *this, *tmp;
 	unsigned long flags;

-	sdev = container_of(work, struct scsi_device, ew.work);
+	sdev = container_of(work, struct scsi_device, release_work);

 	parent = sdev->sdev_gendev.parent;
 	starget = to_scsi_target(parent);
@@ -343,8 +343,8 @@ static void scsi_device_dev_release_user
 static void scsi_device_dev_release(struct device *dev)
 {
 	struct scsi_device *sdp = to_scsi_device(dev);
-	execute_in_process_context(scsi_device_dev_release_usercontext,
-				   &sdp->ew);
+
+	queue_work(scsi_wq, &sdp->release_work);
 }

 static struct class sdev_class = {
@@ -1069,6 +1069,8 @@ void scsi_sysfs_device_initialize(struct
 	dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%d",
 		     sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
 	sdev->scsi_level = starget->scsi_level;
+	INIT_WORK(&sdev->release_work, scsi_device_dev_release_usercontext);
+
 	transport_setup_device(&sdev->sdev_gendev);
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_add_tail(&sdev->same_target_siblings, &starget->devices);
Index: work/include/scsi/scsi_device.h
===================================================================
--- work.orig/include/scsi/scsi_device.h
+++ work/include/scsi/scsi_device.h
@@ -168,7 +168,7 @@ struct scsi_device {
 	struct device		sdev_gendev,
 				sdev_dev;

-	struct execute_work	ew; /* used to get process context on put */
+	struct work_struct	release_work; /* for process context on put */

 	struct scsi_dh_data	*scsi_dh_data;
 	enum scsi_device_state sdev_state;
@@ -258,7 +258,7 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED	3

 	char			scsi_level;
-	struct execute_work	ew;
+	struct work_struct	reap_work;
 	enum scsi_target_state	state;
 	void 			*hostdata; /* available to low-level driver */
 	unsigned long		starget_data[0]; /* for the transport */
@@ -276,6 +276,8 @@ static inline struct scsi_target *scsi_t
 #define starget_printk(prefix, starget, fmt, a...)	\
 	dev_printk(prefix, &(starget)->dev, fmt, ##a)

+extern struct workqueue_struct *scsi_wq;
+
 extern struct scsi_device *__scsi_add_device(struct Scsi_Host *,
 		uint, uint, uint, void *hostdata);
 extern int scsi_add_device(struct Scsi_Host *host, uint channel,
Index: work/drivers/scsi/scsi.c
===================================================================
--- work.orig/drivers/scsi/scsi.c
+++ work/drivers/scsi/scsi.c
@@ -70,6 +70,11 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/scsi.h>

+/*
+ * Utility multithreaded workqueue for SCSI.
+ */
+struct workqueue_struct *scsi_wq;
+
 static void scsi_done(struct scsi_cmnd *cmd);

 /*
@@ -1306,11 +1311,14 @@ MODULE_PARM_DESC(scsi_logging_level, "a

 static int __init init_scsi(void)
 {
-	int error;
+	int error = -ENOMEM;

+	scsi_wq = alloc_workqueue("scsi", 0, 0);
+	if (!scsi_wq)
+		return error;
 	error = scsi_init_queue();
 	if (error)
-		return error;
+		goto cleanup_wq;
 	error = scsi_init_procfs();
 	if (error)
 		goto cleanup_queue;
@@ -1342,6 +1350,8 @@ cleanup_procfs:
 	scsi_exit_procfs();
 cleanup_queue:
 	scsi_exit_queue();
+cleanup_wq:
+	destroy_workqueue(scsi_wq);
 	printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n",
 	       -error);
 	return error;
@@ -1356,6 +1366,7 @@ static void __exit exit_scsi(void)
 	scsi_exit_devinfo();
 	scsi_exit_procfs();
 	scsi_exit_queue();
+	destroy_workqueue(scsi_wq);
 }

 subsys_initcall(init_scsi);

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-15 19:42                                 ` Tejun Heo
  2010-12-15 19:46                                   ` Tejun Heo
@ 2010-12-16 14:39                                   ` James Bottomley
  2010-12-16 15:51                                     ` Tejun Heo
  1 sibling, 1 reply; 25+ messages in thread
From: James Bottomley @ 2010-12-16 14:39 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

On Wed, 2010-12-15 at 20:42 +0100, Tejun Heo wrote:
> On 12/15/2010 08:33 PM, James Bottomley wrote:
> > A single flush won't quite work.  The target is a parent of the device,
> > both of which release methods have execute_in_process_context()
> > requirements.  What can happen here is that the last put of the device
> > will release the target (from the function).  If both are moved to
> > workqueues, a single flush could cause the execution of the device work,
> > which then queues up target work (and makes it still pending).  A double
> > flush will solve this (because I think our nesting level doesn't go
> > beyond 2) but it's a bit ugly ...
> 
> Yeap, that's an interesting point actually.  I just sent the patch
> butn there is no explicit flush.  It's implied by destroy_work() and
> it has been a bit bothering that destroy_work() could exit with
> pending works if execution of the current one produces more.  I was
> pondering making destroy_workqueue() actually drain all the scheduled
> works and maybe trigger a warning if it seems to loop for too long.
> 
> But, anyways, I don't think that's gonna happen here.  If the last put
> hasn't been executed the module reference wouldn't be zero, so module
> unload can't initiate, right?

Wrong I'm afraid.  There's a nasty two level complexity in module
references:  Anything which takes an external reference (like open or
mount) does indeed take the module reference and prevent removal.
Anything that takes an internal reference doesn't ... we wait for all of
them to come back in the final removal of the bus type.  This is to
prevent a module removal deadlock.  The callbacks are internal
references, so we wait for them in module_exit() but don't block
module_exit() from being called ... meaning the double callback scenario
could be outstanding.

James



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] scsi: don't use execute_in_process_context()
  2010-12-16 14:39                                   ` James Bottomley
@ 2010-12-16 15:51                                     ` Tejun Heo
  0 siblings, 0 replies; 25+ messages in thread
From: Tejun Heo @ 2010-12-16 15:51 UTC (permalink / raw)
  To: James Bottomley; +Cc: Linux SCSI List, FUJITA Tomonori, lkml

Hello, James.

On 12/16/2010 03:39 PM, James Bottomley wrote:
>> But, anyways, I don't think that's gonna happen here.  If the last put
>> hasn't been executed the module reference wouldn't be zero, so module
>> unload can't initiate, right?
> 
> Wrong I'm afraid.  There's a nasty two level complexity in module
> references:  Anything which takes an external reference (like open or
> mount) does indeed take the module reference and prevent removal.
> Anything that takes an internal reference doesn't ... we wait for all of
> them to come back in the final removal of the bus type.  This is to
> prevent a module removal deadlock.  The callbacks are internal
> references, so we wait for them in module_exit() but don't block
> module_exit() from being called ... meaning the double callback scenario
> could be outstanding.

Okay, so something like the following should solve the problem.  Once
destroy_workqueue() is called, queueing is allowed from only the same
workqueue and flush is repeated until the queue is drained, which
seems to be the right thing to do on destruction regardless of the
SCSI changes.

With the following change, the previous patch should do fine for SCSI
and the ew can be removed.  Please note that the following patch is
still only compile tested.

Are we in agreement now?

Thanks.

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 90db1bd..8dd0b80 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -932,6 +932,32 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 		wake_up_worker(gcwq);
 }

+/*
+ * Test whether @work is being queued from another work executing on the
+ * same workqueue.
+ */
+static bool is_chained_work(struct work_struct *work)
+{
+	struct workqueue_struct *wq = get_work_cwq(work)->wq;
+	unsigned long flags;
+	unsigned int cpu;
+
+	for_each_gcwq_cpu(cpu) {
+		struct global_cwq *gcwq = get_gcwq(cpu);
+		struct worker *worker;
+
+		spin_lock_irqsave(&gcwq->lock, flags);
+		worker = find_worker_executing_work(gcwq, work);
+		if (worker && worker->task == current &&
+		    worker->current_cwq->wq == wq) {
+			spin_unlock_irqrestore(&gcwq->lock, flags);
+			return true;
+		}
+		spin_unlock_irqrestore(&gcwq->lock, flags);
+	}
+	return false;
+}
+
 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -943,7 +969,8 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,

 	debug_work_activate(work);

-	if (WARN_ON_ONCE(wq->flags & WQ_DYING))
+	/* if dying, only works from the same workqueue are allowed */
+	if (WARN_ON_ONCE((wq->flags & WQ_DYING) && !is_chained_work(work)))
 		return;

 	/* determine gcwq to use */
@@ -2936,11 +2963,34 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
  */
 void destroy_workqueue(struct workqueue_struct *wq)
 {
+	unsigned int flush_cnt = 0;
 	unsigned int cpu;

+	/*
+	 * Mark @wq dying and drain all pending works.  Once WQ_DYING is
+	 * set, only chain queueing is allowed.  IOW, only currently
+	 * pending or running works on @wq can queue further works on it.
+	 * @wq is flushed repeatedly until it becomes empty.  The number of
+	 * flushing is detemined by the depth of chaining and should be
+	 * relatively short.  Whine if it takes too long.
+	 */
 	wq->flags |= WQ_DYING;
+reflush:
 	flush_workqueue(wq);

+	for_each_cwq_cpu(cpu, wq) {
+		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+		if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+			continue;
+
+		if (flush_cnt++ == 10 || flush_cnt % 100 == 0)
+			printk(KERN_WARNING "workqueue %s: flush on "
+			       "destruction isn't complete after %u tries\n",
+			       wq->name, flush_cnt);
+		goto reflush;
+	}
+
 	/*
 	 * wq list is used to freeze wq, remove from list after
 	 * flushing is complete in case freeze races us.

^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2010-12-16 15:51 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-19 12:57 [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg Tejun Heo
2010-10-19 12:58 ` [PATCH 2/2] scsi: don't use execute_in_process_context() Tejun Heo
2010-10-22 10:03   ` FUJITA Tomonori
2010-12-12 22:48   ` James Bottomley
2010-12-14  9:53     ` Tejun Heo
2010-12-14 14:09       ` James Bottomley
2010-12-14 14:19         ` Tejun Heo
2010-12-14 14:26           ` James Bottomley
2010-12-14 14:33             ` Tejun Heo
2010-12-15  3:04               ` James Bottomley
2010-12-15 15:47                 ` Tejun Heo
2010-12-15 15:54                   ` James Bottomley
2010-12-15 16:00                     ` Tejun Heo
2010-12-15 17:22                       ` James Bottomley
2010-12-15 19:05                         ` Tejun Heo
2010-12-15 19:10                           ` James Bottomley
2010-12-15 19:19                             ` Tejun Heo
2010-12-15 19:33                               ` James Bottomley
2010-12-15 19:42                                 ` Tejun Heo
2010-12-15 19:46                                   ` Tejun Heo
2010-12-16 14:39                                   ` James Bottomley
2010-12-16 15:51                                     ` Tejun Heo
2010-12-15 19:34                               ` Tejun Heo
2010-10-20 14:29 ` [PATCH 1/2] scsi: remove bogus use of struct execute_work in sg FUJITA Tomonori
2010-10-20 19:56 ` Douglas Gilbert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.