All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 0/2] target refcounting infrastructure fixes for usb
@ 2014-01-03  0:36 James Bottomley
       [not found] ` <1388709362.2399.31.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: James Bottomley @ 2014-01-03  0:36 UTC (permalink / raw)
  To: linux-scsi; +Cc: Alan Stern, USB list

This set should fix our target problems with USB by making the target
visibility properly reference counted.  Since it's a major change to the
infrastructure, we'll incubate upstream first before backporting to
stable.

James Bottomley (2):
  [SCSI] fix our current target reap infrastructure.
  [SCSI] dual scan thread bug fix

 drivers/scsi/scsi_scan.c   | 108 +++++++++++++++++++++++++++++----------------
 drivers/scsi/scsi_sysfs.c  |  20 ++++++---
 include/scsi/scsi_device.h |   3 +-
 3 files changed, 84 insertions(+), 47 deletions(-)

James


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC 1/2] fix our current target reap infrastructure
       [not found] ` <1388709362.2399.31.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
@ 2014-01-03  0:38   ` James Bottomley
  2014-01-03  0:38   ` [RFC 2/2] dual scan thread bug fix James Bottomley
  1 sibling, 0 replies; 12+ messages in thread
From: James Bottomley @ 2014-01-03  0:38 UTC (permalink / raw)
  To: linux-scsi; +Cc: Alan Stern, USB list

This patch eliminates the reap_ref and replaces it with a proper kref.
On last put of this kref, the target is removed from visibility in
sysfs.  The final call to scsi_target_reap() for the device is done from
__scsi_remove_device() and only if the device was made visible.  This
ensures that the target disappears as soon as the last device is gone
rather than waiting until final release of the device (which is often
too long).

Reviewed-by: Alan Stern <stern-nwvwT67g6+6dFdvTe/nMLpVzexx5G7lz@public.gmane.org>
Signed-off-by: James Bottomley <JBottomley-MU7nAjRaF3makBO8gow8eQ@public.gmane.org>
---
 drivers/scsi/scsi_scan.c   | 95 ++++++++++++++++++++++++++++------------------
 drivers/scsi/scsi_sysfs.c  | 20 +++++++---
 include/scsi/scsi_device.h |  3 +-
 3 files changed, 73 insertions(+), 45 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 307a811..ef3f958 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -371,6 +371,31 @@ static struct scsi_target *__scsi_find_target(struct device *parent,
 }
 
 /**
+ * scsi_target_reap_ref_release - remove target from visibility
+ * @kref: the reap_ref in the target being released
+ *
+ * Called on last put of reap_ref, which is the indication that no device
+ * under this target is visible anymore, so render the target invisible in
+ * sysfs.  Note: we have to be in user context here because the target reaps
+ * should be done in places where the scsi device visibility is being removed.
+ */
+static void scsi_target_reap_ref_release(struct kref *kref)
+{
+	struct scsi_target *starget
+		= container_of(kref, struct scsi_target, reap_ref);
+
+	transport_remove_device(&starget->dev);
+	device_del(&starget->dev);
+	starget->state = STARGET_DEL;
+	scsi_target_destroy(starget);
+}
+
+static void scsi_target_reap_ref_put(struct scsi_target *starget)
+{
+	kref_put(&starget->reap_ref, scsi_target_reap_ref_release);
+}
+
+/**
  * scsi_alloc_target - allocate a new or find an existing target
  * @parent:	parent of the target (need not be a scsi host)
  * @channel:	target channel number (zero if no channels)
@@ -401,7 +426,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	}
 	dev = &starget->dev;
 	device_initialize(dev);
-	starget->reap_ref = 1;
+	kref_init(&starget->reap_ref);
 	dev->parent = get_device(parent);
 	dev_set_name(dev, "target%d:%d:%d", shost->host_no, channel, id);
 	dev->bus = &scsi_bus_type;
@@ -441,29 +466,36 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	return starget;
 
  found:
-	found_target->reap_ref++;
+	if (!kref_get_unless_zero(&found_target->reap_ref))
+		/*
+		 * release routine already fired.  Target is dead, but
+		 * STARGET_DEL may not yet be set (set in the release
+		 * routine), so set here as well, just in case
+		 */
+		found_target->state = STARGET_DEL;
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	if (found_target->state != STARGET_DEL) {
 		put_device(dev);
 		return found_target;
 	}
-	/* Unfortunately, we found a dying target; need to
-	 * wait until it's dead before we can get a new one */
+	/*
+	 * Unfortunately, we found a dying target; need to wait until it's
+	 * dead before we can get a new one.  There is an anomaly here.  We
+	 * *should* call scsi_target_reap() to balance the kref_get() of the
+	 * reap_ref above.  However, since the target is in state STARGET_DEL,
+	 * it's already invisible and the reap_ref is irrelevant.  If we call
+	 * scsi_target_reap() we might spuriously do another device_del() on
+	 * an already invisible target.
+	 */
 	put_device(&found_target->dev);
-	flush_scheduled_work();
+	/*
+	 * length of time is irrelevant here, we just want to yield the CPU
+	 * for a tick to avoid busy waiting for the target to die.
+	 */
+	msleep(1);
 	goto retry;
 }
 
-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
-	struct scsi_target *starget =
-		container_of(work, struct scsi_target, ew.work);
-
-	transport_remove_device(&starget->dev);
-	device_del(&starget->dev);
-	scsi_target_destroy(starget);
-}
-
 /**
  * scsi_target_reap - check to see if target is in use and destroy if not
  * @starget: target to be checked
@@ -474,28 +506,11 @@ static void scsi_target_reap_usercontext(struct work_struct *work)
  */
 void scsi_target_reap(struct scsi_target *starget)
 {
-	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
-	unsigned long flags;
-	enum scsi_target_state state;
-	int empty = 0;
-
-	spin_lock_irqsave(shost->host_lock, flags);
-	state = starget->state;
-	if (--starget->reap_ref == 0 && list_empty(&starget->devices)) {
-		empty = 1;
-		starget->state = STARGET_DEL;
-	}
-	spin_unlock_irqrestore(shost->host_lock, flags);
-
-	if (!empty)
-		return;
-
-	BUG_ON(state == STARGET_DEL);
-	if (state == STARGET_CREATED)
+	BUG_ON(starget->state == STARGET_DEL);
+	if (starget->state == STARGET_CREATED)
 		scsi_target_destroy(starget);
 	else
-		execute_in_process_context(scsi_target_reap_usercontext,
-					   &starget->ew);
+		scsi_target_reap_ref_put(starget);
 }
 
 /**
@@ -1532,6 +1547,10 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 	}
 	mutex_unlock(&shost->scan_mutex);
 	scsi_autopm_put_target(starget);
+	/*
+	 * paired with scsi_alloc_target().  Target will be destroyed unless
+	 * scsi_probe_and_add_lun made an underlying device visible
+	 */
 	scsi_target_reap(starget);
 	put_device(&starget->dev);
 
@@ -1612,8 +1631,10 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 
  out_reap:
 	scsi_autopm_put_target(starget);
-	/* now determine if the target has any children at all
-	 * and if not, nuke it */
+	/*
+	 * paired with scsi_alloc_target(): determine if the target has
+	 * any children at all and if not, nuke it
+	 */
 	scsi_target_reap(starget);
 
 	put_device(&starget->dev);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 9117d0b..665acbf 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -383,17 +383,14 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 {
 	struct scsi_device *sdev;
 	struct device *parent;
-	struct scsi_target *starget;
 	struct list_head *this, *tmp;
 	unsigned long flags;
 
 	sdev = container_of(work, struct scsi_device, ew.work);
 
 	parent = sdev->sdev_gendev.parent;
-	starget = to_scsi_target(parent);
 
 	spin_lock_irqsave(sdev->host->host_lock, flags);
-	starget->reap_ref++;
 	list_del(&sdev->siblings);
 	list_del(&sdev->same_target_siblings);
 	list_del(&sdev->starved_entry);
@@ -413,8 +410,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 	/* NULL queue means the device can't be used */
 	sdev->request_queue = NULL;
 
-	scsi_target_reap(scsi_target(sdev));
-
 	kfree(sdev->inquiry);
 	kfree(sdev);
 
@@ -1071,6 +1066,13 @@ void __scsi_remove_device(struct scsi_device *sdev)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
 
+	/*
+	 * Paired with the kref_get() in scsi_sysfs_initialize().  We have
+	 * removed sysfs visibility from the device, so make the target
+	 * invisible if this was the last device underneath it.
+	 */
+	scsi_target_reap(scsi_target(sdev));
+
 	put_device(dev);
 }
 
@@ -1133,7 +1135,7 @@ void scsi_remove_target(struct device *dev)
 			continue;
 		if (starget->dev.parent == dev || &starget->dev == dev) {
 			/* assuming new targets arrive at the end */
-			starget->reap_ref++;
+			kref_get(&starget->reap_ref);
 			spin_unlock_irqrestore(shost->host_lock, flags);
 			if (last)
 				scsi_target_reap(last);
@@ -1217,6 +1219,12 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev)
 	list_add_tail(&sdev->same_target_siblings, &starget->devices);
 	list_add_tail(&sdev->siblings, &shost->__devices);
 	spin_unlock_irqrestore(shost->host_lock, flags);
+	/*
+	 * device can now only be removed via __scsi_remove_device() so hold
+	 * the target.  Target will be held in CREATED state until something
+	 * beneath it becomes visible (in which case it moves to RUNNING)
+	 */
+	kref_get(&starget->reap_ref);
 }
 
 int scsi_is_sdev_device(const struct device *dev)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d65fbec..b4f1eff 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -257,7 +257,7 @@ struct scsi_target {
 	struct list_head	siblings;
 	struct list_head	devices;
 	struct device		dev;
-	unsigned int		reap_ref; /* protected by the host lock */
+	struct kref		reap_ref; /* last put renders target invisible */
 	unsigned int		channel;
 	unsigned int		id; /* target id ... replace
 				     * scsi_device.id eventually */
@@ -284,7 +284,6 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED	3
 
 	char			scsi_level;
-	struct execute_work	ew;
 	enum scsi_target_state	state;
 	void 			*hostdata; /* available to low-level driver */
 	unsigned long		starget_data[0]; /* for the transport */
-- 
1.8.4.3



--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC 2/2] dual scan thread bug fix
       [not found] ` <1388709362.2399.31.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
  2014-01-03  0:38   ` [RFC 1/2] fix our current target reap infrastructure James Bottomley
@ 2014-01-03  0:38   ` James Bottomley
       [not found]     ` <1388709533.2399.34.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 12+ messages in thread
From: James Bottomley @ 2014-01-03  0:38 UTC (permalink / raw)
  To: linux-scsi; +Cc: Alan Stern, USB list

In the highly unusual case where two threads are running concurrently through
the scanning code scanning the same target, we run into the situation where
one may allocate the target while the other is still using it.  In this case,
because the reap checks for STARGET_CREATED and kills the target without
reference counting, the second thread will do the wrong thing on reap.

Fix this by reference counting even creates and doing the STARGET_CREATED
check in the final put.

Signed-off-by: James Bottomley <JBottomley-MU7nAjRaF3makBO8gow8eQ@public.gmane.org>
---
 drivers/scsi/scsi_scan.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index ef3f958..82cf902 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -320,6 +320,7 @@ static void scsi_target_destroy(struct scsi_target *starget)
 	struct Scsi_Host *shost = dev_to_shost(dev->parent);
 	unsigned long flags;
 
+	starget->state = STARGET_DEL;
 	transport_destroy_device(dev);
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->hostt->target_destroy)
@@ -384,9 +385,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
 	struct scsi_target *starget
 		= container_of(kref, struct scsi_target, reap_ref);
 
-	transport_remove_device(&starget->dev);
-	device_del(&starget->dev);
-	starget->state = STARGET_DEL;
+	/*
+	 * if we get here and the target is still in the CREATED state that
+	 * means it was allocated but never made visible (because a scan
+	 * turned up no LUNs), so don't call device_del() on it.
+	 */
+	if (starget->state == STARGET_RUNNING) {
+		transport_remove_device(&starget->dev);
+		device_del(&starget->dev);
+	}
 	scsi_target_destroy(starget);
 }
 
@@ -506,11 +513,13 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
  */
 void scsi_target_reap(struct scsi_target *starget)
 {
+	/*
+	 * serious problem if this triggers: STARGET_DEL is only set in the
+	 * kref release routine, so we're doing another final put on an
+	 * already released kref
+	 */
 	BUG_ON(starget->state == STARGET_DEL);
-	if (starget->state == STARGET_CREATED)
-		scsi_target_destroy(starget);
-	else
-		scsi_target_reap_ref_put(starget);
+	scsi_target_reap_ref_put(starget);
 }
 
 /**
-- 
1.8.4.3



--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] dual scan thread bug fix
       [not found]     ` <1388709533.2399.34.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
@ 2014-01-03 15:58       ` Alan Stern
  2014-01-04  5:41         ` James Bottomley
  0 siblings, 1 reply; 12+ messages in thread
From: Alan Stern @ 2014-01-03 15:58 UTC (permalink / raw)
  To: James Bottomley; +Cc: linux-scsi, USB list

On Thu, 2 Jan 2014, James Bottomley wrote:

> In the highly unusual case where two threads are running concurrently through
> the scanning code scanning the same target, we run into the situation where
> one may allocate the target while the other is still using it.  In this case,
> because the reap checks for STARGET_CREATED and kills the target without
> reference counting, the second thread will do the wrong thing on reap.
> 
> Fix this by reference counting even creates and doing the STARGET_CREATED
> check in the final put.

I'm still concerned about one thing.  The previous patch does this in
scsi_alloc_target():

>   found:
> -	found_target->reap_ref++;
> +	if (!kref_get_unless_zero(&found_target->reap_ref))
> +		/*
> +		 * release routine already fired.  Target is dead, but
> +		 * STARGET_DEL may not yet be set (set in the release
> +		 * routine), so set here as well, just in case
> +		 */
> +		found_target->state = STARGET_DEL;
>  	spin_unlock_irqrestore(shost->host_lock, flags);

As a result, the two comments in this patch aren't right:

> @@ -384,9 +385,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
>  	struct scsi_target *starget
>  		= container_of(kref, struct scsi_target, reap_ref);
>  
> -	transport_remove_device(&starget->dev);
> -	device_del(&starget->dev);
> -	starget->state = STARGET_DEL;
> +	/*
> +	 * if we get here and the target is still in the CREATED state that
> +	 * means it was allocated but never made visible (because a scan
> +	 * turned up no LUNs), so don't call device_del() on it.
> +	 */
> +	if (starget->state == STARGET_RUNNING) {
> +		transport_remove_device(&starget->dev);
> +		device_del(&starget->dev);
> +	}

Here the state could already be STARGET_DEL, even though the target is
still visible.

Also, it's a little odd that the comment talks about CREATED but the 
code really checks for RUNNING.  They should be consistent.

> @@ -506,11 +513,13 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>   */
>  void scsi_target_reap(struct scsi_target *starget)
>  {
> +	/*
> +	 * serious problem if this triggers: STARGET_DEL is only set in the
> +	 * kref release routine, so we're doing another final put on an
> +	 * already released kref
> +	 */
>  	BUG_ON(starget->state == STARGET_DEL);

Here the code is okay but the comment is wrong: STARGET_DEL is set in 
_two_ places (but neither of them runs until reap_ref has reached 0).

Would it be better in scsi_alloc_target() to behave as though the state 
were STARGET_DEL without actually setting it?

Alan Stern


--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] dual scan thread bug fix
  2014-01-03 15:58       ` Alan Stern
@ 2014-01-04  5:41         ` James Bottomley
  2014-01-04 16:27           ` Alan Stern
  0 siblings, 1 reply; 12+ messages in thread
From: James Bottomley @ 2014-01-04  5:41 UTC (permalink / raw)
  To: Alan Stern; +Cc: linux-scsi, USB list


On Fri, 2014-01-03 at 10:58 -0500, Alan Stern wrote:
> On Thu, 2 Jan 2014, James Bottomley wrote:
> 
> > In the highly unusual case where two threads are running concurrently through
> > the scanning code scanning the same target, we run into the situation where
> > one may allocate the target while the other is still using it.  In this case,
> > because the reap checks for STARGET_CREATED and kills the target without
> > reference counting, the second thread will do the wrong thing on reap.
> > 
> > Fix this by reference counting even creates and doing the STARGET_CREATED
> > check in the final put.
> 
> I'm still concerned about one thing.  The previous patch does this in
> scsi_alloc_target():
> 
> >   found:
> > -	found_target->reap_ref++;
> > +	if (!kref_get_unless_zero(&found_target->reap_ref))
> > +		/*
> > +		 * release routine already fired.  Target is dead, but
> > +		 * STARGET_DEL may not yet be set (set in the release
> > +		 * routine), so set here as well, just in case
> > +		 */
> > +		found_target->state = STARGET_DEL;
> >  	spin_unlock_irqrestore(shost->host_lock, flags);
> 
> As a result, the two comments in this patch aren't right:
> 
> > @@ -384,9 +385,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
> >  	struct scsi_target *starget
> >  		= container_of(kref, struct scsi_target, reap_ref);
> >  
> > -	transport_remove_device(&starget->dev);
> > -	device_del(&starget->dev);
> > -	starget->state = STARGET_DEL;
> > +	/*
> > +	 * if we get here and the target is still in the CREATED state that
> > +	 * means it was allocated but never made visible (because a scan
> > +	 * turned up no LUNs), so don't call device_del() on it.
> > +	 */
> > +	if (starget->state == STARGET_RUNNING) {
> > +		transport_remove_device(&starget->dev);
> > +		device_del(&starget->dev);
> > +	}
> 
> Here the state could already be STARGET_DEL, even though the target is
> still visible.

Well, I agree with the theory.  In practise, there are only a few
machine instructions between the kref going to zero and us reaching that
point, because kref_release will jump into this routine next, so the
condition would be very hard to see.  However, I suppose it's easy to
close by checking for != STARGET_CREATED and there's no reason not to do
that, so I'll change it.

> Also, it's a little odd that the comment talks about CREATED but the 
> code really checks for RUNNING.  They should be consistent.

!= STARGET_CREATED should solve this.

> > @@ -506,11 +513,13 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
> >   */
> >  void scsi_target_reap(struct scsi_target *starget)
> >  {
> > +	/*
> > +	 * serious problem if this triggers: STARGET_DEL is only set in the
> > +	 * kref release routine, so we're doing another final put on an
> > +	 * already released kref
> > +	 */
> >  	BUG_ON(starget->state == STARGET_DEL);
> 
> Here the code is okay but the comment is wrong: STARGET_DEL is set in 
> _two_ places (but neither of them runs until reap_ref has reached 0).
> 
> Would it be better in scsi_alloc_target() to behave as though the state 
> were STARGET_DEL without actually setting it?

Yes, I'll update the comment to it only goes to DEL after the kref goes
to zero.

How does the attached diff look?

James

---

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 82cf902..2f7de33 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -390,7 +390,7 @@ static void scsi_target_reap_ref_release(struct kref *kref)
 	 * means it was allocated but never made visible (because a scan
 	 * turned up no LUNs), so don't call device_del() on it.
 	 */
-	if (starget->state == STARGET_RUNNING) {
+	if (starget->state != STARGET_CREATED) {
 		transport_remove_device(&starget->dev);
 		device_del(&starget->dev);
 	}
@@ -514,9 +514,9 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 void scsi_target_reap(struct scsi_target *starget)
 {
 	/*
-	 * serious problem if this triggers: STARGET_DEL is only set in the
-	 * kref release routine, so we're doing another final put on an
-	 * already released kref
+	 * serious problem if this triggers: STARGET_DEL is only set in the if
+	 * the reap_ref drops to zero, so we're trying to do another final put
+	 * on an already released kref
 	 */
 	BUG_ON(starget->state == STARGET_DEL);
 	scsi_target_reap_ref_put(starget);





^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] dual scan thread bug fix
  2014-01-04  5:41         ` James Bottomley
@ 2014-01-04 16:27           ` Alan Stern
       [not found]             ` <Pine.LNX.4.44L0.1401041119030.31700-100000-pYrvlCTfrz9XsRXLowluHWD2FQJk+8+b@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Alan Stern @ 2014-01-04 16:27 UTC (permalink / raw)
  To: James Bottomley; +Cc: linux-scsi, USB list

On Fri, 3 Jan 2014, James Bottomley wrote:

> > I'm still concerned about one thing.  The previous patch does this in
> > scsi_alloc_target():
> > 
> > >   found:
> > > -	found_target->reap_ref++;
> > > +	if (!kref_get_unless_zero(&found_target->reap_ref))
> > > +		/*
> > > +		 * release routine already fired.  Target is dead, but
> > > +		 * STARGET_DEL may not yet be set (set in the release
> > > +		 * routine), so set here as well, just in case
> > > +		 */
> > > +		found_target->state = STARGET_DEL;
> > >  	spin_unlock_irqrestore(shost->host_lock, flags);
> > 
> > As a result, the two comments in this patch aren't right:
> > 
> > > @@ -384,9 +385,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
> > >  	struct scsi_target *starget
> > >  		= container_of(kref, struct scsi_target, reap_ref);
> > >  
> > > -	transport_remove_device(&starget->dev);
> > > -	device_del(&starget->dev);
> > > -	starget->state = STARGET_DEL;
> > > +	/*
> > > +	 * if we get here and the target is still in the CREATED state that
> > > +	 * means it was allocated but never made visible (because a scan
> > > +	 * turned up no LUNs), so don't call device_del() on it.
> > > +	 */
> > > +	if (starget->state == STARGET_RUNNING) {
> > > +		transport_remove_device(&starget->dev);
> > > +		device_del(&starget->dev);
> > > +	}
> > 
> > Here the state could already be STARGET_DEL, even though the target is
> > still visible.
> 
> Well, I agree with the theory.  In practise, there are only a few
> machine instructions between the kref going to zero and us reaching that
> point, because kref_release will jump into this routine next, so the
> condition would be very hard to see.

It's true that the window is very small and not at all likely to be
hit.  Still, I prefer eliminating such things entirely.

>  However, I suppose it's easy to
> close by checking for != STARGET_CREATED and there's no reason not to do
> that, so I'll change it.

Checking for != STARGET_CREATED is also wrong in principle.  The state
could already be STARGET_DEL even though the target was never made
visible.

The basic problem is that you are relying on the state to be an
accurate description of the target's visibility, but
scsi_alloc_target() changes the state without changing the visibility.  
I really think you should get rid of that extra assignment in
scsi_alloc_target().

> How does the attached diff look?

Better but still not right.

Alan Stern


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] dual scan thread bug fix
       [not found]             ` <Pine.LNX.4.44L0.1401041119030.31700-100000-pYrvlCTfrz9XsRXLowluHWD2FQJk+8+b@public.gmane.org>
@ 2014-01-07 23:42               ` James Bottomley
  2014-01-08 15:57                 ` Alan Stern
  0 siblings, 1 reply; 12+ messages in thread
From: James Bottomley @ 2014-01-07 23:42 UTC (permalink / raw)
  To: Alan Stern; +Cc: linux-scsi, USB list

On Sat, 2014-01-04 at 11:27 -0500, Alan Stern wrote:
> On Fri, 3 Jan 2014, James Bottomley wrote:
> 
> > > I'm still concerned about one thing.  The previous patch does this in
> > > scsi_alloc_target():
> > > 
> > > >   found:
> > > > -	found_target->reap_ref++;
> > > > +	if (!kref_get_unless_zero(&found_target->reap_ref))
> > > > +		/*
> > > > +		 * release routine already fired.  Target is dead, but
> > > > +		 * STARGET_DEL may not yet be set (set in the release
> > > > +		 * routine), so set here as well, just in case
> > > > +		 */
> > > > +		found_target->state = STARGET_DEL;
> > > >  	spin_unlock_irqrestore(shost->host_lock, flags);
> > > 
> > > As a result, the two comments in this patch aren't right:
> > > 
> > > > @@ -384,9 +385,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
> > > >  	struct scsi_target *starget
> > > >  		= container_of(kref, struct scsi_target, reap_ref);
> > > >  
> > > > -	transport_remove_device(&starget->dev);
> > > > -	device_del(&starget->dev);
> > > > -	starget->state = STARGET_DEL;
> > > > +	/*
> > > > +	 * if we get here and the target is still in the CREATED state that
> > > > +	 * means it was allocated but never made visible (because a scan
> > > > +	 * turned up no LUNs), so don't call device_del() on it.
> > > > +	 */
> > > > +	if (starget->state == STARGET_RUNNING) {
> > > > +		transport_remove_device(&starget->dev);
> > > > +		device_del(&starget->dev);
> > > > +	}
> > > 
> > > Here the state could already be STARGET_DEL, even though the target is
> > > still visible.
> > 
> > Well, I agree with the theory.  In practise, there are only a few
> > machine instructions between the kref going to zero and us reaching that
> > point, because kref_release will jump into this routine next, so the
> > condition would be very hard to see.
> 
> It's true that the window is very small and not at all likely to be
> hit.  Still, I prefer eliminating such things entirely.
> 
> >  However, I suppose it's easy to
> > close by checking for != STARGET_CREATED and there's no reason not to do
> > that, so I'll change it.
> 
> Checking for != STARGET_CREATED is also wrong in principle.  The state
> could already be STARGET_DEL even though the target was never made
> visible.
> 
> The basic problem is that you are relying on the state to be an
> accurate description of the target's visibility, but
> scsi_alloc_target() changes the state without changing the visibility.  
> I really think you should get rid of that extra assignment in
> scsi_alloc_target().

OK, Agreed, but that means modifying the 1/2 patch with the below.  This
should make the proposed diff to 2/2 correct.

James

---
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index ef3f958..5fad646 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -417,7 +417,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 		+ shost->transportt->target_size;
 	struct scsi_target *starget;
 	struct scsi_target *found_target;
-	int error;
+	int error, ref_got;
 
 	starget = kzalloc(size, GFP_KERNEL);
 	if (!starget) {
@@ -466,15 +466,15 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	return starget;
 
  found:
-	if (!kref_get_unless_zero(&found_target->reap_ref))
-		/*
-		 * release routine already fired.  Target is dead, but
-		 * STARGET_DEL may not yet be set (set in the release
-		 * routine), so set here as well, just in case
-		 */
-		found_target->state = STARGET_DEL;
+	/*
+	 * release routine already fired if kref is zero, so if we can still
+	 * take the reference, the target must be alive.  If we can't, it must
+	 * be dying and we need to wait for a new target
+	 */
+	ref_got = kref_get_unless_zero(&found_target->reap_ref);
+
 	spin_unlock_irqrestore(shost->host_lock, flags);
-	if (found_target->state != STARGET_DEL) {
+	if (ref_got) {
 		put_device(dev);
 		return found_target;
 	}
@@ -482,8 +482,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	 * Unfortunately, we found a dying target; need to wait until it's
 	 * dead before we can get a new one.  There is an anomaly here.  We
 	 * *should* call scsi_target_reap() to balance the kref_get() of the
-	 * reap_ref above.  However, since the target is in state STARGET_DEL,
-	 * it's already invisible and the reap_ref is irrelevant.  If we call
+	 * reap_ref above.  However, since the target being released, it's
+	 * already invisible and the reap_ref is irrelevant.  If we call
 	 * scsi_target_reap() we might spuriously do another device_del() on
 	 * an already invisible target.
 	 */




--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] dual scan thread bug fix
  2014-01-07 23:42               ` James Bottomley
@ 2014-01-08 15:57                 ` Alan Stern
  2014-01-08 22:37                   ` James Bottomley
  0 siblings, 1 reply; 12+ messages in thread
From: Alan Stern @ 2014-01-08 15:57 UTC (permalink / raw)
  To: James Bottomley; +Cc: linux-scsi, USB list

On Wed, 8 Jan 2014, James Bottomley wrote:

> OK, Agreed, but that means modifying the 1/2 patch with the below.  This
> should make the proposed diff to 2/2 correct.
> 
> James
> 
> ---
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index ef3f958..5fad646 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -417,7 +417,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>  		+ shost->transportt->target_size;
>  	struct scsi_target *starget;
>  	struct scsi_target *found_target;
> -	int error;
> +	int error, ref_got;
>  
>  	starget = kzalloc(size, GFP_KERNEL);
>  	if (!starget) {
> @@ -466,15 +466,15 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>  	return starget;
>  
>   found:
> -	if (!kref_get_unless_zero(&found_target->reap_ref))
> -		/*
> -		 * release routine already fired.  Target is dead, but
> -		 * STARGET_DEL may not yet be set (set in the release
> -		 * routine), so set here as well, just in case
> -		 */
> -		found_target->state = STARGET_DEL;
> +	/*
> +	 * release routine already fired if kref is zero, so if we can still
> +	 * take the reference, the target must be alive.  If we can't, it must
> +	 * be dying and we need to wait for a new target
> +	 */
> +	ref_got = kref_get_unless_zero(&found_target->reap_ref);
> +
>  	spin_unlock_irqrestore(shost->host_lock, flags);
> -	if (found_target->state != STARGET_DEL) {
> +	if (ref_got) {
>  		put_device(dev);
>  		return found_target;
>  	}
> @@ -482,8 +482,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>  	 * Unfortunately, we found a dying target; need to wait until it's
>  	 * dead before we can get a new one.  There is an anomaly here.  We
>  	 * *should* call scsi_target_reap() to balance the kref_get() of the
> -	 * reap_ref above.  However, since the target is in state STARGET_DEL,
> -	 * it's already invisible and the reap_ref is irrelevant.  If we call
> +	 * reap_ref above.  However, since the target being released, it's
> +	 * already invisible and the reap_ref is irrelevant.  If we call
>  	 * scsi_target_reap() we might spuriously do another device_del() on
>  	 * an already invisible target.
>  	 */

In fact, most of this comment (everything after the first sentence) is
no longer needed.  If the target is dying then kref_get_unless_zero
must have failed, so there is no need to worry about unbalanced
refcounts.

Otherwise this looks good.

Alan Stern


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] dual scan thread bug fix
  2014-01-08 15:57                 ` Alan Stern
@ 2014-01-08 22:37                   ` James Bottomley
  0 siblings, 0 replies; 12+ messages in thread
From: James Bottomley @ 2014-01-08 22:37 UTC (permalink / raw)
  To: Alan Stern; +Cc: linux-scsi, USB list

On Wed, 2014-01-08 at 10:57 -0500, Alan Stern wrote:
> On Wed, 8 Jan 2014, James Bottomley wrote:
> 
> > OK, Agreed, but that means modifying the 1/2 patch with the below.  This
> > should make the proposed diff to 2/2 correct.
> > 
> > James
> > 
> > ---
> > diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> > index ef3f958..5fad646 100644
> > --- a/drivers/scsi/scsi_scan.c
> > +++ b/drivers/scsi/scsi_scan.c
> > @@ -417,7 +417,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
> >  		+ shost->transportt->target_size;
> >  	struct scsi_target *starget;
> >  	struct scsi_target *found_target;
> > -	int error;
> > +	int error, ref_got;
> >  
> >  	starget = kzalloc(size, GFP_KERNEL);
> >  	if (!starget) {
> > @@ -466,15 +466,15 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
> >  	return starget;
> >  
> >   found:
> > -	if (!kref_get_unless_zero(&found_target->reap_ref))
> > -		/*
> > -		 * release routine already fired.  Target is dead, but
> > -		 * STARGET_DEL may not yet be set (set in the release
> > -		 * routine), so set here as well, just in case
> > -		 */
> > -		found_target->state = STARGET_DEL;
> > +	/*
> > +	 * release routine already fired if kref is zero, so if we can still
> > +	 * take the reference, the target must be alive.  If we can't, it must
> > +	 * be dying and we need to wait for a new target
> > +	 */
> > +	ref_got = kref_get_unless_zero(&found_target->reap_ref);
> > +
> >  	spin_unlock_irqrestore(shost->host_lock, flags);
> > -	if (found_target->state != STARGET_DEL) {
> > +	if (ref_got) {
> >  		put_device(dev);
> >  		return found_target;
> >  	}
> > @@ -482,8 +482,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
> >  	 * Unfortunately, we found a dying target; need to wait until it's
> >  	 * dead before we can get a new one.  There is an anomaly here.  We
> >  	 * *should* call scsi_target_reap() to balance the kref_get() of the
> > -	 * reap_ref above.  However, since the target is in state STARGET_DEL,
> > -	 * it's already invisible and the reap_ref is irrelevant.  If we call
> > +	 * reap_ref above.  However, since the target being released, it's
> > +	 * already invisible and the reap_ref is irrelevant.  If we call
> >  	 * scsi_target_reap() we might spuriously do another device_del() on
> >  	 * an already invisible target.
> >  	 */
> 
> In fact, most of this comment (everything after the first sentence) is
> no longer needed.  If the target is dying then kref_get_unless_zero
> must have failed, so there is no need to worry about unbalanced
> refcounts.

I'd like to keep the comment: I get a lot of email from people who write
static checkers for this.  In principle, I agree, they should treat
kref_get_unless_zero() as a spin_trylock(), but it's nice to have
something concrete in the code to point to when the email arrives.

> Otherwise this looks good.

Great, thanks, I'll re-roll and repost.

James




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] fix our current target reap infrastructure.
  2013-12-16 15:57   ` Alan Stern
@ 2013-12-17 13:38     ` James Bottomley
  0 siblings, 0 replies; 12+ messages in thread
From: James Bottomley @ 2013-12-17 13:38 UTC (permalink / raw)
  To: Alan Stern; +Cc: linux-scsi, USB list


On Mon, 2013-12-16 at 10:57 -0500, Alan Stern wrote:
> On Mon, 16 Dec 2013, James Bottomley wrote:
> 
> > This patch eliminates the reap_ref and replaces it with a proper kref.
> > On last put of this kref, the target is removed from visibility in
> > sysfs.  The final call to scsi_target_reap() for the device is done from
> > __scsi_remove_device() and only if the device was made visible.  This
> > ensures that the target disappears as soon as the last device is gone
> > rather than waiting until final release of the device (which is often
> > too long).
> 
> Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
> 
> Two small suggested changes:
> 
> > @@ -441,29 +466,32 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
> >  	return starget;
> >  
> >   found:
> > -	found_target->reap_ref++;
> > +	if (!kref_get_unless_zero(&found_target->reap_ref))
> > +		/*
> > +		 * release routine already fired.  Target is dead, but
> > +		 * STARGET_DEL may not yet be set (set in the release
> > +		 * routine), so set here as well, just in case
> > +		 */
> > +		found_target->state = STARGET_DEL;
> >  	spin_unlock_irqrestore(shost->host_lock, flags);
> >  	if (found_target->state != STARGET_DEL) {
> >  		put_device(dev);
> >  		return found_target;
> >  	}
> > -	/* Unfortunately, we found a dying target; need to
> > -	 * wait until it's dead before we can get a new one */
> > +	/*
> > +	 * Unfortunately, we found a dying target; need to wait until it's
> > +	 * dead before we can get a new one.  There is an anomaly here.  We
> > +	 * *should* call scsi_target_reap() to balance the kref_get() of the
> > +	 * reap_ref above.  However, since the target is in state STARGET_DEL,
> > +	 * it's already invisible and the reap_ref is irrelevant.  If we call
> > +	 * scsi_target_reap() we might spuriously do another device_del() on
> > +	 * an already invisible target.
> > +	 */
> >  	put_device(&found_target->dev);
> >  	flush_scheduled_work();
> >  	goto retry;
> 
> Since scsi_target_reap_usercontext() is now gone, scsi_target_destroy()
> doesn't rely on a work queue any more.  Therefore something like
> msleep(1) would be more appropriate than flush_scheduled_work().

I suppose so.  In theory with the removal of the work queue, going from
final release to list del should be deterministic, so we should run into
this less.  I quite like the flush_scheduled_work, because it pushes out
all the pending work for devices on other targets (so accelerates host
remove).  However, I suppose it does now look wrong in this context.

> > --- a/include/scsi/scsi_device.h
> > +++ b/include/scsi/scsi_device.h
> > @@ -257,7 +257,7 @@ struct scsi_target {
> >  	struct list_head	siblings;
> >  	struct list_head	devices;
> >  	struct device		dev;
> > -	unsigned int		reap_ref; /* protected by the host lock */
> > +	struct kref		reap_ref; /* last put renders device invisible */
> 
> s/device/target/

Will update, thanks.

James



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] fix our current target reap infrastructure.
  2013-12-16 15:12 ` [RFC 1/2] fix our current target reap infrastructure James Bottomley
@ 2013-12-16 15:57   ` Alan Stern
  2013-12-17 13:38     ` James Bottomley
  0 siblings, 1 reply; 12+ messages in thread
From: Alan Stern @ 2013-12-16 15:57 UTC (permalink / raw)
  To: James Bottomley; +Cc: linux-scsi, USB list

On Mon, 16 Dec 2013, James Bottomley wrote:

> This patch eliminates the reap_ref and replaces it with a proper kref.
> On last put of this kref, the target is removed from visibility in
> sysfs.  The final call to scsi_target_reap() for the device is done from
> __scsi_remove_device() and only if the device was made visible.  This
> ensures that the target disappears as soon as the last device is gone
> rather than waiting until final release of the device (which is often
> too long).

Reviewed-by: Alan Stern <stern@rowland.harvard.edu>

Two small suggested changes:

> @@ -441,29 +466,32 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>  	return starget;
>  
>   found:
> -	found_target->reap_ref++;
> +	if (!kref_get_unless_zero(&found_target->reap_ref))
> +		/*
> +		 * release routine already fired.  Target is dead, but
> +		 * STARGET_DEL may not yet be set (set in the release
> +		 * routine), so set here as well, just in case
> +		 */
> +		found_target->state = STARGET_DEL;
>  	spin_unlock_irqrestore(shost->host_lock, flags);
>  	if (found_target->state != STARGET_DEL) {
>  		put_device(dev);
>  		return found_target;
>  	}
> -	/* Unfortunately, we found a dying target; need to
> -	 * wait until it's dead before we can get a new one */
> +	/*
> +	 * Unfortunately, we found a dying target; need to wait until it's
> +	 * dead before we can get a new one.  There is an anomaly here.  We
> +	 * *should* call scsi_target_reap() to balance the kref_get() of the
> +	 * reap_ref above.  However, since the target is in state STARGET_DEL,
> +	 * it's already invisible and the reap_ref is irrelevant.  If we call
> +	 * scsi_target_reap() we might spuriously do another device_del() on
> +	 * an already invisible target.
> +	 */
>  	put_device(&found_target->dev);
>  	flush_scheduled_work();
>  	goto retry;

Since scsi_target_reap_usercontext() is now gone, scsi_target_destroy()
doesn't rely on a work queue any more.  Therefore something like
msleep(1) would be more appropriate than flush_scheduled_work().

> --- a/include/scsi/scsi_device.h
> +++ b/include/scsi/scsi_device.h
> @@ -257,7 +257,7 @@ struct scsi_target {
>  	struct list_head	siblings;
>  	struct list_head	devices;
>  	struct device		dev;
> -	unsigned int		reap_ref; /* protected by the host lock */
> +	struct kref		reap_ref; /* last put renders device invisible */

s/device/target/

Alan Stern


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC 1/2] fix our current target reap infrastructure.
  2013-12-16 15:10 [RFC 0/2] target refcounting infrastructure fixes for usb James Bottomley
@ 2013-12-16 15:12 ` James Bottomley
  2013-12-16 15:57   ` Alan Stern
  0 siblings, 1 reply; 12+ messages in thread
From: James Bottomley @ 2013-12-16 15:12 UTC (permalink / raw)
  To: linux-scsi; +Cc: Alan Stern, USB list

This patch eliminates the reap_ref and replaces it with a proper kref.
On last put of this kref, the target is removed from visibility in
sysfs.  The final call to scsi_target_reap() for the device is done from
__scsi_remove_device() and only if the device was made visible.  This
ensures that the target disappears as soon as the last device is gone
rather than waiting until final release of the device (which is often
too long).

---
 drivers/scsi/scsi_scan.c   | 89 +++++++++++++++++++++++++++-------------------
 drivers/scsi/scsi_sysfs.c  | 15 ++++----
 include/scsi/scsi_device.h |  3 +-
 3 files changed, 63 insertions(+), 44 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 307a811..39e5c85 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -371,6 +371,31 @@ static struct scsi_target *__scsi_find_target(struct device *parent,
 }
 
 /**
+ * scsi_target_reap_ref_release - remove target from visibility
+ * @kref: the reap_ref in the target being released
+ *
+ * Called on last put of reap_ref, which is the indication that no device
+ * under this target is visible anymore, so render the target invisible in
+ * sysfs.  Note: we have to be in user context here because the target reaps
+ * should be done in places where the scsi device visibility is being removed.
+ */
+static void scsi_target_reap_ref_release(struct kref *kref)
+{
+	struct scsi_target *starget
+		= container_of(kref, struct scsi_target, reap_ref);
+
+	transport_remove_device(&starget->dev);
+	device_del(&starget->dev);
+	starget->state = STARGET_DEL;
+	scsi_target_destroy(starget);
+}
+
+static void scsi_target_reap_ref_put(struct scsi_target *starget)
+{
+	kref_put(&starget->reap_ref, scsi_target_reap_ref_release);
+}
+
+/**
  * scsi_alloc_target - allocate a new or find an existing target
  * @parent:	parent of the target (need not be a scsi host)
  * @channel:	target channel number (zero if no channels)
@@ -401,7 +426,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	}
 	dev = &starget->dev;
 	device_initialize(dev);
-	starget->reap_ref = 1;
+	kref_init(&starget->reap_ref);
 	dev->parent = get_device(parent);
 	dev_set_name(dev, "target%d:%d:%d", shost->host_no, channel, id);
 	dev->bus = &scsi_bus_type;
@@ -441,29 +466,32 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	return starget;
 
  found:
-	found_target->reap_ref++;
+	if (!kref_get_unless_zero(&found_target->reap_ref))
+		/*
+		 * release routine already fired.  Target is dead, but
+		 * STARGET_DEL may not yet be set (set in the release
+		 * routine), so set here as well, just in case
+		 */
+		found_target->state = STARGET_DEL;
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	if (found_target->state != STARGET_DEL) {
 		put_device(dev);
 		return found_target;
 	}
-	/* Unfortunately, we found a dying target; need to
-	 * wait until it's dead before we can get a new one */
+	/*
+	 * Unfortunately, we found a dying target; need to wait until it's
+	 * dead before we can get a new one.  There is an anomaly here.  We
+	 * *should* call scsi_target_reap() to balance the kref_get() of the
+	 * reap_ref above.  However, since the target is in state STARGET_DEL,
+	 * it's already invisible and the reap_ref is irrelevant.  If we call
+	 * scsi_target_reap() we might spuriously do another device_del() on
+	 * an already invisible target.
+	 */
 	put_device(&found_target->dev);
 	flush_scheduled_work();
 	goto retry;
 }
 
-static void scsi_target_reap_usercontext(struct work_struct *work)
-{
-	struct scsi_target *starget =
-		container_of(work, struct scsi_target, ew.work);
-
-	transport_remove_device(&starget->dev);
-	device_del(&starget->dev);
-	scsi_target_destroy(starget);
-}
-
 /**
  * scsi_target_reap - check to see if target is in use and destroy if not
  * @starget: target to be checked
@@ -474,28 +502,11 @@ static void scsi_target_reap_usercontext(struct work_struct *work)
  */
 void scsi_target_reap(struct scsi_target *starget)
 {
-	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
-	unsigned long flags;
-	enum scsi_target_state state;
-	int empty = 0;
-
-	spin_lock_irqsave(shost->host_lock, flags);
-	state = starget->state;
-	if (--starget->reap_ref == 0 && list_empty(&starget->devices)) {
-		empty = 1;
-		starget->state = STARGET_DEL;
-	}
-	spin_unlock_irqrestore(shost->host_lock, flags);
-
-	if (!empty)
-		return;
-
-	BUG_ON(state == STARGET_DEL);
-	if (state == STARGET_CREATED)
+	BUG_ON(starget->state == STARGET_DEL);
+	if (starget->state == STARGET_CREATED)
 		scsi_target_destroy(starget);
 	else
-		execute_in_process_context(scsi_target_reap_usercontext,
-					   &starget->ew);
+		scsi_target_reap_ref_put(starget);
 }
 
 /**
@@ -1532,6 +1543,10 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 	}
 	mutex_unlock(&shost->scan_mutex);
 	scsi_autopm_put_target(starget);
+	/*
+	 * paired with scsi_alloc_target().  Target will be destroyed unless
+	 * scsi_probe_and_add_lun made an underlying device visible
+	 */
 	scsi_target_reap(starget);
 	put_device(&starget->dev);
 
@@ -1612,8 +1627,10 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 
  out_reap:
 	scsi_autopm_put_target(starget);
-	/* now determine if the target has any children at all
-	 * and if not, nuke it */
+	/*
+	 * paired with scsi_alloc_target(): determine if the target has
+	 * any children at all and if not, nuke it
+	 */
 	scsi_target_reap(starget);
 
 	put_device(&starget->dev);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 9117d0b..34eab7b 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -383,17 +383,14 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 {
 	struct scsi_device *sdev;
 	struct device *parent;
-	struct scsi_target *starget;
 	struct list_head *this, *tmp;
 	unsigned long flags;
 
 	sdev = container_of(work, struct scsi_device, ew.work);
 
 	parent = sdev->sdev_gendev.parent;
-	starget = to_scsi_target(parent);
 
 	spin_lock_irqsave(sdev->host->host_lock, flags);
-	starget->reap_ref++;
 	list_del(&sdev->siblings);
 	list_del(&sdev->same_target_siblings);
 	list_del(&sdev->starved_entry);
@@ -413,8 +410,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 	/* NULL queue means the device can't be used */
 	sdev->request_queue = NULL;
 
-	scsi_target_reap(scsi_target(sdev));
-
 	kfree(sdev->inquiry);
 	kfree(sdev);
 
@@ -1001,6 +996,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 		return error;
 	}
 	transport_add_device(&sdev->sdev_gendev);
+	kref_get(&starget->reap_ref); /* device now visible, so target is held */
 	sdev->is_visible = 1;
 
 	/* create queue files, which may be writable, depending on the host */
@@ -1055,6 +1051,13 @@ void __scsi_remove_device(struct scsi_device *sdev)
 		device_unregister(&sdev->sdev_dev);
 		transport_remove_device(dev);
 		device_del(dev);
+		/*
+		 * Paired with the kref_get() in scsi_sysfs_add_sdev().  We're
+		 * removing sysfs visibility from the device, so make the
+		 * target invisible if this was the last device underneath it.
+		 */
+		scsi_target_reap(scsi_target(sdev));
+
 	} else
 		put_device(&sdev->sdev_dev);
 
@@ -1133,7 +1136,7 @@ void scsi_remove_target(struct device *dev)
 			continue;
 		if (starget->dev.parent == dev || &starget->dev == dev) {
 			/* assuming new targets arrive at the end */
-			starget->reap_ref++;
+			kref_get(&starget->reap_ref);
 			spin_unlock_irqrestore(shost->host_lock, flags);
 			if (last)
 				scsi_target_reap(last);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d65fbec..24b9e06 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -257,7 +257,7 @@ struct scsi_target {
 	struct list_head	siblings;
 	struct list_head	devices;
 	struct device		dev;
-	unsigned int		reap_ref; /* protected by the host lock */
+	struct kref		reap_ref; /* last put renders device invisible */
 	unsigned int		channel;
 	unsigned int		id; /* target id ... replace
 				     * scsi_device.id eventually */
@@ -284,7 +284,6 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED	3
 
 	char			scsi_level;
-	struct execute_work	ew;
 	enum scsi_target_state	state;
 	void 			*hostdata; /* available to low-level driver */
 	unsigned long		starget_data[0]; /* for the transport */
-- 
1.8.4.3




^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-01-08 22:37 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-03  0:36 [RFC 0/2] target refcounting infrastructure fixes for usb James Bottomley
     [not found] ` <1388709362.2399.31.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
2014-01-03  0:38   ` [RFC 1/2] fix our current target reap infrastructure James Bottomley
2014-01-03  0:38   ` [RFC 2/2] dual scan thread bug fix James Bottomley
     [not found]     ` <1388709533.2399.34.camel-sFMDBYUN5F8GjUHQrlYNx2Wm91YjaHnnhRte9Li2A+AAvxtiuMwx3w@public.gmane.org>
2014-01-03 15:58       ` Alan Stern
2014-01-04  5:41         ` James Bottomley
2014-01-04 16:27           ` Alan Stern
     [not found]             ` <Pine.LNX.4.44L0.1401041119030.31700-100000-pYrvlCTfrz9XsRXLowluHWD2FQJk+8+b@public.gmane.org>
2014-01-07 23:42               ` James Bottomley
2014-01-08 15:57                 ` Alan Stern
2014-01-08 22:37                   ` James Bottomley
  -- strict thread matches above, loose matches on Subject: below --
2013-12-16 15:10 [RFC 0/2] target refcounting infrastructure fixes for usb James Bottomley
2013-12-16 15:12 ` [RFC 1/2] fix our current target reap infrastructure James Bottomley
2013-12-16 15:57   ` Alan Stern
2013-12-17 13:38     ` James Bottomley

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.