All of lore.kernel.org
 help / color / mirror / Atom feed
* [GIT PATCH] SCSI fixes for 2.6.32-rc8
@ 2009-11-30 22:29 James Bottomley
  2009-11-30 23:03 ` FUJITA Tomonori
  2009-12-01 16:54 ` Kleber Sacilotto de Souza
  0 siblings, 2 replies; 13+ messages in thread
From: James Bottomley @ 2009-11-30 22:29 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds; +Cc: linux-scsi, linux-kernel

This is a set of miscellaneous (but small) fixes for SCSI.

The patch is available here:

master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git

The short changelog is:

Alexey Kuznetsov (1):
      fix crash when disconnecting usb storage

James Bottomley (1):
      fix async scan add/remove race resulting in an oops

Martin K. Petersen (1):
      sd: Return correct error code for DIF

The diffstat is:

 drivers/scsi/hosts.c       |    2 -
 drivers/scsi/scsi_scan.c   |   18 ++----------
 drivers/scsi/scsi_sysfs.c  |   63 +++++++++++++++++++--------------------------
 drivers/scsi/sd_dif.c      |    2 -
 include/scsi/scsi_device.h |    1 
 5 files changed, 34 insertions(+), 52 deletions(-)

And the full patch is attached below.

James

---

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 5fd2da4..c968cc3 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -164,8 +164,8 @@ void scsi_remove_host(struct Scsi_Host *shost)
 			return;
 		}
 	spin_unlock_irqrestore(shost->host_lock, flags);
-	mutex_unlock(&shost->scan_mutex);
 	scsi_forget_host(shost);
+	mutex_unlock(&shost->scan_mutex);
 	scsi_proc_host_rm(shost);
 
 	spin_lock_irqsave(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 0547a7f..47291bc 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -952,16 +952,6 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	return SCSI_SCAN_LUN_PRESENT;
 }
 
-static inline void scsi_destroy_sdev(struct scsi_device *sdev)
-{
-	scsi_device_set_state(sdev, SDEV_DEL);
-	if (sdev->host->hostt->slave_destroy)
-		sdev->host->hostt->slave_destroy(sdev);
-	transport_destroy_device(&sdev->sdev_gendev);
-	put_device(&sdev->sdev_dev);
-	put_device(&sdev->sdev_gendev);
-}
-
 #ifdef CONFIG_SCSI_LOGGING
 /** 
  * scsi_inq_str - print INQUIRY data from min to max index, strip trailing whitespace
@@ -1139,7 +1129,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
 			}
 		}
 	} else
-		scsi_destroy_sdev(sdev);
+		__scsi_remove_device(sdev);
  out:
 	return res;
 }
@@ -1500,7 +1490,7 @@ static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
 		/*
 		 * the sdev we used didn't appear in the report luns scan
 		 */
-		scsi_destroy_sdev(sdev);
+		__scsi_remove_device(sdev);
 	return ret;
 }
 
@@ -1710,7 +1700,7 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
 	shost_for_each_device(sdev, shost) {
 		if (!scsi_host_scan_allowed(shost) ||
 		    scsi_sysfs_add_sdev(sdev) != 0)
-			scsi_destroy_sdev(sdev);
+			__scsi_remove_device(sdev);
 	}
 }
 
@@ -1943,7 +1933,7 @@ void scsi_free_host_dev(struct scsi_device *sdev)
 {
 	BUG_ON(sdev->id != sdev->host->this_id);
 
-	scsi_destroy_sdev(sdev);
+	__scsi_remove_device(sdev);
 }
 EXPORT_SYMBOL(scsi_free_host_dev);
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 5c7eb63..392d8db 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -854,82 +854,73 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 	transport_configure_device(&starget->dev);
 	error = device_add(&sdev->sdev_gendev);
 	if (error) {
-		put_device(sdev->sdev_gendev.parent);
 		printk(KERN_INFO "error 1\n");
-		return error;
+		goto out_remove;
 	}
 	error = device_add(&sdev->sdev_dev);
 	if (error) {
 		printk(KERN_INFO "error 2\n");
-		goto clean_device;
+		device_del(&sdev->sdev_gendev);
+		goto out_remove;
 	}
+	transport_add_device(&sdev->sdev_gendev);
+	sdev->is_visible = 1;
 
 	/* create queue files, which may be writable, depending on the host */
 	if (sdev->host->hostt->change_queue_depth)
 		error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_depth_rw);
 	else
 		error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_depth);
-	if (error) {
-		__scsi_remove_device(sdev);
-		goto out;
-	}
+	if (error)
+		goto out_remove;
+
 	if (sdev->host->hostt->change_queue_type)
 		error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_type_rw);
 	else
 		error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_type);
-	if (error) {
-		__scsi_remove_device(sdev);
-		goto out;
-	}
+	if (error)
+		goto out_remove;
 
 	error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL);
 
 	if (error)
+		/* we're treating error on bsg register as non-fatal,
+		 * so pretend nothing went wrong */
 		sdev_printk(KERN_INFO, sdev,
 			    "Failed to register bsg queue, errno=%d\n", error);
 
-	/* we're treating error on bsg register as non-fatal, so pretend
-	 * nothing went wrong */
-	error = 0;
-
 	/* add additional host specific attributes */
 	if (sdev->host->hostt->sdev_attrs) {
 		for (i = 0; sdev->host->hostt->sdev_attrs[i]; i++) {
 			error = device_create_file(&sdev->sdev_gendev,
 					sdev->host->hostt->sdev_attrs[i]);
-			if (error) {
-				__scsi_remove_device(sdev);
-				goto out;
-			}
+			if (error)
+				goto out_remove;
 		}
 	}
 
-	transport_add_device(&sdev->sdev_gendev);
- out:
-	return error;
-
- clean_device:
-	scsi_device_set_state(sdev, SDEV_CANCEL);
-
-	device_del(&sdev->sdev_gendev);
-	transport_destroy_device(&sdev->sdev_gendev);
-	put_device(&sdev->sdev_dev);
-	put_device(&sdev->sdev_gendev);
+	return 0;
 
+ out_remove:
+	__scsi_remove_device(sdev);
 	return error;
+
 }
 
 void __scsi_remove_device(struct scsi_device *sdev)
 {
 	struct device *dev = &sdev->sdev_gendev;
 
-	if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
-		return;
+	if (sdev->is_visible) {
+		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
+			return;
 
-	bsg_unregister_queue(sdev->request_queue);
-	device_unregister(&sdev->sdev_dev);
-	transport_remove_device(dev);
-	device_del(dev);
+		bsg_unregister_queue(sdev->request_queue);
+		device_unregister(&sdev->sdev_dev);
+		transport_remove_device(dev);
+		device_del(dev);
+	} else
+		put_device(&sdev->sdev_dev);
 	scsi_device_set_state(sdev, SDEV_DEL);
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 88da977..84be621 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -418,7 +418,7 @@ error:
 		  __func__, virt, phys, be32_to_cpu(sdt->ref_tag),
 		  be16_to_cpu(sdt->app_tag));
 
-	return -EIO;
+	return -EILSEQ;
 }
 
 /*
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 9af48cb..f097ae3 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -145,6 +145,7 @@ struct scsi_device {
 	unsigned retry_hwerror:1;	/* Retry HARDWARE_ERROR */
 	unsigned last_sector_bug:1;	/* do not use multisector accesses on
 					   SD_LAST_BUGGY_SECTORS */
+	unsigned is_visible:1;	/* is the device visible in sysfs */
 
 	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
 	struct list_head event_list;	/* asserted events */



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-11-30 22:29 [GIT PATCH] SCSI fixes for 2.6.32-rc8 James Bottomley
@ 2009-11-30 23:03 ` FUJITA Tomonori
  2009-12-01 14:06   ` James Bottomley
  2009-12-01 16:54 ` Kleber Sacilotto de Souza
  1 sibling, 1 reply; 13+ messages in thread
From: FUJITA Tomonori @ 2009-11-30 23:03 UTC (permalink / raw)
  To: James.Bottomley; +Cc: akpm, torvalds, linux-scsi, linux-kernel

On Mon, 30 Nov 2009 17:29:54 -0500
James Bottomley <James.Bottomley@suse.de> wrote:

> This is a set of miscellaneous (but small) fixes for SCSI.
> 
> The patch is available here:
> 
> master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git
> 
> The short changelog is:
> 
> Alexey Kuznetsov (1):
>       fix crash when disconnecting usb storage
> 
> James Bottomley (1):
>       fix async scan add/remove race resulting in an oops
> 
> Martin K. Petersen (1):
>       sd: Return correct error code for DIF
> 
> The diffstat is:
> 
>  drivers/scsi/hosts.c       |    2 -
>  drivers/scsi/scsi_scan.c   |   18 ++----------
>  drivers/scsi/scsi_sysfs.c  |   63 +++++++++++++++++++--------------------------
>  drivers/scsi/sd_dif.c      |    2 -
>  include/scsi/scsi_device.h |    1 
>  5 files changed, 34 insertions(+), 52 deletions(-)

Can you add the tape driver fix?

http://marc.info/?l=linux-scsi&m=125919508329412&w=2

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-11-30 23:03 ` FUJITA Tomonori
@ 2009-12-01 14:06   ` James Bottomley
  2009-12-01 17:15     ` Martin K. Petersen
  2009-12-01 18:14     ` Kai Makisara
  0 siblings, 2 replies; 13+ messages in thread
From: James Bottomley @ 2009-12-01 14:06 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: akpm, torvalds, linux-scsi, linux-kernel, Kai Makisara

On Tue, 2009-12-01 at 08:03 +0900, FUJITA Tomonori wrote:
> On Mon, 30 Nov 2009 17:29:54 -0500
> James Bottomley <James.Bottomley@suse.de> wrote:
> 
> > This is a set of miscellaneous (but small) fixes for SCSI.
> > 
> > The patch is available here:
> > 
> > master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git
> > 
> > The short changelog is:
> > 
> > Alexey Kuznetsov (1):
> >       fix crash when disconnecting usb storage
> > 
> > James Bottomley (1):
> >       fix async scan add/remove race resulting in an oops
> > 
> > Martin K. Petersen (1):
> >       sd: Return correct error code for DIF
> > 
> > The diffstat is:
> > 
> >  drivers/scsi/hosts.c       |    2 -
> >  drivers/scsi/scsi_scan.c   |   18 ++----------
> >  drivers/scsi/scsi_sysfs.c  |   63 +++++++++++++++++++--------------------------
> >  drivers/scsi/sd_dif.c      |    2 -
> >  include/scsi/scsi_device.h |    1 
> >  5 files changed, 34 insertions(+), 52 deletions(-)
> 
> Can you add the tape driver fix?

Sure ... can I have an acked-by from Kai?

Thanks,

James



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-11-30 22:29 [GIT PATCH] SCSI fixes for 2.6.32-rc8 James Bottomley
  2009-11-30 23:03 ` FUJITA Tomonori
@ 2009-12-01 16:54 ` Kleber Sacilotto de Souza
  2009-12-01 17:13   ` James Bottomley
  1 sibling, 1 reply; 13+ messages in thread
From: Kleber Sacilotto de Souza @ 2009-12-01 16:54 UTC (permalink / raw)
  To: James Bottomley; +Cc: linux-scsi

On Mon, 2009-11-30 at 17:29 -0500, James Bottomley wrote:
> This is a set of miscellaneous (but small) fixes for SCSI.
> 
> The patch is available here:
> 
> master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git
> 
> The short changelog is:
> 
> Alexey Kuznetsov (1):
>       fix crash when disconnecting usb storage
> 
> James Bottomley (1):
>       fix async scan add/remove race resulting in an oops
> 
> Martin K. Petersen (1):
>       sd: Return correct error code for DIF
> 
> The diffstat is:
> 
>  drivers/scsi/hosts.c       |    2 -
>  drivers/scsi/scsi_scan.c   |   18 ++----------
>  drivers/scsi/scsi_sysfs.c  |   63 +++++++++++++++++++--------------------------
>  drivers/scsi/sd_dif.c      |    2 -
>  include/scsi/scsi_device.h |    1 
>  5 files changed, 34 insertions(+), 52 deletions(-)

Can you please add the patch from "[PATCH] ipr: fix EEH recovery" sent
to this list?


Thanks,
Kleber


-- 
Kleber S. Souza
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 16:54 ` Kleber Sacilotto de Souza
@ 2009-12-01 17:13   ` James Bottomley
  2009-12-01 17:18     ` Jesse Barnes
  0 siblings, 1 reply; 13+ messages in thread
From: James Bottomley @ 2009-12-01 17:13 UTC (permalink / raw)
  To: Kleber Sacilotto de Souza; +Cc: linux-scsi, linux-pci, Jesse Barnes

On Tue, 2009-12-01 at 14:54 -0200, Kleber Sacilotto de Souza wrote:
> Can you please add the patch from "[PATCH] ipr: fix EEH recovery" sent
> to this list?

Adding linux-pci because this hack actually tampers with internal PCI
device state, which looks wrong.

The thread is here:

http://marc.info/?l=linux-scsi&m=125918723218627

and the proposed full patch and explanation below.

PCI people, is this correct, or is there a better way to do it?

James

---

Hi,

After commits c82f63e411f1b58427c103bd95af2863b1c96dd1 (PCI: check saved
state before restore) and 4b77b0a2ba27d64f58f16d8d4d48d8319dda36ff (PCI:
Clear saved_state after the state has been restored) PCI drivers are
prevented from restoring the device standard configuration registers
twice in a row. These changes introduced a regression on ipr EEH
recovery.

The ipr device driver saves the PCI state only during the device probe
and restores it on ipr_reset_restore_cfg_space() during IOA resets. This
behavior is causing the EEH recovery to fail after the second error
detected, since the registers are not being restored.

One possible solution would be saving the registers after restoring
them. The problem with this approach is that while recovering from an
EEH error if pci_save_state() results in an EEH error, the adapter/slot
will be reset, and end up back in ipr_reset_restore_cfg_space(), but it
won't have a valid saved state to restore, so pci_restore_state() will
fail.

The following patch introduces a workaround for this problem, hacking
around the PCI API by setting pdev->state_saved = true before we do the
restore. It fixes the EEH regression and prevents that we hit another
EEH error during EEH recovery.


Thanks,
Kleber



Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
---
 drivers/scsi/ipr.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 76d294f..c3ff9a6 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6516,6 +6516,7 @@ static int ipr_reset_restore_cfg_space(struct
ipr_cmnd *ipr_cmd)
        int rc;
 
        ENTER;
+       ioa_cfg->pdev->state_saved = true;
        rc = pci_restore_state(ioa_cfg->pdev);
 
        if (rc != PCIBIOS_SUCCESSFUL) {
-- 


-- 
Kleber S. Souza
IBM Linux Technology Center

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 14:06   ` James Bottomley
@ 2009-12-01 17:15     ` Martin K. Petersen
  2009-12-01 18:14     ` Kai Makisara
  1 sibling, 0 replies; 13+ messages in thread
From: Martin K. Petersen @ 2009-12-01 17:15 UTC (permalink / raw)
  To: James Bottomley
  Cc: FUJITA Tomonori, akpm, torvalds, linux-scsi, linux-kernel, Kai Makisara


If we are doing another small batch of SCSI fixes before .32 I'd like to
see Mike Christie's error handling fix go in as well.  I just noticed it
ended up being queued in scsi-misc and not scsi-rc-fixes.  It really is
an important bug fix. ad63082626f99651d261ccd8698ce4e997362f7e

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 17:13   ` James Bottomley
@ 2009-12-01 17:18     ` Jesse Barnes
  2009-12-01 21:40       ` Rafael J. Wysocki
  0 siblings, 1 reply; 13+ messages in thread
From: Jesse Barnes @ 2009-12-01 17:18 UTC (permalink / raw)
  To: James Bottomley
  Cc: Kleber Sacilotto de Souza, linux-scsi, linux-pci, Rafael J. Wysocki

On Tue, 01 Dec 2009 12:13:47 -0500
James Bottomley <James.Bottomley@suse.de> wrote:

> On Tue, 2009-12-01 at 14:54 -0200, Kleber Sacilotto de Souza wrote:
> > Can you please add the patch from "[PATCH] ipr: fix EEH recovery"
> > sent to this list?
> 
> Adding linux-pci because this hack actually tampers with internal PCI
> device state, which looks wrong.
> 
> The thread is here:
> 
> http://marc.info/?l=linux-scsi&m=125918723218627
> 
> and the proposed full patch and explanation below.
> 
> PCI people, is this correct, or is there a better way to do it?
> 
> James
> 
> ---
> 
> Hi,
> 
> After commits c82f63e411f1b58427c103bd95af2863b1c96dd1 (PCI: check
> saved state before restore) and
> 4b77b0a2ba27d64f58f16d8d4d48d8319dda36ff (PCI: Clear saved_state
> after the state has been restored) PCI drivers are prevented from
> restoring the device standard configuration registers twice in a row.
> These changes introduced a regression on ipr EEH recovery.
> 
> The ipr device driver saves the PCI state only during the device probe
> and restores it on ipr_reset_restore_cfg_space() during IOA resets.
> This behavior is causing the EEH recovery to fail after the second
> error detected, since the registers are not being restored.
> 
> One possible solution would be saving the registers after restoring
> them. The problem with this approach is that while recovering from an
> EEH error if pci_save_state() results in an EEH error, the
> adapter/slot will be reset, and end up back in
> ipr_reset_restore_cfg_space(), but it won't have a valid saved state
> to restore, so pci_restore_state() will fail.
> 
> The following patch introduces a workaround for this problem, hacking
> around the PCI API by setting pdev->state_saved = true before we do
> the restore. It fixes the EEH regression and prevents that we hit
> another EEH error during EEH recovery.
> 
> 
> Thanks,
> Kleber
> 
> 
> 
> Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
> ---
>  drivers/scsi/ipr.c |    1 +
>  1 files changed, 1 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> index 76d294f..c3ff9a6 100644
> --- a/drivers/scsi/ipr.c
> +++ b/drivers/scsi/ipr.c
> @@ -6516,6 +6516,7 @@ static int ipr_reset_restore_cfg_space(struct
> ipr_cmnd *ipr_cmd)
>         int rc;
>  
>         ENTER;
> +       ioa_cfg->pdev->state_saved = true;
>         rc = pci_restore_state(ioa_cfg->pdev);
>  
>         if (rc != PCIBIOS_SUCCESSFUL) {

Rafael may have input here, but it seems like we need a low level
save/restore routine that ignores the flag (which is generally used for
suspend/resume I think?).

Maybe adding low level _pci_save_state/_pci_restore_state that don't
check/set the flags would help?

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 14:06   ` James Bottomley
  2009-12-01 17:15     ` Martin K. Petersen
@ 2009-12-01 18:14     ` Kai Makisara
  1 sibling, 0 replies; 13+ messages in thread
From: Kai Makisara @ 2009-12-01 18:14 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, akpm, torvalds, linux-scsi, linux-kernel

On Tue, 1 Dec 2009, James Bottomley wrote:

> On Tue, 2009-12-01 at 08:03 +0900, FUJITA Tomonori wrote:
> > On Mon, 30 Nov 2009 17:29:54 -0500
> > James Bottomley <James.Bottomley@suse.de> wrote:
> > 
> > > This is a set of miscellaneous (but small) fixes for SCSI.
> > > 
> > > The patch is available here:
> > > 
> > > master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git
> > > 
> > > The short changelog is:
> > > 
> > > Alexey Kuznetsov (1):
> > >       fix crash when disconnecting usb storage
> > > 
> > > James Bottomley (1):
> > >       fix async scan add/remove race resulting in an oops
> > > 
> > > Martin K. Petersen (1):
> > >       sd: Return correct error code for DIF
> > > 
> > > The diffstat is:
> > > 
> > >  drivers/scsi/hosts.c       |    2 -
> > >  drivers/scsi/scsi_scan.c   |   18 ++----------
> > >  drivers/scsi/scsi_sysfs.c  |   63 +++++++++++++++++++--------------------------
> > >  drivers/scsi/sd_dif.c      |    2 -
> > >  include/scsi/scsi_device.h |    1 
> > >  5 files changed, 34 insertions(+), 52 deletions(-)
> > 
> > Can you add the tape driver fix?
> 
> Sure ... can I have an acked-by from Kai?
> 
There was already tested-by but here is

Acked-by: Kai Makisara <kai.makisara@kolumbus.fi>

Thanks,
Kai


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 17:18     ` Jesse Barnes
@ 2009-12-01 21:40       ` Rafael J. Wysocki
  2009-12-01 22:10         ` Jesse Barnes
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2009-12-01 21:40 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: James Bottomley, Kleber Sacilotto de Souza, linux-scsi, linux-pci

On Tuesday 01 December 2009, Jesse Barnes wrote:
> On Tue, 01 Dec 2009 12:13:47 -0500
> James Bottomley <James.Bottomley@suse.de> wrote:
> 
> > On Tue, 2009-12-01 at 14:54 -0200, Kleber Sacilotto de Souza wrote:
> > > Can you please add the patch from "[PATCH] ipr: fix EEH recovery"
> > > sent to this list?
> > 
> > Adding linux-pci because this hack actually tampers with internal PCI
> > device state, which looks wrong.
> > 
> > The thread is here:
> > 
> > http://marc.info/?l=linux-scsi&m=125918723218627
> > 
> > and the proposed full patch and explanation below.
> > 
> > PCI people, is this correct, or is there a better way to do it?
> > 
> > James
> > 
> > ---
> > 
> > Hi,
> > 
> > After commits c82f63e411f1b58427c103bd95af2863b1c96dd1 (PCI: check
> > saved state before restore) and
> > 4b77b0a2ba27d64f58f16d8d4d48d8319dda36ff (PCI: Clear saved_state
> > after the state has been restored) PCI drivers are prevented from
> > restoring the device standard configuration registers twice in a row.
> > These changes introduced a regression on ipr EEH recovery.
> > 
> > The ipr device driver saves the PCI state only during the device probe
> > and restores it on ipr_reset_restore_cfg_space() during IOA resets.
> > This behavior is causing the EEH recovery to fail after the second
> > error detected, since the registers are not being restored.
> > 
> > One possible solution would be saving the registers after restoring
> > them. The problem with this approach is that while recovering from an
> > EEH error if pci_save_state() results in an EEH error, the
> > adapter/slot will be reset, and end up back in
> > ipr_reset_restore_cfg_space(), but it won't have a valid saved state
> > to restore, so pci_restore_state() will fail.
> > 
> > The following patch introduces a workaround for this problem, hacking
> > around the PCI API by setting pdev->state_saved = true before we do
> > the restore. It fixes the EEH regression and prevents that we hit
> > another EEH error during EEH recovery.
> > 
> > 
> > Thanks,
> > Kleber
> > 
> > 
> > 
> > Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
> > ---
> >  drivers/scsi/ipr.c |    1 +
> >  1 files changed, 1 insertions(+), 0 deletions(-)
> > 
> > diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> > index 76d294f..c3ff9a6 100644
> > --- a/drivers/scsi/ipr.c
> > +++ b/drivers/scsi/ipr.c
> > @@ -6516,6 +6516,7 @@ static int ipr_reset_restore_cfg_space(struct
> > ipr_cmnd *ipr_cmd)
> >         int rc;
> >  
> >         ENTER;
> > +       ioa_cfg->pdev->state_saved = true;
> >         rc = pci_restore_state(ioa_cfg->pdev);
> >  
> >         if (rc != PCIBIOS_SUCCESSFUL) {
> 
> Rafael may have input here, but it seems like we need a low level
> save/restore routine that ignores the flag (which is generally used for
> suspend/resume I think?).

There are some other users, but they are only a few.

> Maybe adding low level _pci_save_state/_pci_restore_state that don't
> check/set the flags would help?

That might work, but how do we know that the state we're going to restore
is actually valid at this particular point?

Perhaps we need a version using a separate storage area?

Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 21:40       ` Rafael J. Wysocki
@ 2009-12-01 22:10         ` Jesse Barnes
  2009-12-08 18:00           ` James Bottomley
  0 siblings, 1 reply; 13+ messages in thread
From: Jesse Barnes @ 2009-12-01 22:10 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: James Bottomley, Kleber Sacilotto de Souza, linux-scsi, linux-pci

On Tue, 1 Dec 2009 22:40:48 +0100
"Rafael J. Wysocki" <rjw@sisk.pl> wrote:

> On Tuesday 01 December 2009, Jesse Barnes wrote:
> > On Tue, 01 Dec 2009 12:13:47 -0500
> > James Bottomley <James.Bottomley@suse.de> wrote:
> > 
> > > On Tue, 2009-12-01 at 14:54 -0200, Kleber Sacilotto de Souza
> > > wrote:
> > > > Can you please add the patch from "[PATCH] ipr: fix EEH
> > > > recovery" sent to this list?
> > > 
> > > Adding linux-pci because this hack actually tampers with internal
> > > PCI device state, which looks wrong.
> > > 
> > > The thread is here:
> > > 
> > > http://marc.info/?l=linux-scsi&m=125918723218627
> > > 
> > > and the proposed full patch and explanation below.
> > > 
> > > PCI people, is this correct, or is there a better way to do it?
> > > 
> > > James
> > > 
> > > ---
> > > 
> > > Hi,
> > > 
> > > After commits c82f63e411f1b58427c103bd95af2863b1c96dd1 (PCI: check
> > > saved state before restore) and
> > > 4b77b0a2ba27d64f58f16d8d4d48d8319dda36ff (PCI: Clear saved_state
> > > after the state has been restored) PCI drivers are prevented from
> > > restoring the device standard configuration registers twice in a
> > > row. These changes introduced a regression on ipr EEH recovery.
> > > 
> > > The ipr device driver saves the PCI state only during the device
> > > probe and restores it on ipr_reset_restore_cfg_space() during IOA
> > > resets. This behavior is causing the EEH recovery to fail after
> > > the second error detected, since the registers are not being
> > > restored.
> > > 
> > > One possible solution would be saving the registers after
> > > restoring them. The problem with this approach is that while
> > > recovering from an EEH error if pci_save_state() results in an
> > > EEH error, the adapter/slot will be reset, and end up back in
> > > ipr_reset_restore_cfg_space(), but it won't have a valid saved
> > > state to restore, so pci_restore_state() will fail.
> > > 
> > > The following patch introduces a workaround for this problem,
> > > hacking around the PCI API by setting pdev->state_saved = true
> > > before we do the restore. It fixes the EEH regression and
> > > prevents that we hit another EEH error during EEH recovery.
> > > 
> > > 
> > > Thanks,
> > > Kleber
> > > 
> > > 
> > > 
> > > Signed-off-by: Kleber Sacilotto de Souza
> > > <klebers@linux.vnet.ibm.com> ---
> > >  drivers/scsi/ipr.c |    1 +
> > >  1 files changed, 1 insertions(+), 0 deletions(-)
> > > 
> > > diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> > > index 76d294f..c3ff9a6 100644
> > > --- a/drivers/scsi/ipr.c
> > > +++ b/drivers/scsi/ipr.c
> > > @@ -6516,6 +6516,7 @@ static int
> > > ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
> > >         int rc;
> > >  
> > >         ENTER;
> > > +       ioa_cfg->pdev->state_saved = true;
> > >         rc = pci_restore_state(ioa_cfg->pdev);
> > >  
> > >         if (rc != PCIBIOS_SUCCESSFUL) {
> > 
> > Rafael may have input here, but it seems like we need a low level
> > save/restore routine that ignores the flag (which is generally used
> > for suspend/resume I think?).
> 
> There are some other users, but they are only a few.
> 
> > Maybe adding low level _pci_save_state/_pci_restore_state that don't
> > check/set the flags would help?
> 
> That might work, but how do we know that the state we're going to
> restore is actually valid at this particular point?
> 
> Perhaps we need a version using a separate storage area?

Yeah, that would probably be best.  Let the caller allocate the space
and save/restore it all it wants for special cases like error handling.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-01 22:10         ` Jesse Barnes
@ 2009-12-08 18:00           ` James Bottomley
  2009-12-08 20:07             ` Rafael J. Wysocki
  0 siblings, 1 reply; 13+ messages in thread
From: James Bottomley @ 2009-12-08 18:00 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Rafael J. Wysocki, Kleber Sacilotto de Souza, linux-scsi, linux-pci

On Tue, 2009-12-01 at 14:10 -0800, Jesse Barnes wrote:
> On Tue, 1 Dec 2009 22:40:48 +0100
> "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> 
> > On Tuesday 01 December 2009, Jesse Barnes wrote:
> > > On Tue, 01 Dec 2009 12:13:47 -0500
> > > James Bottomley <James.Bottomley@suse.de> wrote:
> > > 
> > > > On Tue, 2009-12-01 at 14:54 -0200, Kleber Sacilotto de Souza
> > > > wrote:
> > > > > Can you please add the patch from "[PATCH] ipr: fix EEH
> > > > > recovery" sent to this list?
> > > > 
> > > > Adding linux-pci because this hack actually tampers with internal
> > > > PCI device state, which looks wrong.
> > > > 
> > > > The thread is here:
> > > > 
> > > > http://marc.info/?l=linux-scsi&m=125918723218627
> > > > 
> > > > and the proposed full patch and explanation below.
> > > > 
> > > > PCI people, is this correct, or is there a better way to do it?
> > > > 
> > > > James
> > > > 
> > > > ---
> > > > 
> > > > Hi,
> > > > 
> > > > After commits c82f63e411f1b58427c103bd95af2863b1c96dd1 (PCI: check
> > > > saved state before restore) and
> > > > 4b77b0a2ba27d64f58f16d8d4d48d8319dda36ff (PCI: Clear saved_state
> > > > after the state has been restored) PCI drivers are prevented from
> > > > restoring the device standard configuration registers twice in a
> > > > row. These changes introduced a regression on ipr EEH recovery.
> > > > 
> > > > The ipr device driver saves the PCI state only during the device
> > > > probe and restores it on ipr_reset_restore_cfg_space() during IOA
> > > > resets. This behavior is causing the EEH recovery to fail after
> > > > the second error detected, since the registers are not being
> > > > restored.
> > > > 
> > > > One possible solution would be saving the registers after
> > > > restoring them. The problem with this approach is that while
> > > > recovering from an EEH error if pci_save_state() results in an
> > > > EEH error, the adapter/slot will be reset, and end up back in
> > > > ipr_reset_restore_cfg_space(), but it won't have a valid saved
> > > > state to restore, so pci_restore_state() will fail.
> > > > 
> > > > The following patch introduces a workaround for this problem,
> > > > hacking around the PCI API by setting pdev->state_saved = true
> > > > before we do the restore. It fixes the EEH regression and
> > > > prevents that we hit another EEH error during EEH recovery.
> > > > 
> > > > 
> > > > Thanks,
> > > > Kleber
> > > > 
> > > > 
> > > > 
> > > > Signed-off-by: Kleber Sacilotto de Souza
> > > > <klebers@linux.vnet.ibm.com> ---
> > > >  drivers/scsi/ipr.c |    1 +
> > > >  1 files changed, 1 insertions(+), 0 deletions(-)
> > > > 
> > > > diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> > > > index 76d294f..c3ff9a6 100644
> > > > --- a/drivers/scsi/ipr.c
> > > > +++ b/drivers/scsi/ipr.c
> > > > @@ -6516,6 +6516,7 @@ static int
> > > > ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
> > > >         int rc;
> > > >  
> > > >         ENTER;
> > > > +       ioa_cfg->pdev->state_saved = true;
> > > >         rc = pci_restore_state(ioa_cfg->pdev);
> > > >  
> > > >         if (rc != PCIBIOS_SUCCESSFUL) {
> > > 
> > > Rafael may have input here, but it seems like we need a low level
> > > save/restore routine that ignores the flag (which is generally used
> > > for suspend/resume I think?).
> > 
> > There are some other users, but they are only a few.
> > 
> > > Maybe adding low level _pci_save_state/_pci_restore_state that don't
> > > check/set the flags would help?
> > 
> > That might work, but how do we know that the state we're going to
> > restore is actually valid at this particular point?
> > 
> > Perhaps we need a version using a separate storage area?
> 
> Yeah, that would probably be best.  Let the caller allocate the space
> and save/restore it all it wants for special cases like error handling.

OK, so could I have a resolution for this, please, guys?

Do I just apply the patch to fiddle with the internal state which looks
ugly but will fix the bug, or are you going to provide us with the
correct interface to use?

James




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-08 18:00           ` James Bottomley
@ 2009-12-08 20:07             ` Rafael J. Wysocki
  2009-12-09 18:17               ` Jesse Barnes
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2009-12-08 20:07 UTC (permalink / raw)
  To: James Bottomley
  Cc: Jesse Barnes, Kleber Sacilotto de Souza, linux-scsi, linux-pci

On Tuesday 08 December 2009, James Bottomley wrote:
> On Tue, 2009-12-01 at 14:10 -0800, Jesse Barnes wrote:
> > On Tue, 1 Dec 2009 22:40:48 +0100
> > "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> > 
> > > On Tuesday 01 December 2009, Jesse Barnes wrote:
> > > > On Tue, 01 Dec 2009 12:13:47 -0500
> > > > James Bottomley <James.Bottomley@suse.de> wrote:
> > > > 
> > > > > On Tue, 2009-12-01 at 14:54 -0200, Kleber Sacilotto de Souza
> > > > > wrote:
> > > > > > Can you please add the patch from "[PATCH] ipr: fix EEH
> > > > > > recovery" sent to this list?
> > > > > 
> > > > > Adding linux-pci because this hack actually tampers with internal
> > > > > PCI device state, which looks wrong.
> > > > > 
> > > > > The thread is here:
> > > > > 
> > > > > http://marc.info/?l=linux-scsi&m=125918723218627
> > > > > 
> > > > > and the proposed full patch and explanation below.
> > > > > 
> > > > > PCI people, is this correct, or is there a better way to do it?
> > > > > 
> > > > > James
> > > > > 
> > > > > ---
> > > > > 
> > > > > Hi,
> > > > > 
> > > > > After commits c82f63e411f1b58427c103bd95af2863b1c96dd1 (PCI: check
> > > > > saved state before restore) and
> > > > > 4b77b0a2ba27d64f58f16d8d4d48d8319dda36ff (PCI: Clear saved_state
> > > > > after the state has been restored) PCI drivers are prevented from
> > > > > restoring the device standard configuration registers twice in a
> > > > > row. These changes introduced a regression on ipr EEH recovery.
> > > > > 
> > > > > The ipr device driver saves the PCI state only during the device
> > > > > probe and restores it on ipr_reset_restore_cfg_space() during IOA
> > > > > resets. This behavior is causing the EEH recovery to fail after
> > > > > the second error detected, since the registers are not being
> > > > > restored.
> > > > > 
> > > > > One possible solution would be saving the registers after
> > > > > restoring them. The problem with this approach is that while
> > > > > recovering from an EEH error if pci_save_state() results in an
> > > > > EEH error, the adapter/slot will be reset, and end up back in
> > > > > ipr_reset_restore_cfg_space(), but it won't have a valid saved
> > > > > state to restore, so pci_restore_state() will fail.
> > > > > 
> > > > > The following patch introduces a workaround for this problem,
> > > > > hacking around the PCI API by setting pdev->state_saved = true
> > > > > before we do the restore. It fixes the EEH regression and
> > > > > prevents that we hit another EEH error during EEH recovery.
> > > > > 
> > > > > 
> > > > > Thanks,
> > > > > Kleber
> > > > > 
> > > > > 
> > > > > 
> > > > > Signed-off-by: Kleber Sacilotto de Souza
> > > > > <klebers@linux.vnet.ibm.com> ---
> > > > >  drivers/scsi/ipr.c |    1 +
> > > > >  1 files changed, 1 insertions(+), 0 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> > > > > index 76d294f..c3ff9a6 100644
> > > > > --- a/drivers/scsi/ipr.c
> > > > > +++ b/drivers/scsi/ipr.c
> > > > > @@ -6516,6 +6516,7 @@ static int
> > > > > ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
> > > > >         int rc;
> > > > >  
> > > > >         ENTER;
> > > > > +       ioa_cfg->pdev->state_saved = true;
> > > > >         rc = pci_restore_state(ioa_cfg->pdev);
> > > > >  
> > > > >         if (rc != PCIBIOS_SUCCESSFUL) {
> > > > 
> > > > Rafael may have input here, but it seems like we need a low level
> > > > save/restore routine that ignores the flag (which is generally used
> > > > for suspend/resume I think?).
> > > 
> > > There are some other users, but they are only a few.
> > > 
> > > > Maybe adding low level _pci_save_state/_pci_restore_state that don't
> > > > check/set the flags would help?
> > > 
> > > That might work, but how do we know that the state we're going to
> > > restore is actually valid at this particular point?
> > > 
> > > Perhaps we need a version using a separate storage area?
> > 
> > Yeah, that would probably be best.  Let the caller allocate the space
> > and save/restore it all it wants for special cases like error handling.
> 
> OK, so could I have a resolution for this, please, guys?
> 
> Do I just apply the patch to fiddle with the internal state which looks
> ugly but will fix the bug, or are you going to provide us with the
> correct interface to use?

I guess at the moment it's better to apply the workaround first and then
remove it when the correct interface is ready.

I really wouldn't like to hurry with reworking pci_save_state(), because that
has a potential of infroducing some new nasty bugs if not done with care.

Jesse, what's your opinion?

Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [GIT PATCH] SCSI fixes for 2.6.32-rc8
  2009-12-08 20:07             ` Rafael J. Wysocki
@ 2009-12-09 18:17               ` Jesse Barnes
  0 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2009-12-09 18:17 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: James Bottomley, Kleber Sacilotto de Souza, linux-scsi, linux-pci

On Tue, 2009-12-08 at 21:07 +0100, Rafael J. Wysocki wrote:
> I guess at the moment it's better to apply the workaround first and then
> remove it when the correct interface is ready.
> 
> I really wouldn't like to hurry with reworking pci_save_state(), because that
> has a potential of infroducing some new nasty bugs if not done with care.
> 
> Jesse, what's your opinion?

Yeah, that's fine.  I won't have time to hack anything together for a
week or two either, and I don't want to hold up a valid fix.

Jesse


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-12-09 18:29 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-30 22:29 [GIT PATCH] SCSI fixes for 2.6.32-rc8 James Bottomley
2009-11-30 23:03 ` FUJITA Tomonori
2009-12-01 14:06   ` James Bottomley
2009-12-01 17:15     ` Martin K. Petersen
2009-12-01 18:14     ` Kai Makisara
2009-12-01 16:54 ` Kleber Sacilotto de Souza
2009-12-01 17:13   ` James Bottomley
2009-12-01 17:18     ` Jesse Barnes
2009-12-01 21:40       ` Rafael J. Wysocki
2009-12-01 22:10         ` Jesse Barnes
2009-12-08 18:00           ` James Bottomley
2009-12-08 20:07             ` Rafael J. Wysocki
2009-12-09 18:17               ` Jesse Barnes

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.