linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] PCI Error Recovery: e100 network device driver
@ 2006-04-06 22:24 Linas Vepstas
  2006-04-06 22:46 ` Greg KH
  2006-04-29  2:48 ` Zhang, Yanmin
  0 siblings, 2 replies; 6+ messages in thread
From: Linas Vepstas @ 2006-04-06 22:24 UTC (permalink / raw)
  To: Jeff Garzik, netdev, linux-pci
  Cc: linux-kernel, linuxppc-dev, john.ronciak, jesse.brandeburg,
	jeffrey.t.kirsher


Please apply and forward upstream.

--linas

[PATCH] PCI Error Recovery: e100 network device driver

Various PCI bus errors can be signaled by newer PCI controllers.  This
patch adds the PCI error recovery callbacks to the intel ethernet e100
device driver. The patch has been tested, and appears to work well.

Signed-off-by: Linas Vepstas <linas@linas.org>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>

----

 drivers/net/e100.c |   65 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 65 insertions(+)

Index: linux-2.6.17-rc1/drivers/net/e100.c
===================================================================
--- linux-2.6.17-rc1.orig/drivers/net/e100.c	2006-04-05 09:56:06.000000000 -0500
+++ linux-2.6.17-rc1/drivers/net/e100.c	2006-04-06 15:17:29.000000000 -0500
@@ -2781,6 +2781,70 @@ static void e100_shutdown(struct pci_dev
 }
 
 
+/* ------------------ PCI Error Recovery infrastructure  -------------- */
+/** e100_io_error_detected() is called when PCI error is detected */
+static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+
+	/* Same as calling e100_down(netdev_priv(netdev)), but generic */
+	netdev->stop(netdev);
+
+	/* Detach; put netif into state similar to hotplug unplug */
+	netif_poll_enable(netdev);
+	netif_device_detach(netdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/** e100_io_slot_reset is called after the pci bus has been reset.
+ *  Restart the card from scratch. */
+static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	if(pci_enable_device(pdev)) {
+		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+
+	/* Only one device per card can do a reset */
+	if (0 != PCI_FUNC (pdev->devfn))
+		return PCI_ERS_RESULT_RECOVERED;
+	e100_hw_reset(nic);
+	e100_phy_init(nic);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/** e100_io_resume is called when the error recovery driver
+ *  tells us that its OK to resume normal operation.
+ */
+static void e100_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	/* ack any pending wake events, disable PME */
+	pci_enable_wake(pdev, 0, 0);
+
+	netif_device_attach(netdev);
+	if(netif_running(netdev)) {
+		e100_open (netdev);
+		mod_timer(&nic->watchdog, jiffies);
+	}
+}
+
+static struct pci_error_handlers e100_err_handler = {
+	.error_detected = e100_io_error_detected,
+	.slot_reset = e100_io_slot_reset,
+	.resume = e100_io_resume,
+};
+
+
 static struct pci_driver e100_driver = {
 	.name =         DRV_NAME,
 	.id_table =     e100_id_table,
@@ -2791,6 +2855,7 @@ static struct pci_driver e100_driver = {
 	.resume =       e100_resume,
 #endif
 	.shutdown =     e100_shutdown,
+	.err_handler = &e100_err_handler,
 };
 
 static int __init e100_init_module(void)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI Error Recovery: e100 network device driver
  2006-04-06 22:24 [PATCH] PCI Error Recovery: e100 network device driver Linas Vepstas
@ 2006-04-06 22:46 ` Greg KH
  2006-04-07 23:11   ` Linas Vepstas
  2006-04-29  2:48 ` Zhang, Yanmin
  1 sibling, 1 reply; 6+ messages in thread
From: Greg KH @ 2006-04-06 22:46 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Jeff Garzik, netdev, linux-pci, linux-kernel, linuxppc-dev,
	john.ronciak, jesse.brandeburg, jeffrey.t.kirsher

On Thu, Apr 06, 2006 at 05:24:00PM -0500, Linas Vepstas wrote:
> +	if(pci_enable_device(pdev)) {

Add a space after "if" and before "(" please.

You do this in a few different places.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI Error Recovery: e100 network device driver
  2006-04-06 22:46 ` Greg KH
@ 2006-04-07 23:11   ` Linas Vepstas
  2006-04-08  0:03     ` Alexey Dobriyan
  2006-04-08  8:12     ` Francois Romieu
  0 siblings, 2 replies; 6+ messages in thread
From: Linas Vepstas @ 2006-04-07 23:11 UTC (permalink / raw)
  To: Greg KH
  Cc: Jeff Garzik, netdev, linux-pci, linux-kernel, linuxppc-dev,
	john.ronciak, jesse.brandeburg, jeffrey.t.kirsher

On Thu, Apr 06, 2006 at 03:46:43PM -0700, Greg KH wrote:
> On Thu, Apr 06, 2006 at 05:24:00PM -0500, Linas Vepstas wrote:
> > +	if(pci_enable_device(pdev)) {
> 
> Add a space after "if" and before "(" please.

I guess I'm immune to learning from experience. :-/

Here's a new improved patch.

--linas

[PATCH] PCI Error Recovery: e100 network device driver

Various PCI bus errors can be signaled by newer PCI controllers.  This
patch adds the PCI error recovery callbacks to the intel ethernet e100
device driver. The patch has been tested, and appears to work well.

Signed-off-by: Linas Vepstas <linas@linas.org>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>

----

 drivers/net/e100.c |   75 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 75 insertions(+)

Index: linux-2.6.17-rc1/drivers/net/e100.c
===================================================================
--- linux-2.6.17-rc1.orig/drivers/net/e100.c	2006-04-07 16:21:46.000000000 -0500
+++ linux-2.6.17-rc1/drivers/net/e100.c	2006-04-07 18:10:52.411266545 -0500
@@ -2780,6 +2780,80 @@ static void e100_shutdown(struct pci_dev
 		DPRINTK(PROBE,ERR, "Error enabling wake\n");
 }
 
+/* ------------------ PCI Error Recovery infrastructure  -------------- */
+/**
+ * e100_io_error_detected - called when PCI error is detected.
+ * @pdev: Pointer to PCI device
+ * @state: The current pci conneection state
+ */
+static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+
+	/* Similar to calling e100_down(), but avoids adpater I/O. */
+	netdev->stop(netdev);
+
+	/* Detach; put netif into state similar to hotplug unplug. */
+	netif_poll_enable(netdev);
+	netif_device_detach(netdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * e100_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	if (pci_enable_device(pdev)) {
+		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+
+	/* Only one device per card can do a reset */
+	if (0 != PCI_FUNC(pdev->devfn))
+		return PCI_ERS_RESULT_RECOVERED;
+	e100_hw_reset(nic);
+	e100_phy_init(nic);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * e100_io_resume - resume normal operations
+ * @pdev: Pointer to PCI device
+ *
+ * Resume normal operations after an error recovery
+ * sequence has been completed.
+ */
+static void e100_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	/* ack any pending wake events, disable PME */
+	pci_enable_wake(pdev, 0, 0);
+
+	netif_device_attach(netdev);
+	if (netif_running(netdev)) {
+		e100_open(netdev);
+		mod_timer(&nic->watchdog, jiffies);
+	}
+}
+
+static struct pci_error_handlers e100_err_handler = {
+	.error_detected = e100_io_error_detected,
+	.slot_reset = e100_io_slot_reset,
+	.resume = e100_io_resume,
+};
 
 static struct pci_driver e100_driver = {
 	.name =         DRV_NAME,
@@ -2791,6 +2865,7 @@ static struct pci_driver e100_driver = {
 	.resume =       e100_resume,
 #endif
 	.shutdown =     e100_shutdown,
+	.err_handler = &e100_err_handler,
 };
 
 static int __init e100_init_module(void)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI Error Recovery: e100 network device driver
  2006-04-07 23:11   ` Linas Vepstas
@ 2006-04-08  0:03     ` Alexey Dobriyan
  2006-04-08  8:12     ` Francois Romieu
  1 sibling, 0 replies; 6+ messages in thread
From: Alexey Dobriyan @ 2006-04-08  0:03 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Greg KH, Jeff Garzik, netdev, linux-pci, linux-kernel,
	linuxppc-dev, john.ronciak, jesse.brandeburg, jeffrey.t.kirsher

On Fri, Apr 07, 2006 at 06:11:34PM -0500, Linas Vepstas wrote:
> --- linux-2.6.17-rc1.orig/drivers/net/e100.c
> +++ linux-2.6.17-rc1/drivers/net/e100.c

> + * @state: The current pci conneection state

connection

> +static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +
> +	/* Similar to calling e100_down(), but avoids adpater I/O. */

adapter

> +static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	if (pci_enable_device(pdev)) {
> +		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +	pci_set_master(pdev);
> +
> +	/* Only one device per card can do a reset */
> +	if (0 != PCI_FUNC(pdev->devfn))

Wrong order.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI Error Recovery: e100 network device driver
  2006-04-07 23:11   ` Linas Vepstas
  2006-04-08  0:03     ` Alexey Dobriyan
@ 2006-04-08  8:12     ` Francois Romieu
  1 sibling, 0 replies; 6+ messages in thread
From: Francois Romieu @ 2006-04-08  8:12 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Greg KH, Jeff Garzik, netdev, linux-pci, linux-kernel,
	linuxppc-dev, john.ronciak, jesse.brandeburg, jeffrey.t.kirsher

Linas Vepstas <linas@austin.ibm.com> :
> Index: linux-2.6.17-rc1/drivers/net/e100.c
> ===================================================================
> --- linux-2.6.17-rc1.orig/drivers/net/e100.c	2006-04-07 16:21:46.000000000 -0500
> +++ linux-2.6.17-rc1/drivers/net/e100.c	2006-04-07 18:10:52.411266545 -0500
[...]
> +static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)

80 cols limit.

[...]
> +static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	if (pci_enable_device(pdev)) {
> +		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");

- The driver supports {get/set}_msglevel. Please consider using netif_msg_xxx
  (see include/linux/netdevice.h).

- s/e100/DRV_NAME/ (or netdev->name, or pci_name(...) depending on the
  context).

[...]
> +static struct pci_error_handlers e100_err_handler = {
> +	.error_detected = e100_io_error_detected,
> +	.slot_reset = e100_io_slot_reset,
> +	.resume = e100_io_resume,
> +};

Nit: I'd rather follow the style in the declaration of e100_driver.

-- 
Ueimor

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] PCI Error Recovery: e100 network device driver
  2006-04-06 22:24 [PATCH] PCI Error Recovery: e100 network device driver Linas Vepstas
  2006-04-06 22:46 ` Greg KH
@ 2006-04-29  2:48 ` Zhang, Yanmin
  1 sibling, 0 replies; 6+ messages in thread
From: Zhang, Yanmin @ 2006-04-29  2:48 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Jeff Garzik, netdev, linux-pci, LKML, linuxppc-dev, john.ronciak,
	jesse.brandeburg, jeffrey.t.kirsher

On Fri, 2006-04-07 at 06:24, Linas Vepstas wrote:
> Please apply and forward upstream.
> 
> --linas
> 
> [PATCH] PCI Error Recovery: e100 network device driver
> 
> Various PCI bus errors can be signaled by newer PCI controllers.  This
> patch adds the PCI error recovery callbacks to the intel ethernet e100
> device driver. The patch has been tested, and appears to work well.
> 
> Signed-off-by: Linas Vepstas <linas@linas.org>
> Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
I am enabling PCI-Express AER (Advanced Error Reporting) in kernel and
glad to see many drivers to support pci error handling.


> 
> ----
> 
>  drivers/net/e100.c |   65 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 65 insertions(+)
> 
> Index: linux-2.6.17-rc1/drivers/net/e100.c
> ===================================================================
> --- linux-2.6.17-rc1.orig/drivers/net/e100.c	2006-04-05 09:56:06.000000000 -0500
> +++ linux-2.6.17-rc1/drivers/net/e100.c	2006-04-06 15:17:29.000000000 -0500
> @@ -2781,6 +2781,70 @@ static void e100_shutdown(struct pci_dev
>  }
>  
> 
> +/* ------------------ PCI Error Recovery infrastructure  -------------- */
> +/** e100_io_error_detected() is called when PCI error is detected */
> +static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +
> +	/* Same as calling e100_down(netdev_priv(netdev)), but generic */
> +	netdev->stop(netdev);
e100 stop method e100_close calls e100_down which would do IO. Does it
violate the rule defined in Documentation/pci-error-recovery.txt that
error_detected shouldn't do any IO?
Suggest to create a new function, such like e100_close_noreset.


> +
> +	/* Detach; put netif into state similar to hotplug unplug */
> +	netif_poll_enable(netdev);
> +	netif_device_detach(netdev);
> +
> +	/* Request a slot reset. */
> +	return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +/** e100_io_slot_reset is called after the pci bus has been reset.
> + *  Restart the card from scratch. */
> +static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	if(pci_enable_device(pdev)) {
> +		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +	pci_set_master(pdev);
> +
> +	/* Only one device per card can do a reset */
> +	if (0 != PCI_FUNC (pdev->devfn))
> +		return PCI_ERS_RESULT_RECOVERED;
> +	e100_hw_reset(nic);
> +	e100_phy_init(nic);
Should pci_set_master be called after e100_hw_reset like in function
e100_probe?


> +
> +	return PCI_ERS_RESULT_RECOVERED;
> +}
> +
> +/** e100_io_resume is called when the error recovery driver
> + *  tells us that its OK to resume normal operation.
> + */
> +static void e100_io_resume(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	/* ack any pending wake events, disable PME */
> +	pci_enable_wake(pdev, 0, 0);
> +
> +	netif_device_attach(netdev);
> +	if(netif_running(netdev)) {
> +		e100_open (netdev);
> +		mod_timer(&nic->watchdog, jiffies);
e100_open calls e100_up which already sets watchdog timer. Why to set
it again?

> +	}
> +}
> +

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2006-04-29  2:51 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-04-06 22:24 [PATCH] PCI Error Recovery: e100 network device driver Linas Vepstas
2006-04-06 22:46 ` Greg KH
2006-04-07 23:11   ` Linas Vepstas
2006-04-08  0:03     ` Alexey Dobriyan
2006-04-08  8:12     ` Francois Romieu
2006-04-29  2:48 ` Zhang, Yanmin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).