* [Intel-wired-lan] [PATCH v1] igc: Add pcie error handler support
@ 2020-01-27 11:58 Sasha Neftin
2020-01-27 14:53 ` Paul Menzel
0 siblings, 1 reply; 3+ messages in thread
From: Sasha Neftin @ 2020-01-27 11:58 UTC (permalink / raw)
To: intel-wired-lan
Add pcie error detection, slot reset and resume capability
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
---
drivers/net/ethernet/intel/igc/igc_main.c | 105 ++++++++++++++++++++++++++++++
1 file changed, 105 insertions(+)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index fa72460e255a..b0656ae2fcb3 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -5076,6 +5076,110 @@ static void igc_shutdown(struct pci_dev *pdev)
}
}
+/**
+ * igc_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ **/
+static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ netif_device_detach(netdev);
+
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ if (netif_running(netdev))
+ igc_down(adapter);
+ pci_disable_device(pdev);
+
+ /* Request a slot slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * igc_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the igc_resume routine.
+ **/
+static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct igc_hw *hw = &adapter->hw;
+ pci_ers_result_t result;
+
+ if (pci_enable_device_mem(pdev)) {
+ dev_err(&pdev->dev,
+ "Cannot re-enable PCI device after reset.\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ pci_enable_wake(pdev, PCI_D3hot, 0);
+ pci_enable_wake(pdev, PCI_D3cold, 0);
+
+ /* In case of PCI error, adapter lose its HW address
+ * so we should re-assign it here.
+ */
+ hw->hw_addr = adapter->io_addr;
+
+ igc_reset(adapter);
+ wr32(IGC_WUS, ~0);
+ result = PCI_ERS_RESULT_RECOVERED;
+ }
+
+ return result;
+}
+
+/**
+ * igc_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the igc_resume routine.
+ */
+static void igc_io_resume(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ u32 err = 0;
+
+ rtnl_lock();
+ if (netif_running(netdev)) {
+ err = igc_open(netdev);
+ if (err) {
+ dev_err(&pdev->dev, "igic_open failed after reset\n");
+ return;
+ }
+ }
+
+ netif_device_attach(netdev);
+
+ /* let the f/w know that the h/w is now under the control of the
+ * driver.
+ */
+ igc_get_hw_control(adapter);
+ rtnl_unlock();
+}
+
+static const struct pci_error_handlers igc_err_handler = {
+ .error_detected = igc_io_error_detected,
+ .slot_reset = igc_io_slot_reset,
+ .resume = igc_io_resume,
+};
+
#ifdef CONFIG_PM
static const struct dev_pm_ops igc_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
@@ -5093,6 +5197,7 @@ static struct pci_driver igc_driver = {
.driver.pm = &igc_pm_ops,
#endif
.shutdown = igc_shutdown,
+ .err_handler = &igc_err_handler,
};
/**
--
2.11.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [Intel-wired-lan] [PATCH v1] igc: Add pcie error handler support
2020-01-27 11:58 [Intel-wired-lan] [PATCH v1] igc: Add pcie error handler support Sasha Neftin
@ 2020-01-27 14:53 ` Paul Menzel
2020-01-28 7:31 ` Neftin, Sasha
0 siblings, 1 reply; 3+ messages in thread
From: Paul Menzel @ 2020-01-27 14:53 UTC (permalink / raw)
To: intel-wired-lan
Dear Sasha,
Thank you for your patch set.
On 2020-01-27 12:58, Sasha Neftin wrote:
> Add pcie error detection, slot reset and resume capability
Tested how?
> Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
> ---
> drivers/net/ethernet/intel/igc/igc_main.c | 105 ++++++++++++++++++++++++++++++
> 1 file changed, 105 insertions(+)
>
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> index fa72460e255a..b0656ae2fcb3 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -5076,6 +5076,110 @@ static void igc_shutdown(struct pci_dev *pdev)
> }
> }
>
> +/**
> + * igc_io_error_detected - called when PCI error is detected
> + * @pdev: Pointer to PCI device
> + * @state: The current pci connection state
PCI
> + *
> + * This function is called after a PCI bus error affecting
> + * this device has been detected.
Document the two possible return codes?
> + **/
> +static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
> + pci_channel_state_t state)
> +{
> + struct net_device *netdev = pci_get_drvdata(pdev);
> + struct igc_adapter *adapter = netdev_priv(netdev);
> +
> + netif_device_detach(netdev);
> +
> + if (state == pci_channel_io_perm_failure)
> + return PCI_ERS_RESULT_DISCONNECT;
> +
> + if (netif_running(netdev))
> + igc_down(adapter);
> + pci_disable_device(pdev);
> +
> + /* Request a slot slot reset. */
One slot?
> + return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +/**
> + * igc_io_slot_reset - called after the pci bus has been reset.
PCI
> + * @pdev: Pointer to PCI device
> + *
> + * Restart the card from scratch, as if from a cold-boot. Implementation
> + * resembles the first-half of the igc_resume routine.
Shouldn?t the common code be factored out then?
> + **/
> +static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
> +{
> + struct net_device *netdev = pci_get_drvdata(pdev);
> + struct igc_adapter *adapter = netdev_priv(netdev);
> + struct igc_hw *hw = &adapter->hw;
> + pci_ers_result_t result;
> +
> + if (pci_enable_device_mem(pdev)) {
> + dev_err(&pdev->dev,
> + "Cannot re-enable PCI device after reset.\n");
*Could not*, so it?s clear, it was tried, and not a policy decision.
> + result = PCI_ERS_RESULT_DISCONNECT;
> + } else {
> + pci_set_master(pdev);
> + pci_restore_state(pdev);
> + pci_save_state(pdev);
> +
> + pci_enable_wake(pdev, PCI_D3hot, 0);
> + pci_enable_wake(pdev, PCI_D3cold, 0);
> +
> + /* In case of PCI error, adapter lose its HW address
lose*s*
> + * so we should re-assign it here.
> + */
> + hw->hw_addr = adapter->io_addr;
> +
> + igc_reset(adapter);
> + wr32(IGC_WUS, ~0);
> + result = PCI_ERS_RESULT_RECOVERED;
> + }
> +
> + return result;
You can get rid of the if-else statement, by returning in the if branch,
and use the else branch as the follow-on(?). Then you can return the
result directly too, and even remove the variable `result`.
> +}
> +
> +/**
> + * igc_io_resume - called when traffic can start flowing again.
start to flow
> + * @pdev: Pointer to PCI device
> + *
> + * This callback is called when the error recovery driver tells us that
> + * its OK to resume normal operation. Implementation resembles the
> + * second-half of the igc_resume routine.
> + */
> +static void igc_io_resume(struct pci_dev *pdev)
> +{
> + struct net_device *netdev = pci_get_drvdata(pdev);
> + struct igc_adapter *adapter = netdev_priv(netdev);
> + u32 err = 0;
> +
> + rtnl_lock();
> + if (netif_running(netdev)) {
> + err = igc_open(netdev);
> + if (err) {
Use `if (igc_open(netdev)) {`, and remove variable `err`?
> + dev_err(&pdev->dev, "igic_open failed after reset\n");
> + return;
> + }
> + }
> +
> + netif_device_attach(netdev);
> +
> + /* let the f/w know that the h/w is now under the control of the
> + * driver.
> + */
> + igc_get_hw_control(adapter);
> + rtnl_unlock();
> +}
> +
> +static const struct pci_error_handlers igc_err_handler = {
> + .error_detected = igc_io_error_detected,
> + .slot_reset = igc_io_slot_reset,
> + .resume = igc_io_resume,
> +};
> +
> #ifdef CONFIG_PM
> static const struct dev_pm_ops igc_pm_ops = {
> SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
> @@ -5093,6 +5197,7 @@ static struct pci_driver igc_driver = {
> .driver.pm = &igc_pm_ops,
> #endif
> .shutdown = igc_shutdown,
> + .err_handler = &igc_err_handler,
> };
>
> /**
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 5174 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://lists.osuosl.org/pipermail/intel-wired-lan/attachments/20200127/b35d9905/attachment.p7s>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Intel-wired-lan] [PATCH v1] igc: Add pcie error handler support
2020-01-27 14:53 ` Paul Menzel
@ 2020-01-28 7:31 ` Neftin, Sasha
0 siblings, 0 replies; 3+ messages in thread
From: Neftin, Sasha @ 2020-01-28 7:31 UTC (permalink / raw)
To: intel-wired-lan
On 1/27/2020 16:53, Paul Menzel wrote:
> Dear Sasha,
>
>
> Thank you for your patch set.
>
>
> On 2020-01-27 12:58, Sasha Neftin wrote:
>> Add pcie error detection, slot reset and resume capability
>
> Tested how?
>
Tested on the platform with error-prone PCIe slot and system has been
recovered. To more testing, I would recommend using peripheral
equipment. Lecroy's PCIe test board could be a good option.
>> Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
>> ---
>> drivers/net/ethernet/intel/igc/igc_main.c | 105 ++++++++++++++++++++++++++++++
>> 1 file changed, 105 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
>> index fa72460e255a..b0656ae2fcb3 100644
>> --- a/drivers/net/ethernet/intel/igc/igc_main.c
>> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
>> @@ -5076,6 +5076,110 @@ static void igc_shutdown(struct pci_dev *pdev)
>> }
>> }
>>
>> +/**
>> + * igc_io_error_detected - called when PCI error is detected
>> + * @pdev: Pointer to PCI device
>> + * @state: The current pci connection state
>
> PCI
>
good. I will change.
>> + *
>> + * This function is called after a PCI bus error affecting
>> + * this device has been detected.
>
> Document the two possible return codes?
>
I will elaborate on the comment.
>> + **/
>> +static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
>> + pci_channel_state_t state)
>> +{
>> + struct net_device *netdev = pci_get_drvdata(pdev);
>> + struct igc_adapter *adapter = netdev_priv(netdev);
>> +
>> + netif_device_detach(netdev);
>> +
>> + if (state == pci_channel_io_perm_failure)
>> + return PCI_ERS_RESULT_DISCONNECT;
>> +
>> + if (netif_running(netdev))
>> + igc_down(adapter);
>> + pci_disable_device(pdev);
>> +
>> + /* Request a slot slot reset. */
>
> One slot?
>
yes, typo.
>> + return PCI_ERS_RESULT_NEED_RESET;
>> +}
>> +
>> +/**
>> + * igc_io_slot_reset - called after the pci bus has been reset.
>
> PCI
>
ok.
>> + * @pdev: Pointer to PCI device
>> + *
>> + * Restart the card from scratch, as if from a cold-boot. Implementation
>> + * resembles the first-half of the igc_resume routine.
>
> Shouldn?t the common code be factored out then?
>
I prefer stay align with our legacy code in other drivers.
>> + **/
>> +static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
>> +{
>> + struct net_device *netdev = pci_get_drvdata(pdev);
>> + struct igc_adapter *adapter = netdev_priv(netdev);
>> + struct igc_hw *hw = &adapter->hw;
>> + pci_ers_result_t result;
>> +
>> + if (pci_enable_device_mem(pdev)) {
>> + dev_err(&pdev->dev,
>> + "Cannot re-enable PCI device after reset.\n");
>
> *Could not*, so it?s clear, it was tried, and not a policy decision.
>
ok.
>> + result = PCI_ERS_RESULT_DISCONNECT;
>> + } else {
>> + pci_set_master(pdev);
>> + pci_restore_state(pdev);
>> + pci_save_state(pdev);
>> +
>> + pci_enable_wake(pdev, PCI_D3hot, 0);
>> + pci_enable_wake(pdev, PCI_D3cold, 0);
>> +
>> + /* In case of PCI error, adapter lose its HW address
>
> lose*s*
>
ok.
>> + * so we should re-assign it here.
>> + */
>> + hw->hw_addr = adapter->io_addr;
>> +
>> + igc_reset(adapter);
>> + wr32(IGC_WUS, ~0);
>> + result = PCI_ERS_RESULT_RECOVERED;
>> + }
>> +
>> + return result;
>
> You can get rid of the if-else statement, by returning in the if branch,
> and use the else branch as the follow-on(?). Then you can return the
> result directly too, and even remove the variable `result`.
>
I prefer stay with a legacy code to align with other ours drivers.
>> +}
>> +
>> +/**
>> + * igc_io_resume - called when traffic can start flowing again.
>
> start to flow
>
ok, thanks.
>> + * @pdev: Pointer to PCI device
>> + *
>> + * This callback is called when the error recovery driver tells us that
>> + * its OK to resume normal operation. Implementation resembles the
>> + * second-half of the igc_resume routine.
>> + */
>> +static void igc_io_resume(struct pci_dev *pdev)
>> +{
>> + struct net_device *netdev = pci_get_drvdata(pdev);
>> + struct igc_adapter *adapter = netdev_priv(netdev);
>> + u32 err = 0;
>> +
>> + rtnl_lock();
>> + if (netif_running(netdev)) {
>> + err = igc_open(netdev);
>> + if (err) {
>
> Use `if (igc_open(netdev)) {`, and remove variable `err`?
>
good idea. thanks
>> + dev_err(&pdev->dev, "igic_open failed after reset\n");
>> + return;
>> + }
>> + }
>> +
>> + netif_device_attach(netdev);
>> +
>> + /* let the f/w know that the h/w is now under the control of the
>> + * driver.
>> + */
>> + igc_get_hw_control(adapter);
>> + rtnl_unlock();
>> +}
>> +
>> +static const struct pci_error_handlers igc_err_handler = {
>> + .error_detected = igc_io_error_detected,
>> + .slot_reset = igc_io_slot_reset,
>> + .resume = igc_io_resume,
>> +};
>> +
>> #ifdef CONFIG_PM
>> static const struct dev_pm_ops igc_pm_ops = {
>> SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
>> @@ -5093,6 +5197,7 @@ static struct pci_driver igc_driver = {
>> .driver.pm = &igc_pm_ops,
>> #endif
>> .shutdown = igc_shutdown,
>> + .err_handler = &igc_err_handler,
>> };
>>
>> /**
>>
>
Paul, thanks for your feedback - I will submit v2 and address your comments.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-01-28 7:31 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-27 11:58 [Intel-wired-lan] [PATCH v1] igc: Add pcie error handler support Sasha Neftin
2020-01-27 14:53 ` Paul Menzel
2020-01-28 7:31 ` Neftin, Sasha
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.