linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Sam Bobroff <sbobroff@linux.ibm.com>
To: linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 14/14] powerpc/eeh: Cleanup control flow in eeh_handle_normal_event()
Date: Wed, 12 Sep 2018 11:23:33 +1000	[thread overview]
Message-ID: <e1cb21f2fa4c9c6fc467412f8cb281c8bf280770.1536715396.git.sbobroff@linux.ibm.com> (raw)
In-Reply-To: <cover.1536715396.git.sbobroff@linux.ibm.com>

Rather than mixing "if (state)" blocks and gotos, convert entirely to
"if (state)" blocks to make the state machine behaviour clearer.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 arch/powerpc/kernel/eeh_driver.c | 196 +++++++++++++++----------------
 1 file changed, 94 insertions(+), 102 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index e7f757cd839b..9446248eb6b8 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -808,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
 		       pe->phb->global_number, pe->addr,
 		       pe->freeze_count);
-		goto hard_fail;
+		result = PCI_ERS_RESULT_DISCONNECT;
 	}
-	pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
-		pe->freeze_count, eeh_max_freezes);
 
 	/* Walk the various device drivers attached to this slot through
 	 * a reset sequence, giving each an opportunity to do what it needs
@@ -823,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 	 * the error. Override the result if necessary to have partially
 	 * hotplug for this case.
 	 */
-	pr_info("EEH: Notify device drivers to shutdown\n");
-	eeh_set_channel_state(pe, pci_channel_io_frozen);
-	eeh_set_irq_state(pe, false);
-	eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
-		      &result);
-	if ((pe->type & EEH_PE_PHB) &&
-	    result != PCI_ERS_RESULT_NONE &&
-	    result != PCI_ERS_RESULT_NEED_RESET)
-		result = PCI_ERS_RESULT_NEED_RESET;
+	if (result != PCI_ERS_RESULT_DISCONNECT) {
+		pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+			pe->freeze_count, eeh_max_freezes);
+		pr_info("EEH: Notify device drivers to shutdown\n");
+		eeh_set_channel_state(pe, pci_channel_io_frozen);
+		eeh_set_irq_state(pe, false);
+		eeh_pe_report("error_detected(IO frozen)", pe,
+			      eeh_report_error, &result);
+		if ((pe->type & EEH_PE_PHB) &&
+		    result != PCI_ERS_RESULT_NONE &&
+		    result != PCI_ERS_RESULT_NEED_RESET)
+			result = PCI_ERS_RESULT_NEED_RESET;
+	}
 
 	/* Get the current PCI slot state. This can take a long time,
 	 * sometimes over 300 seconds for certain systems.
 	 */
-	rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
-	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
-		pr_warn("EEH: Permanent failure\n");
-		goto hard_fail;
+	if (result != PCI_ERS_RESULT_DISCONNECT) {
+		rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+		if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+			pr_warn("EEH: Permanent failure\n");
+			result = PCI_ERS_RESULT_DISCONNECT;
+		}
 	}
 
 	/* Since rtas may enable MMIO when posting the error log,
 	 * don't post the error log until after all dev drivers
 	 * have been informed.
 	 */
-	pr_info("EEH: Collect temporary log\n");
-	eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+	if (result != PCI_ERS_RESULT_DISCONNECT) {
+		pr_info("EEH: Collect temporary log\n");
+		eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+	}
 
 	/* If all device drivers were EEH-unaware, then shut
 	 * down all of the device drivers, and hope they
@@ -859,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		if (rc) {
 			pr_warn("%s: Unable to reset, err=%d\n",
 				__func__, rc);
-			goto hard_fail;
+			result = PCI_ERS_RESULT_DISCONNECT;
 		}
 	}
 
@@ -868,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_info("EEH: Enable I/O for affected devices\n");
 		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 
-		if (rc < 0)
-			goto hard_fail;
-		if (rc) {
+		if (rc < 0) {
+			result = PCI_ERS_RESULT_DISCONNECT;
+		} else if (rc) {
 			result = PCI_ERS_RESULT_NEED_RESET;
 		} else {
 			pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -884,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_info("EEH: Enabled DMA for affected devices\n");
 		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
 
-		if (rc < 0)
-			goto hard_fail;
-		if (rc) {
+		if (rc < 0) {
+			result = PCI_ERS_RESULT_DISCONNECT;
+		} else if (rc) {
 			result = PCI_ERS_RESULT_NEED_RESET;
 		} else {
 			/*
@@ -899,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		}
 	}
 
-	/* If any device has a hard failure, then shut off everything. */
-	if (result == PCI_ERS_RESULT_DISCONNECT) {
-		pr_warn("EEH: Device driver gave up\n");
-		goto hard_fail;
-	}
-
 	/* If any device called out for a reset, then reset the slot */
 	if (result == PCI_ERS_RESULT_NEED_RESET) {
 		pr_info("EEH: Reset without hotplug activity\n");
@@ -912,89 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 		if (rc) {
 			pr_warn("%s: Cannot reset, err=%d\n",
 				__func__, rc);
-			goto hard_fail;
+			result = PCI_ERS_RESULT_DISCONNECT;
+		} else {
+			result = PCI_ERS_RESULT_NONE;
+			eeh_set_channel_state(pe, pci_channel_io_normal);
+			eeh_set_irq_state(pe, true);
+			eeh_pe_report("slot_reset", pe, eeh_report_reset,
+				      &result);
 		}
-
-		pr_info("EEH: Notify device drivers "
-			"the completion of reset\n");
-		result = PCI_ERS_RESULT_NONE;
-		eeh_set_channel_state(pe, pci_channel_io_normal);
-		eeh_set_irq_state(pe, true);
-		eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
 	}
 
-	/* All devices should claim they have recovered by now. */
-	if ((result != PCI_ERS_RESULT_RECOVERED) &&
-	    (result != PCI_ERS_RESULT_NONE)) {
-		pr_warn("EEH: Not recovered\n");
-		goto hard_fail;
-	}
-
-	/*
-	 * For those hot removed VFs, we should add back them after PF get
-	 * recovered properly.
-	 */
-	list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
-				 rmv_entry) {
-		eeh_add_virt_device(edev);
-		list_del(&edev->rmv_entry);
-	}
-
-	/* Tell all device drivers that they can resume operations */
-	pr_info("EEH: Notify device driver to resume\n");
-	eeh_set_channel_state(pe, pci_channel_io_normal);
-	eeh_set_irq_state(pe, true);
-	eeh_pe_report("resume", pe, eeh_report_resume, NULL);
-	eeh_for_each_pe(pe, tmp_pe) {
-		eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
-			edev->mode &= ~EEH_DEV_NO_HANDLER;
-			edev->in_error = false;
+	if ((result == PCI_ERS_RESULT_RECOVERED) ||
+	    (result == PCI_ERS_RESULT_NONE)) {
+		/*
+		 * For those hot removed VFs, we should add back them after PF
+		 * get recovered properly.
+		 */
+		list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+					 rmv_entry) {
+			eeh_add_virt_device(edev);
+			list_del(&edev->rmv_entry);
 		}
-	}
 
-	pr_info("EEH: Recovery successful.\n");
-	goto final;
+		/* Tell all device drivers that they can resume operations */
+		pr_info("EEH: Notify device driver to resume\n");
+		eeh_set_channel_state(pe, pci_channel_io_normal);
+		eeh_set_irq_state(pe, true);
+		eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+		eeh_for_each_pe(pe, tmp_pe) {
+			eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+				edev->mode &= ~EEH_DEV_NO_HANDLER;
+				edev->in_error = false;
+			}
+		}
 
-hard_fail:
-	/*
-	 * About 90% of all real-life EEH failures in the field
-	 * are due to poorly seated PCI cards. Only 10% or so are
-	 * due to actual, failed cards.
-	 */
-	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
-	       "Please try reseating or replacing it\n",
-		pe->phb->global_number, pe->addr);
+		pr_info("EEH: Recovery successful.\n");
+	} else  {
+		/*
+		 * About 90% of all real-life EEH failures in the field
+		 * are due to poorly seated PCI cards. Only 10% or so are
+		 * due to actual, failed cards.
+		 */
+		pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+		       "Please try reseating or replacing it\n",
+			pe->phb->global_number, pe->addr);
 
-	eeh_slot_error_detail(pe, EEH_LOG_PERM);
+		eeh_slot_error_detail(pe, EEH_LOG_PERM);
 
-	/* Notify all devices that they're about to go down. */
-	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
-	eeh_set_irq_state(pe, false);
-	eeh_pe_report("error_detected(permanent failure)", pe,
-		      eeh_report_failure, NULL);
+		/* Notify all devices that they're about to go down. */
+		eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+		eeh_set_irq_state(pe, false);
+		eeh_pe_report("error_detected(permanent failure)", pe,
+			      eeh_report_failure, NULL);
 
-	/* Mark the PE to be removed permanently */
-	eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+		/* Mark the PE to be removed permanently */
+		eeh_pe_state_mark(pe, EEH_PE_REMOVED);
 
-	/*
-	 * Shut down the device drivers for good. We mark
-	 * all removed devices correctly to avoid access
-	 * the their PCI config any more.
-	 */
-	if (pe->type & EEH_PE_VF) {
-		eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-	} else {
-		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+		/*
+		 * Shut down the device drivers for good. We mark
+		 * all removed devices correctly to avoid access
+		 * the their PCI config any more.
+		 */
+		if (pe->type & EEH_PE_VF) {
+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+		} else {
+			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
-		pci_lock_rescan_remove();
-		pci_hp_remove_devices(bus);
-		pci_unlock_rescan_remove();
-		/* The passed PE should no longer be used */
-		return;
+			pci_lock_rescan_remove();
+			pci_hp_remove_devices(bus);
+			pci_unlock_rescan_remove();
+			/* The passed PE should no longer be used */
+			return;
+		}
 	}
-final:
 	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 }
 
-- 
2.19.0.2.gcad72f5712

      parent reply	other threads:[~2018-09-12  1:23 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-12  1:23 [PATCH 00/14] EEH refactoring 3 Sam Bobroff
2018-09-12  1:23 ` [PATCH 01/14] powerpc/eeh: Fix possible null deref in eeh_dump_dev_log() Sam Bobroff
2018-10-15  4:00   ` [01/14] " Michael Ellerman
2018-09-12  1:23 ` [PATCH 02/14] powerpc/eeh: Fix null deref for devices removed during EEH Sam Bobroff
2018-09-12  1:23 ` [PATCH 03/14] powerpc/eeh: Fix use of EEH_PE_KEEP on wrong field Sam Bobroff
2018-09-12  1:23 ` [PATCH 04/14] powerpc/eeh: Cleanup EEH_POSTPONED_PROBE Sam Bobroff
2018-09-12  1:23 ` [PATCH 05/14] powerpc/eeh: Cleanup unused field in eeh_dev Sam Bobroff
2018-09-12  1:23 ` [PATCH 06/14] powerpc/eeh: Cleanup eeh_add_virt_device() Sam Bobroff
2018-09-12  1:23 ` [PATCH 07/14] powerpc/eeh: Cleanup list_head field names Sam Bobroff
2018-09-12  1:23 ` [PATCH 08/14] powerpc/eeh: Cleanup field names in eeh_rmv_data Sam Bobroff
2018-09-12  1:23 ` [PATCH 09/14] powerpc/eeh: Cleanup logic in eeh_rmv_from_parent_pe() Sam Bobroff
2018-09-12  1:23 ` [PATCH 10/14] powerpc/eeh: Cleanup eeh_enabled() Sam Bobroff
2018-09-12  1:23 ` [PATCH 11/14] powerpc/eeh: Cleanup unnecessary eeh_pe_state_mark_with_cfg() Sam Bobroff
2018-09-12  1:23 ` [PATCH 12/14] powerpc/eeh: Cleanup eeh_pe_state_mark() Sam Bobroff
2018-09-12  1:23 ` [PATCH 13/14] powerpc/eeh: Cleanup eeh_ops.wait_state() Sam Bobroff
2018-09-12  1:23 ` Sam Bobroff [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e1cb21f2fa4c9c6fc467412f8cb281c8bf280770.1536715396.git.sbobroff@linux.ibm.com \
    --to=sbobroff@linux.ibm.com \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).