All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jubin John <jubin.john@intel.com>
To: gregkh@linuxfoundation.org, devel@driverdev.osuosl.org
Cc: linux-rdma@vger.kernel.org, dledford@redhat.com
Subject: [PATCH 01/18] staging/rdma/hfi1: Support alternate firmware names
Date: Fri, 13 Nov 2015 19:36:59 -0800	[thread overview]
Message-ID: <1447472236-622-1-git-send-email-jubin.john@intel.com> (raw)

From: Dean Luick <dean.luick@intel.com>

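Add support for an automatic fallback to alternate firmware names.  If
the default firmware files cannot be obtained, or a firmware download
fails, retry with the alternate ("_d") firmware names.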

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
---
 drivers/staging/rdma/hfi1/firmware.c |  189 ++++++++++++++++++++++++++++------
 1 files changed, 157 insertions(+), 32 deletions(-)

diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c
index b4bdcf3..e80eecf 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/staging/rdma/hfi1/firmware.c
@@ -68,6 +68,10 @@
 #define DEFAULT_FW_SBUS_NAME "hfi1_sbus.fw"
 #define DEFAULT_FW_PCIE_NAME "hfi1_pcie.fw"
 #define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat"
+#define ALT_FW_8051_NAME_ASIC "hfi1_dc8051_d.fw"
+#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
+#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
+#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
 
 static uint fw_8051_load = 1;
 static uint fw_fabric_serdes_load = 1;
@@ -158,7 +162,8 @@ struct firmware_details {
 static DEFINE_MUTEX(fw_mutex);
 enum fw_state {
 	FW_EMPTY,
-	FW_ACQUIRED,
+	FW_TRY,
+	FW_FINAL,
 	FW_ERR
 };
 static enum fw_state fw_state = FW_EMPTY;
@@ -428,8 +433,8 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 
 	ret = request_firmware(&fdet->fw, name, &dd->pcidev->dev);
 	if (ret) {
-		dd_dev_err(dd, "cannot load firmware \"%s\", err %d\n",
-			name, ret);
+		dd_dev_err(dd, "cannot find firmware \"%s\", err %d\n",
+			   name, ret);
 		return ret;
 	}
 
@@ -539,28 +544,53 @@ done:
 static void dispose_one_firmware(struct firmware_details *fdet)
 {
 	release_firmware(fdet->fw);
-	fdet->fw = NULL;
+	/* erase all previous information */
+	memset(fdet, 0, sizeof(*fdet));
 }
 
 /*
- * Called by all HFIs when loading their firmware - i.e. device probe time.
- * The first one will do the actual firmware load.  Use a mutex to resolve
- * any possible race condition.
+ * Obtain the 4 firmwares from the OS.  All must be obtained at once or not
+ * at all.  If called with the firmware state in FW_TRY, use alternate names.
+ * On exit, this routine will have set the firmware state to one of FW_TRY,
+ * FW_FINAL, or FW_ERR.
  *
- * The call to this routine cannot be moved to driver load because the kernel
- * call request_firmware() requires a device which is only available after
- * the first device probe.
+ * Must be holding fw_mutex.
  */
-static int obtain_firmware(struct hfi1_devdata *dd)
+static void __obtain_firmware(struct hfi1_devdata *dd)
 {
 	int err = 0;
 
-	mutex_lock(&fw_mutex);
-	if (fw_state == FW_ACQUIRED) {
-		goto done;	/* already acquired */
-	} else if (fw_state == FW_ERR) {
-		err = fw_err;
-		goto done;	/* already tried and failed */
+	if (fw_state == FW_FINAL)	/* nothing more to obtain */
+		return;
+	if (fw_state == FW_ERR)		/* already in error */
+		return;
+
+	/* fw_state is FW_EMPTY or FW_TRY */
+retry:
+	if (fw_state == FW_TRY) {
+		/*
+		 * We tried the original and it failed.  Move to the
+		 * alternate.
+		 */
+		dd_dev_info(dd, "using alternate firmware names\n");
+		/*
+		 * Let others run.  Some systems, when missing firmware, do
+		 * something that holds for 30 seconds.  If we do that twice
+		 * in a row it triggers a task blocked warning.
+		 */
+		cond_resched();
+		if (fw_8051_load)
+			dispose_one_firmware(&fw_8051);
+		if (fw_fabric_serdes_load)
+			dispose_one_firmware(&fw_fabric);
+		if (fw_sbus_load)
+			dispose_one_firmware(&fw_sbus);
+		if (fw_pcie_serdes_load)
+			dispose_one_firmware(&fw_pcie);
+		fw_8051_name = ALT_FW_8051_NAME_ASIC;
+		fw_fabric_serdes_name = ALT_FW_FABRIC_NAME;
+		fw_sbus_name = ALT_FW_SBUS_NAME;
+		fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
 	}
 
 	if (fw_8051_load) {
@@ -588,27 +618,82 @@ static int obtain_firmware(struct hfi1_devdata *dd)
 			goto done;
 	}
 
+done:
+	if (err) {
+		/* oops, had problems obtaining a firmware */
+		if (fw_state == FW_EMPTY) {
+			/* retry with alternate */
+			fw_state = FW_TRY;
+			goto retry;
+		}
+		fw_state = FW_ERR;
+		fw_err = -ENOENT;
+	} else {
+		/* success */
+		if (fw_state == FW_EMPTY)
+			fw_state = FW_TRY;	/* may retry later */
+		else
+			fw_state = FW_FINAL;	/* cannot try again */
+	}
+}
+
+/*
+ * Called by all HFIs when loading their firmware - i.e. device probe time.
+ * The first one will do the actual firmware load.  Use a mutex to resolve
+ * any possible race condition.
+ *
+ * The call to this routine cannot be moved to driver load because the kernel
+ * call request_firmware() requires a device which is only available after
+ * the first device probe.
+ */
+static int obtain_firmware(struct hfi1_devdata *dd)
+{
+	unsigned long timeout;
+	int err = 0;
+
+	mutex_lock(&fw_mutex);
+
+	/* 40s delay due to long delay on missing firmware on some systems */
+	timeout = jiffies + msecs_to_jiffies(40000);
+	while (fw_state == FW_TRY) {
+		/*
+		 * Another device is trying the firmware.  Wait until it
+		 * decides what works (or not).
+		 */
+		if (time_after(jiffies, timeout)) {
+			/* waited too long */
+			dd_dev_err(dd, "Timeout waiting for firmware try");
+			fw_state = FW_ERR;
+			fw_err = -ETIMEDOUT;
+			break;
+		}
+		mutex_unlock(&fw_mutex);
+		msleep(20);	/* arbitrary delay */
+		mutex_lock(&fw_mutex);
+	}
+	/* not in FW_TRY state */
+
+	if (fw_state == FW_FINAL)
+		goto done;	/* already acquired */
+	else if (fw_state == FW_ERR)
+		goto done;	/* already tried and failed */
+	/* fw_state is FW_EMPTY */
+
+	/* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */
+	__obtain_firmware(dd);
+
 	if (platform_config_load) {
 		platform_config = NULL;
 		err = request_firmware(&platform_config, platform_config_name,
 						&dd->pcidev->dev);
-		if (err) {
-			err = 0;
+		if (err)
 			platform_config = NULL;
-		}
 	}
 
-	/* success */
-	fw_state = FW_ACQUIRED;
-
 done:
-	if (err) {
-		fw_err = err;
-		fw_state = FW_ERR;
-	}
 	mutex_unlock(&fw_mutex);
 
-	return err;
+	return fw_err;
 }
 
 /*
@@ -638,6 +723,38 @@ void dispose_firmware(void)
 }
 
 /*
+ * Called with the result of a firmware download.
+ *
+ * Return 1 to retry loading the firmware, 0 to stop.
+ */
+static int retry_firmware(struct hfi1_devdata *dd, int load_result)
+{
+	int retry;
+
+	mutex_lock(&fw_mutex);
+
+	if (load_result == 0) {
+		/*
+		 * The load succeeded, so expect all others to do the same.
+		 * Do not retry again.
+		 */
+		if (fw_state == FW_TRY)
+			fw_state = FW_FINAL;
+		retry = 0;	/* do NOT retry */
+	} else if (fw_state == FW_TRY) {
+		/* load failed, obtain alternate firmware */
+		__obtain_firmware(dd);
+		retry = (fw_state == FW_FINAL);
+	} else {
+		/* else in FW_FINAL or FW_ERR, no retry in either case */
+		retry = 0;
+	}
+
+	mutex_unlock(&fw_mutex);
+	return retry;
+}
+
+/*
  * Write a block of data to a given array CSR.  All calls will be in
  * multiples of 8 bytes.
  */
@@ -1248,7 +1365,9 @@ int load_firmware(struct hfi1_devdata *dd)
 				fabric_serdes_addrs[dd->hfi1_id],
 				NUM_FABRIC_SERDES);
 		turn_off_spicos(dd, SPICO_FABRIC);
-		ret = load_fabric_serdes_firmware(dd, &fw_fabric);
+		do {
+			ret = load_fabric_serdes_firmware(dd, &fw_fabric);
+		} while (retry_firmware(dd, ret));
 
 		clear_sbus_fast_mode(dd);
 		release_hw_mutex(dd);
@@ -1257,7 +1376,9 @@ int load_firmware(struct hfi1_devdata *dd)
 	}
 
 	if (fw_8051_load) {
-		ret = load_8051_firmware(dd, &fw_8051);
+		do {
+			ret = load_8051_firmware(dd, &fw_8051);
+		} while (retry_firmware(dd, ret));
 		if (ret)
 			return ret;
 	}
@@ -1570,7 +1691,9 @@ int load_pcie_firmware(struct hfi1_devdata *dd)
 
 	if (fw_sbus_load && (dd->flags & HFI1_DO_INIT_ASIC)) {
 		turn_off_spicos(dd, SPICO_SBUS);
-		ret = load_sbus_firmware(dd, &fw_sbus);
+		do {
+			ret = load_sbus_firmware(dd, &fw_sbus);
+		} while (retry_firmware(dd, ret));
 		if (ret)
 			goto done;
 	}
@@ -1581,7 +1704,9 @@ int load_pcie_firmware(struct hfi1_devdata *dd)
 					pcie_serdes_broadcast[dd->hfi1_id],
 					pcie_serdes_addrs[dd->hfi1_id],
 					NUM_PCIE_SERDES);
-		ret = load_pcie_serdes_firmware(dd, &fw_pcie);
+		do {
+			ret = load_pcie_serdes_firmware(dd, &fw_pcie);
+		} while (retry_firmware(dd, ret));
 		if (ret)
 			goto done;
 	}
-- 
1.7.0.7

             reply	other threads:[~2015-11-14  3:36 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-14  3:36 Jubin John [this message]
2015-11-14  3:37 ` [PATCH 02/18] staging/rdma/hfi1: Decode CNP opcode Jubin John
     [not found]   ` <1447472236-622-2-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:09     ` Greg KH
2015-11-20 14:37       ` Marciniszyn, Mike
2015-11-14  3:37 ` [PATCH 03/18] staging/rdma/hfi1: Add aeth name syndrome decode Jubin John
     [not found]   ` <1447472236-622-3-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:09     ` Greg KH
     [not found]       ` <20151120010940.GC11695-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>
2015-11-20 21:43         ` Marciniszyn, Mike
2015-11-14  3:37 ` [PATCH 04/18] staging/rdma/hfi1: Fix qp.h comments Jubin John
2015-11-14  3:37 ` [PATCH 05/18] staging/rdma/hfi1: Clean up comments Jubin John
     [not found]   ` <1447472236-622-5-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:10     ` Greg KH
     [not found]       ` <20151120011000.GD11695-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>
2015-11-20 14:46         ` Marciniszyn, Mike
2015-11-14  3:37 ` [PATCH 06/18] staging/rdma/hfi: Add one-time LCB reset Jubin John
2015-11-20  1:10   ` Greg KH
2015-11-14  3:37 ` [PATCH 07/18] staging/rdma/hfi1: Extend quiet timeout Jubin John
     [not found]   ` <1447472236-622-7-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:10     ` Greg KH
     [not found]       ` <20151120011052.GG11695-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>
2015-11-20 15:21         ` Marciniszyn, Mike
     [not found]           ` <32E1700B9017364D9B60AED9960492BC259BD7AF-RjuIdWtd+YbTXloPLtfHfbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2015-11-20 22:04             ` Dan Carpenter
2015-11-14  3:37 ` [PATCH 08/18] staging/rdma/hfi1: Add a credit push on diagpkt allocate fail Jubin John
2015-11-14  3:37 ` [PATCH 09/18] staging/rdma/hfi1: Correctly limit VLs against SDMA engines Jubin John
     [not found]   ` <1447472236-622-9-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:10     ` Greg KH
     [not found]       ` <20151120011017.GE11695-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>
2015-11-20 14:59         ` Marciniszyn, Mike
2015-11-14  3:37 ` [PATCH 10/18] staging/rdma/hfi1: Adds software counters for bitfields within various error status fields Jubin John
2015-11-14  3:37 ` [PATCH 11/18] staging/rdma/hfi1: Destroy workqueues if hfi1_register_ib_device() call returns error Jubin John
2015-11-14  3:37 ` [PATCH 12/18] staging/rdma/hfi1: Unexpected link up pkey values are not an error Jubin John
2015-11-14  3:37 ` [PATCH 13/18] staging/rdma/hfi1: remove SPC freeze error messages Jubin John
2015-11-14  3:37 ` [PATCH 14/18] staging/rdma/hfi1: unknown frame messages are not errors Jubin John
2015-11-14  3:37 ` [PATCH 15/18] staging/rdma/hfi1: Consider VL15 MTU also when calculating the maximum VL MTU Jubin John
2015-11-14  3:37 ` [PATCH 16/18] staging/rdma/hfi1: Fix Xmit Wait calculation Jubin John
2015-11-14  3:37 ` [PATCH 17/18] staging/rdma/hfi1: Adding counter resolutions for DataPortCounters Jubin John
2015-11-14  3:37 ` [PATCH 18/18] staging/rdma/hfi1: Workaround CONFIG_SDMA_VERBOSITY timing issue Jubin John
     [not found]   ` <1447472236-622-18-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:11     ` Greg KH
2015-11-20 16:01       ` Marciniszyn, Mike
     [not found]         ` <32E1700B9017364D9B60AED9960492BC259BD8A2-RjuIdWtd+YbTXloPLtfHfbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2015-11-20 17:57           ` Greg KH
     [not found] ` <1447472236-622-1-git-send-email-jubin.john-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-11-20  1:08   ` [PATCH 01/18] staging/rdma/hfi1: Support alternate firmware names Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1447472236-622-1-git-send-email-jubin.john@intel.com \
    --to=jubin.john@intel.com \
    --cc=devel@driverdev.osuosl.org \
    --cc=dledford@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.