platform/x86: ISST: Account for increased timeout in some cases
diff mbox series

Message ID 20210330220840.3113959-1-srinivas.pandruvada@linux.intel.com
State Accepted
Commit 5c782817a981981917ec3c647cf521022ee07143
Headers show
Series
  • platform/x86: ISST: Account for increased timeout in some cases
Related show

Commit Message

Srinivas Pandruvada March 30, 2021, 10:08 p.m. UTC
In some cases when firmware is busy or updating, some mailbox commands
still timeout on some newer CPUs. To fix this issue, change how we
process timeout.

With this change, replaced timeout from using simple count with real
timeout in micro-seconds using ktime. When the command response takes
more than average processing time, yield to other tasks. The worst case
timeout is extended upto 1 milli-second.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 .../intel_speed_select_if/isst_if_mbox_pci.c  | 33 +++++++++++++------
 1 file changed, 23 insertions(+), 10 deletions(-)

Comments

Hans de Goede April 7, 2021, 12:38 p.m. UTC | #1
Hi,

On 3/31/21 12:08 AM, Srinivas Pandruvada wrote:
> In some cases when firmware is busy or updating, some mailbox commands
> still timeout on some newer CPUs. To fix this issue, change how we
> process timeout.
> 
> With this change, replaced timeout from using simple count with real
> timeout in micro-seconds using ktime. When the command response takes
> more than average processing time, yield to other tasks. The worst case
> timeout is extended upto 1 milli-second.
> 
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

Thank you for your patch, I've applied this patch to my review-hans 
branch:
https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans

Note it will show up in my review-hans branch once I've pushed my
local branch there, which might take a while.

Once I've run some tests on this branch the patches there will be
added to the platform-drivers-x86/for-next branch and eventually
will be included in the pdx86 pull-request to Linus for the next
merge-window.

Regards,

Hans


> ---
>  .../intel_speed_select_if/isst_if_mbox_pci.c  | 33 +++++++++++++------
>  1 file changed, 23 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
> index a2a2d923e60c..df1fc6c719f3 100644
> --- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
> +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
> @@ -21,12 +21,16 @@
>  #define PUNIT_MAILBOX_BUSY_BIT		31
>  
>  /*
> - * The average time to complete some commands is about 40us. The current
> - * count is enough to satisfy 40us. But when the firmware is very busy, this
> - * causes timeout occasionally.  So increase to deal with some worst case
> - * scenarios. Most of the command still complete in few us.
> + * The average time to complete mailbox commands is less than 40us. Most of
> + * the commands complete in few micro seconds. But the same firmware handles
> + * requests from all power management features.
> + * We can create a scenario where we flood the firmware with requests then
> + * the mailbox response can be delayed for 100s of micro seconds. So define
> + * two timeouts. One for average case and one for long.
> + * If the firmware is taking more than average, just call cond_resched().
>   */
> -#define OS_MAILBOX_RETRY_COUNT		100
> +#define OS_MAILBOX_TIMEOUT_AVG_US	40
> +#define OS_MAILBOX_TIMEOUT_MAX_US	1000
>  
>  struct isst_if_device {
>  	struct mutex mutex;
> @@ -35,11 +39,13 @@ struct isst_if_device {
>  static int isst_if_mbox_cmd(struct pci_dev *pdev,
>  			    struct isst_if_mbox_cmd *mbox_cmd)
>  {
> -	u32 retries, data;
> +	s64 tm_delta = 0;
> +	ktime_t tm;
> +	u32 data;
>  	int ret;
>  
>  	/* Poll for rb bit == 0 */
> -	retries = OS_MAILBOX_RETRY_COUNT;
> +	tm = ktime_get();
>  	do {
>  		ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
>  					    &data);
> @@ -48,11 +54,14 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
>  
>  		if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
>  			ret = -EBUSY;
> +			tm_delta = ktime_us_delta(ktime_get(), tm);
> +			if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
> +				cond_resched();
>  			continue;
>  		}
>  		ret = 0;
>  		break;
> -	} while (--retries);
> +	} while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);
>  
>  	if (ret)
>  		return ret;
> @@ -74,7 +83,8 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
>  		return ret;
>  
>  	/* Poll for rb bit == 0 */
> -	retries = OS_MAILBOX_RETRY_COUNT;
> +	tm_delta = 0;
> +	tm = ktime_get();
>  	do {
>  		ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
>  					    &data);
> @@ -83,6 +93,9 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
>  
>  		if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
>  			ret = -EBUSY;
> +			tm_delta = ktime_us_delta(ktime_get(), tm);
> +			if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
> +				cond_resched();
>  			continue;
>  		}
>  
> @@ -96,7 +109,7 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
>  		mbox_cmd->resp_data = data;
>  		ret = 0;
>  		break;
> -	} while (--retries);
> +	} while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);
>  
>  	return ret;
>  }
>

Patch
diff mbox series

diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
index a2a2d923e60c..df1fc6c719f3 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
+++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
@@ -21,12 +21,16 @@ 
 #define PUNIT_MAILBOX_BUSY_BIT		31
 
 /*
- * The average time to complete some commands is about 40us. The current
- * count is enough to satisfy 40us. But when the firmware is very busy, this
- * causes timeout occasionally.  So increase to deal with some worst case
- * scenarios. Most of the command still complete in few us.
+ * The average time to complete mailbox commands is less than 40us. Most of
+ * the commands complete in few micro seconds. But the same firmware handles
+ * requests from all power management features.
+ * We can create a scenario where we flood the firmware with requests then
+ * the mailbox response can be delayed for 100s of micro seconds. So define
+ * two timeouts. One for average case and one for long.
+ * If the firmware is taking more than average, just call cond_resched().
  */
-#define OS_MAILBOX_RETRY_COUNT		100
+#define OS_MAILBOX_TIMEOUT_AVG_US	40
+#define OS_MAILBOX_TIMEOUT_MAX_US	1000
 
 struct isst_if_device {
 	struct mutex mutex;
@@ -35,11 +39,13 @@  struct isst_if_device {
 static int isst_if_mbox_cmd(struct pci_dev *pdev,
 			    struct isst_if_mbox_cmd *mbox_cmd)
 {
-	u32 retries, data;
+	s64 tm_delta = 0;
+	ktime_t tm;
+	u32 data;
 	int ret;
 
 	/* Poll for rb bit == 0 */
-	retries = OS_MAILBOX_RETRY_COUNT;
+	tm = ktime_get();
 	do {
 		ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
 					    &data);
@@ -48,11 +54,14 @@  static int isst_if_mbox_cmd(struct pci_dev *pdev,
 
 		if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
 			ret = -EBUSY;
+			tm_delta = ktime_us_delta(ktime_get(), tm);
+			if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
+				cond_resched();
 			continue;
 		}
 		ret = 0;
 		break;
-	} while (--retries);
+	} while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);
 
 	if (ret)
 		return ret;
@@ -74,7 +83,8 @@  static int isst_if_mbox_cmd(struct pci_dev *pdev,
 		return ret;
 
 	/* Poll for rb bit == 0 */
-	retries = OS_MAILBOX_RETRY_COUNT;
+	tm_delta = 0;
+	tm = ktime_get();
 	do {
 		ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
 					    &data);
@@ -83,6 +93,9 @@  static int isst_if_mbox_cmd(struct pci_dev *pdev,
 
 		if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
 			ret = -EBUSY;
+			tm_delta = ktime_us_delta(ktime_get(), tm);
+			if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
+				cond_resched();
 			continue;
 		}
 
@@ -96,7 +109,7 @@  static int isst_if_mbox_cmd(struct pci_dev *pdev,
 		mbox_cmd->resp_data = data;
 		ret = 0;
 		break;
-	} while (--retries);
+	} while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);
 
 	return ret;
 }