All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] iommu: arm-smmu: Set SMTNMB_TLBEN in ACR to enable caching of bypass entries
@ 2016-11-04  9:55 ` Nipun Gupta
  0 siblings, 0 replies; 4+ messages in thread
From: Nipun Gupta @ 2016-11-04  9:55 UTC (permalink / raw)
  To: robin.murphy-5wv7dgnIgG8, will.deacon-5wv7dgnIgG8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: stuart.yoder-3arQi8VN3Tc

The SMTNMB_TLBEN bit in the Auxiliary Configuration Register (ACR) provides an
option to enable TLB updates for bypass transactions caused by no stream match
in the stream match table. This reduces the latency of subsequent transactions
with the same stream ID that bypass the SMMU.
This provides a significant performance benefit for certain networking
workloads.

With this change, a substantial performance improvement of ~9% is observed with
the DPDK l3fwd application (http://dpdk.org/doc/guides/sample_app_ug/l3_forward.html)
on NXP's LS2088a platform.

Signed-off-by: Nipun Gupta <nipun.gupta-3arQi8VN3Tc@public.gmane.org>
---
Changes for v2:
    - Incorporated Robin's comments on v1 related to
	Setting SMTNMB_TLBEN in ACR only for MMU-500 as ACR is implementation dependent
	Code comments and Naming convention
Changes for v3:
    - Added correct patch version

 drivers/iommu/arm-smmu.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ce2a9d4..05901be 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -247,6 +247,7 @@ enum arm_smmu_s2cr_privcfg {
 #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)
 
 #define CB_PAR_F			(1 << 0)
 
@@ -1569,16 +1570,22 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 	for (i = 0; i < smmu->num_mapping_groups; ++i)
 		arm_smmu_write_sme(smmu, i);
 
-	/*
-	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
-	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
-	 * bit is only present in MMU-500r2 onwards.
-	 */
-	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
-	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
-	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
+	if (smmu->model == ARM_MMU500) {
+		/*
+		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
+		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
+		 * bit is only present in MMU-500r2 onwards.
+		 */
+		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
+		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
 		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
-		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+		if (major >= 2)
+			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+		/*
+		 * Allow unmatched Stream IDs to allocate bypass
+		 * TLB entries for reduced latency.
+		 */
+		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
 		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
 	}
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3] iommu: arm-smmu: Set SMTNMB_TLBEN in ACR to enable caching of bypass entries
@ 2016-11-04  9:55 ` Nipun Gupta
  0 siblings, 0 replies; 4+ messages in thread
From: Nipun Gupta @ 2016-11-04  9:55 UTC (permalink / raw)
  To: linux-arm-kernel

The SMTNMB_TLBEN bit in the Auxiliary Configuration Register (ACR) provides an
option to enable TLB updates for bypass transactions caused by no stream match
in the stream match table. This reduces the latency of subsequent transactions
with the same stream ID that bypass the SMMU.
This provides a significant performance benefit for certain networking
workloads.

With this change, a substantial performance improvement of ~9% is observed with
the DPDK l3fwd application (http://dpdk.org/doc/guides/sample_app_ug/l3_forward.html)
on NXP's LS2088a platform.

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
---
Changes for v2:
    - Incorporated Robin's comments on v1 related to
	Setting SMTNMB_TLBEN in ACR only for MMU-500 as ACR is implementation dependent
	Code comments and Naming convention
Changes for v3:
    - Added correct patch version

 drivers/iommu/arm-smmu.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ce2a9d4..05901be 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -247,6 +247,7 @@ enum arm_smmu_s2cr_privcfg {
 #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)
 
 #define CB_PAR_F			(1 << 0)
 
@@ -1569,16 +1570,22 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 	for (i = 0; i < smmu->num_mapping_groups; ++i)
 		arm_smmu_write_sme(smmu, i);
 
-	/*
-	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
-	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
-	 * bit is only present in MMU-500r2 onwards.
-	 */
-	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
-	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
-	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
+	if (smmu->model == ARM_MMU500) {
+		/*
+		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
+		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
+		 * bit is only present in MMU-500r2 onwards.
+		 */
+		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
+		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
 		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
-		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+		if (major >= 2)
+			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+		/*
+		 * Allow unmatched Stream IDs to allocate bypass
+		 * TLB entries for reduced latency.
+		 */
+		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
 		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
 	}
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] iommu: arm-smmu: Set SMTNMB_TLBEN in ACR to enable caching of bypass entries
  2016-11-04  9:55 ` Nipun Gupta
@ 2016-11-04 14:19     ` Robin Murphy
  -1 siblings, 0 replies; 4+ messages in thread
From: Robin Murphy @ 2016-11-04 14:19 UTC (permalink / raw)
  To: Nipun Gupta, will.deacon-5wv7dgnIgG8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: stuart.yoder-3arQi8VN3Tc

On 04/11/16 09:55, Nipun Gupta wrote:
> The SMTNMB_TLBEN bit in the Auxiliary Configuration Register (ACR) provides an
> option to enable TLB updates for bypass transactions caused by no stream match
> in the stream match table. This reduces the latency of subsequent transactions
> with the same stream ID that bypass the SMMU.
> This provides a significant performance benefit for certain networking
> workloads.
> 
> With this change, a substantial performance improvement of ~9% is observed with
> the DPDK l3fwd application (http://dpdk.org/doc/guides/sample_app_ug/l3_forward.html)
> on NXP's LS2088a platform.

Reviewed-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>

> Signed-off-by: Nipun Gupta <nipun.gupta-3arQi8VN3Tc@public.gmane.org>
> ---
> Changes for v2:
>     - Incorporated Robin's comments on v1 related to
> 	Setting SMTNMB_TLBEN in ACR only for MMU-500 as ACR is implementation dependent
> 	Code comments and Naming convention
> Changes for v3:
>     - Added correct patch version
> 
>  drivers/iommu/arm-smmu.c | 25 ++++++++++++++++---------
>  1 file changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index ce2a9d4..05901be 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -247,6 +247,7 @@ enum arm_smmu_s2cr_privcfg {
>  #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
>  
>  #define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
> +#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)
>  
>  #define CB_PAR_F			(1 << 0)
>  
> @@ -1569,16 +1570,22 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
>  	for (i = 0; i < smmu->num_mapping_groups; ++i)
>  		arm_smmu_write_sme(smmu, i);
>  
> -	/*
> -	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
> -	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
> -	 * bit is only present in MMU-500r2 onwards.
> -	 */
> -	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
> -	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
> -	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
> +	if (smmu->model == ARM_MMU500) {
> +		/*
> +		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
> +		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
> +		 * bit is only present in MMU-500r2 onwards.
> +		 */
> +		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
> +		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
>  		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
> -		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
> +		if (major >= 2)
> +			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
> +		/*
> +		 * Allow unmatched Stream IDs to allocate bypass
> +		 * TLB entries for reduced latency.
> +		 */
> +		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
>  		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
>  	}
>  
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3] iommu: arm-smmu: Set SMTNMB_TLBEN in ACR to enable caching of bypass entries
@ 2016-11-04 14:19     ` Robin Murphy
  0 siblings, 0 replies; 4+ messages in thread
From: Robin Murphy @ 2016-11-04 14:19 UTC (permalink / raw)
  To: linux-arm-kernel

On 04/11/16 09:55, Nipun Gupta wrote:
> The SMTNMB_TLBEN bit in the Auxiliary Configuration Register (ACR) provides an
> option to enable TLB updates for bypass transactions caused by no stream match
> in the stream match table. This reduces the latency of subsequent transactions
> with the same stream ID that bypass the SMMU.
> This provides a significant performance benefit for certain networking
> workloads.
> 
> With this change, a substantial performance improvement of ~9% is observed with
> the DPDK l3fwd application (http://dpdk.org/doc/guides/sample_app_ug/l3_forward.html)
> on NXP's LS2088a platform.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>

> Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
> ---
> Changes for v2:
>     - Incorporated Robin's comments on v1 related to
> 	Setting SMTNMB_TLBEN in ACR only for MMU-500 as ACR is implementation dependent
> 	Code comments and Naming convention
> Changes for v3:
>     - Added correct patch version
> 
>  drivers/iommu/arm-smmu.c | 25 ++++++++++++++++---------
>  1 file changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index ce2a9d4..05901be 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -247,6 +247,7 @@ enum arm_smmu_s2cr_privcfg {
>  #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
>  
>  #define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
> +#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)
>  
>  #define CB_PAR_F			(1 << 0)
>  
> @@ -1569,16 +1570,22 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
>  	for (i = 0; i < smmu->num_mapping_groups; ++i)
>  		arm_smmu_write_sme(smmu, i);
>  
> -	/*
> -	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
> -	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
> -	 * bit is only present in MMU-500r2 onwards.
> -	 */
> -	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
> -	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
> -	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
> +	if (smmu->model == ARM_MMU500) {
> +		/*
> +		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
> +		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
> +		 * bit is only present in MMU-500r2 onwards.
> +		 */
> +		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
> +		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
>  		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
> -		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
> +		if (major >= 2)
> +			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
> +		/*
> +		 * Allow unmatched Stream IDs to allocate bypass
> +		 * TLB entries for reduced latency.
> +		 */
> +		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
>  		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
>  	}
>  
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-11-04 14:19 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-04  9:55 [PATCH v3] iommu: arm-smmu: Set SMTNMB_TLBEN in ACR to enable caching of bypass entries Nipun Gupta
2016-11-04  9:55 ` Nipun Gupta
     [not found] ` <1478253323-9231-1-git-send-email-nipun.gupta-3arQi8VN3Tc@public.gmane.org>
2016-11-04 14:19   ` Robin Murphy
2016-11-04 14:19     ` Robin Murphy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.