All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] arm64: alternative:flush cache with unpatched code
@ 2018-05-31 20:37 Rohit Khanna
  2018-06-01  9:03 ` Mark Rutland
  0 siblings, 1 reply; 20+ messages in thread
From: Rohit Khanna @ 2018-05-31 20:37 UTC (permalink / raw)
  To: linux-arm-kernel

In the current implementation,  __apply_alternatives patches
flush_icache_range and then executes it without invalidating the icache.
Thus, icache can contain some of the old instructions for
flush_icache_range. This can cause unpredictable behavior as during
execution we can get a mix of old and new instructions for
flush_icache_range.

This patch :

1. Adds a new function clean_dcache_range_nopatch for flushing kernel
memory range. This function uses non hot-patched code and can be
safely used to flush cache during code patching.

2. Modifies __apply_alternatives so that it uses
clean_dcache_range_nopatch to flush the cache range after patching code.

Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
---
 arch/arm64/include/asm/sysreg.h |  3 +++
 arch/arm64/kernel/alternative.c | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6171178075dc..9d1aee7c9aba 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -617,6 +617,9 @@
 #define MVFR1_FPDNAN_SHIFT		4
 #define MVFR1_FPFTZ_SHIFT		0
 
+/* SYS_CTR_EL0 */
+#define SYS_CTR_ISIZE_SHIFT		0
+#define SYS_CTR_DSIZE_SHIFT		16
 
 #define ID_AA64MMFR0_TGRAN4_SHIFT	28
 #define ID_AA64MMFR0_TGRAN64_SHIFT	24
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5c4bce4ac381..6b8c5438b37b 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -122,6 +122,41 @@ static void patch_alternative(struct alt_instr *alt,
 	}
 }
 
+/* This is used for flushing kernel memory range after
+ * __apply_alternatives has patched kernel code
+ */
+static void clean_dcache_range_nopatch(void *start, void *end)
+{
+	u64 d_start, i_start, d_size, i_size, ctr_el0;
+
+	/* use sanitised value of ctr_el0 rather than raw value from CPU */
+	ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+	/* size in bytes */
+	d_size = cpuid_feature_extract_unsigned_field(ctr_el0,
+			SYS_CTR_DSIZE_SHIFT);
+	i_size = cpuid_feature_extract_unsigned_field(ctr_el0,
+			SYS_CTR_ISIZE_SHIFT);
+
+	d_start = (u64)start & ~(d_size - 1);
+	while (d_start <= (u64)end) {
+		/* Use civac instead of cvau. This is required
+		 * due to ARM errata 826319, 827319, 824069,
+		 * 819472 on A53
+		 */
+		asm volatile("dc civac, %0" : : "r" (d_start));
+		d_start += d_size;
+	}
+	dsb(ish);
+
+	i_start = (u64)start & ~(i_size - 1);
+	while (i_start <= (u64)end) {
+		asm volatile("ic ivau, %0" : : "r" (i_start));
+		i_start += i_size;
+	}
+	dsb(ish);
+	isb();
+}
+
 static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 {
 	struct alt_instr *alt;
@@ -155,7 +190,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
-		flush_icache_range((uintptr_t)origptr,
+		clean_dcache_range_nopatch((uintptr_t)origptr,
 				   (uintptr_t)(origptr + nr_inst));
 	}
 }
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-31 20:37 [PATCH] arm64: alternative:flush cache with unpatched code Rohit Khanna
@ 2018-06-01  9:03 ` Mark Rutland
  2018-06-01 19:52   ` Rohit Khanna
  0 siblings, 1 reply; 20+ messages in thread
From: Mark Rutland @ 2018-06-01  9:03 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

As a general thing, could you please add a version number to patches in future?
i.e. this should be PATCHv4.

It really helps keeping track of patches, distinguishing versions, etc.

On Thu, May 31, 2018 at 01:37:34PM -0700, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
> 
> This patch :
> 
> 1. Adds a new function clean_dcache_range_nopatch for flushing kernel
> memory range. This function uses non hot-patched code and can be
> safely used to flush cache during code patching.
> 
> 2. Modifies __apply_alternatives so that it uses
> clean_dcache_range_nopatch to flush the cache range after patching code.
> 
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>  arch/arm64/include/asm/sysreg.h |  3 +++
>  arch/arm64/kernel/alternative.c | 37 ++++++++++++++++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 6171178075dc..9d1aee7c9aba 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -617,6 +617,9 @@
>  #define MVFR1_FPDNAN_SHIFT		4
>  #define MVFR1_FPFTZ_SHIFT		0
>  
> +/* SYS_CTR_EL0 */
> +#define SYS_CTR_ISIZE_SHIFT		0
> +#define SYS_CTR_DSIZE_SHIFT		16

We already have CTR_DMINLINE_SHIFT in <asm/cache.h>

Can we please add CTR_IMINLINE_SHIFT there too?

Maybe those should be moved into sysreg.h, but that can be a separate cleanup.

>  #define ID_AA64MMFR0_TGRAN4_SHIFT	28
>  #define ID_AA64MMFR0_TGRAN64_SHIFT	24
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..6b8c5438b37b 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -122,6 +122,41 @@ static void patch_alternative(struct alt_instr *alt,
>  	}
>  }
>  
> +/* This is used for flushing kernel memory range after
> + * __apply_alternatives has patched kernel code
> + */
> +static void clean_dcache_range_nopatch(void *start, void *end)
> +{
> +	u64 d_start, i_start, d_size, i_size, ctr_el0;

I don't think we need separate i_start and d_start variables. A 'start' or
'cur' variable could be used for both.

> +
> +	/* use sanitised value of ctr_el0 rather than raw value from CPU */
> +	ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> +	/* size in bytes */
> +	d_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> +			SYS_CTR_DSIZE_SHIFT);
> +	i_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> +			SYS_CTR_ISIZE_SHIFT);

This isn't the size in bytes. Each is log2 the number of (4-byte) words.

i.e. the size in bytes is (xMinLine << 2).

> +
> +	d_start = (u64)start & ~(d_size - 1);
> +	while (d_start <= (u64)end) {
> +		/* Use civac instead of cvau. This is required
> +		 * due to ARM errata 826319, 827319, 824069,
> +		 * 819472 on A53
> +		 */
> +		asm volatile("dc civac, %0" : : "r" (d_start));

Either this needs a memory clobber, or we need barrier() first, to ensure that
the compiler doesn't re-order this against some of the patching code, however
unlikely that may be.

> +		d_start += d_size;
> +	}

The loop can be simplified to:

	do {
		asm ( ... );
	} while (d_start += d_size, d_start < (u64)end)

> +	dsb(ish);
> +
> +	i_start = (u64)start & ~(i_size - 1);
> +	while (i_start <= (u64)end) {
> +		asm volatile("ic ivau, %0" : : "r" (i_start));
> +		i_start += i_size;
> +	}

Likewise here.

Thanks,
Mark.

> +	dsb(ish);
> +	isb();
> +}
> +
>  static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>  {
>  	struct alt_instr *alt;
> @@ -155,7 +190,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>  
>  		alt_cb(alt, origptr, updptr, nr_inst);
>  
> -		flush_icache_range((uintptr_t)origptr,
> +		clean_dcache_range_nopatch((uintptr_t)origptr,
>  				   (uintptr_t)(origptr + nr_inst));
>  	}
>  }
> -- 
> 2.1.4
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-01  9:03 ` Mark Rutland
@ 2018-06-01 19:52   ` Rohit Khanna
  2018-06-01 21:43     ` Rohit Khanna
  2018-06-04  9:01     ` Mark Rutland
  0 siblings, 2 replies; 20+ messages in thread
From: Rohit Khanna @ 2018-06-01 19:52 UTC (permalink / raw)
  To: linux-arm-kernel

[RK] - Thanks for the comments Mark. Reply inlined.

Thanks
Rohit
________________________________________
From: Mark Rutland <mark.rutland@arm.com>
Sent: Friday, June 1, 2018 2:03 AM
To: Rohit Khanna
Cc: catalin.marinas at arm.com; robin.murphy at arm.com; Suzuki.Poulose at arm.com; linux-arm-kernel at lists.infradead.org; Alexander Van Brunt; Bo Yan; will.deacon at arm.com
Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

Hi,

As a general thing, could you please add a version number to patches in future?
i.e. this should be PATCHv4.

It really helps keeping track of patches, distinguishing versions, etc.

On Thu, May 31, 2018 at 01:37:34PM -0700, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
>
> This patch :
>
> 1. Adds a new function clean_dcache_range_nopatch for flushing kernel
> memory range. This function uses non hot-patched code and can be
> safely used to flush cache during code patching.
>
> 2. Modifies __apply_alternatives so that it uses
> clean_dcache_range_nopatch to flush the cache range after patching code.
>
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>  arch/arm64/include/asm/sysreg.h |  3 +++
>  arch/arm64/kernel/alternative.c | 37 ++++++++++++++++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 6171178075dc..9d1aee7c9aba 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -617,6 +617,9 @@
>  #define MVFR1_FPDNAN_SHIFT           4
>  #define MVFR1_FPFTZ_SHIFT            0
>
> +/* SYS_CTR_EL0 */
> +#define SYS_CTR_ISIZE_SHIFT          0
> +#define SYS_CTR_DSIZE_SHIFT          16

We already have CTR_DMINLINE_SHIFT in <asm/cache.h>

Can we please add CTR_IMINLIN_SHIFT there too?

Maybe those should be moved into sysreg.h, but that can be a separate cleanup.

[RK] -  <asm/cache.h> doesn't contain CTR_DMINLINE_SHIFT.

>  #define ID_AA64MMFR0_TGRAN4_SHIFT    28
>  #define ID_AA64MMFR0_TGRAN64_SHIFT   24
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..6b8c5438b37b 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -122,6 +122,41 @@ static void patch_alternative(struct alt_instr *alt,
>       }
>  }
>
> +/* This is used for flushing kernel memory range after
> + * __apply_alternatives has patched kernel code
> + */
> +static void clean_dcache_range_nopatch(void *start, void *end)
> +{
> +     u64 d_start, i_start, d_size, i_size, ctr_el0;

I don't think we need separate i_start and d_start variables. A 'start' or
'cur' variable could be used for both.
[RK] - ok.

> +
> +     /* use sanitised value of ctr_el0 rather than raw value from CPU */
> +     ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> +     /* size in bytes */
> +     d_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> +                     SYS_CTR_DSIZE_SHIFT);
> +     i_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> +                     SYS_CTR_ISIZE_SHIFT);

This isn't the size in bytes. Each is log2 the number of (4-byte) words.

i.e. the size in bytes is (xMinLine << 2).
[RK] - This doesn't seem right. For example, if IMinLine = 4 or 0b100,
           then with the above formula ICacheSize in Bytes = 4 << 2 = 16.
           The correct formula should be (4 << xMinLine). 
            So in case IMinLine = 4 or 0b100,
            ICacheSizeBytes = 4 << 4 = 64B             

> +
> +     d_start = (u64)start & ~(d_size - 1);
> +     while (d_start <= (u64)end) {
> +             /* Use civac instead of cvau. This is required
> +              * due to ARM errata 826319, 827319, 824069,
> +              * 819472 on A53
> +              */
> +             asm volatile("dc civac, %0" : : "r" (d_start));

Either this needs a memory clobber, or we need barrier() first, to ensure that
the compiler doesn't re-order this against some of the patching code, however
unlikely that may be.
[RK] - So add barrier() before calling clean_dcache_range_nopatch() ?

> +             d_start += d_size;
> +     }

The loop can be simplified to:

        do {
                asm ( ... );
        } while (d_start += d_size, d_start < (u64)end)
[RK] - ok

> +     dsb(ish);
> +
> +     i_start = (u64)start & ~(i_size - 1);
> +     while (i_start <= (u64)end) {
> +             asm volatile("ic ivau, %0" : : "r" (i_start));
> +             i_start += i_size;
> +     }

Likewise here.
[RK] - ok

Thanks,
Mark.

> +     dsb(ish);
> +     isb();
> +}
> +
>  static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>  {
>       struct alt_instr *alt;
> @@ -155,7 +190,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>
>               alt_cb(alt, origptr, updptr, nr_inst);
>
> -             flush_icache_range((uintptr_t)origptr,
> +             clean_dcache_range_nopatch((uintptr_t)origptr,
>                                  (uintptr_t)(origptr + nr_inst));
>       }
>  }
> --
> 2.1.4
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-01 19:52   ` Rohit Khanna
@ 2018-06-01 21:43     ` Rohit Khanna
  2018-06-04  9:01     ` Mark Rutland
  1 sibling, 0 replies; 20+ messages in thread
From: Rohit Khanna @ 2018-06-01 21:43 UTC (permalink / raw)
  To: linux-arm-kernel

1 more comment inline.

Thanks
Rohit
________________________________________
From: linux-arm-kernel <linux-arm-kernel-bounces@lists.infradead.org> on behalf of Rohit Khanna <rokhanna@nvidia.com>
Sent: Friday, June 1, 2018 12:52 PM
To: Mark Rutland
Cc: Suzuki.Poulose at arm.com; catalin.marinas at arm.com; will.deacon at arm.com; Bo Yan; robin.murphy at arm.com; Alexander Van Brunt; linux-arm-kernel at lists.infradead.org
Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

[RK] - Thanks for the comments Mark. Reply inlined.

Thanks
Rohit
________________________________________
From: Mark Rutland <mark.rutland@arm.com>
Sent: Friday, June 1, 2018 2:03 AM
To: Rohit Khanna
Cc: catalin.marinas at arm.com; robin.murphy at arm.com; Suzuki.Poulose at arm.com; linux-arm-kernel at lists.infradead.org; Alexander Van Brunt; Bo Yan; will.deacon at arm.com
Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

Hi,

As a general thing, could you please add a version number to patches in future?
i.e. this should be PATCHv4.

It really helps keeping track of patches, distinguishing versions, etc.

On Thu, May 31, 2018 at 01:37:34PM -0700, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
>
> This patch :
>
> 1. Adds a new function clean_dcache_range_nopatch for flushing kernel
> memory range. This function uses non hot-patched code and can be
> safely used to flush cache during code patching.
>
> 2. Modifies __apply_alternatives so that it uses
> clean_dcache_range_nopatch to flush the cache range after patching code.
>
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>  arch/arm64/include/asm/sysreg.h |  3 +++
>  arch/arm64/kernel/alternative.c | 37 ++++++++++++++++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 6171178075dc..9d1aee7c9aba 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -617,6 +617,9 @@
>  #define MVFR1_FPDNAN_SHIFT           4
>  #define MVFR1_FPFTZ_SHIFT            0
>
> +/* SYS_CTR_EL0 */
> +#define SYS_CTR_ISIZE_SHIFT          0
> +#define SYS_CTR_DSIZE_SHIFT          16

We already have CTR_DMINLINE_SHIFT in <asm/cache.h>

Can we please add CTR_IMINLIN_SHIFT there too?

Maybe those should be moved into sysreg.h, but that can be a separate cleanup.

[RK] -  <asm/cache.h> doesn't contain CTR_DMINLINE_SHIFT.
[RK ] - Sorry, I was looking at kernel-4.14 and couldn't find it there. I can find it in the linux-next repo.

>  #define ID_AA64MMFR0_TGRAN4_SHIFT    28
>  #define ID_AA64MMFR0_TGRAN64_SHIFT   24
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..6b8c5438b37b 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -122,6 +122,41 @@ static void patch_alternative(struct alt_instr *alt,
>       }
>  }
>
> +/* This is used for flushing kernel memory range after
> + * __apply_alternatives has patched kernel code
> + */
> +static void clean_dcache_range_nopatch(void *start, void *end)
> +{
> +     u64 d_start, i_start, d_size, i_size, ctr_el0;

I don't think we need separate i_start and d_start variables. A 'start' or
'cur' variable could be used for both.
[RK] - ok.

> +
> +     /* use sanitised value of ctr_el0 rather than raw value from CPU */
> +     ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> +     /* size in bytes */
> +     d_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> +                     SYS_CTR_DSIZE_SHIFT);
> +     i_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> +                     SYS_CTR_ISIZE_SHIFT);

This isn't the size in bytes. Each is log2 the number of (4-byte) words.

i.e. the size in bytes is (xMinLine << 2).
[RK] - This doesnt seem right. For eg if IMinLine = 4 or 0b100
           then with above formula ICacheSize in Bytes = 4 << 2 = 16
           The correct formula should be (4 << xMinLine).
            So in case IMinLine = 4 or 0b100,
            ICacheSizeBytes = 4 << 4 = 64B

> +
> +     d_start = (u64)start & ~(d_size - 1);
> +     while (d_start <= (u64)end) {
> +             /* Use civac instead of cvau. This is required
> +              * due to ARM errata 826319, 827319, 824069,
> +              * 819472 on A53
> +              */
> +             asm volatile("dc civac, %0" : : "r" (d_start));

Either this needs a memory clobber, or we need barrier() first, to ensure that
the compiler doesn't re-order this against some of the patching code, however
unlikely that may be.
[RK] - So add barrier() before calling clean_dcache_range_nopatch() ?

> +             d_start += d_size;
> +     }

The loop can be simplified to:

        do {
                asm ( ... );
        } while (d_start += d_size, d_start < (u64)end)
[RK] - ok

> +     dsb(ish);
> +
> +     i_start = (u64)start & ~(i_size - 1);
> +     while (i_start <= (u64)end) {
> +             asm volatile("ic ivau, %0" : : "r" (i_start));
> +             i_start += i_size;
> +     }

Likewise here.
[RK] - ok

Thanks,
Mark.

> +     dsb(ish);
> +     isb();
> +}
> +
>  static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>  {
>       struct alt_instr *alt;
> @@ -155,7 +190,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>
>               alt_cb(alt, origptr, updptr, nr_inst);
>
> -             flush_icache_range((uintptr_t)origptr,
> +             clean_dcache_range_nopatch((uintptr_t)origptr,
>                                  (uintptr_t)(origptr + nr_inst));
>       }
>  }
> --
> 2.1.4
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel at lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-01 19:52   ` Rohit Khanna
  2018-06-01 21:43     ` Rohit Khanna
@ 2018-06-04  9:01     ` Mark Rutland
  1 sibling, 0 replies; 20+ messages in thread
From: Mark Rutland @ 2018-06-04  9:01 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jun 01, 2018 at 07:52:05PM +0000, Rohit Khanna wrote:
> [RK] - Thanks for the comments Mark. Reply inlined.
> 
> Thanks
> Rohit
> ________________________________________
> From: Mark Rutland <mark.rutland@arm.com>
> Sent: Friday, June 1, 2018 2:03 AM
> To: Rohit Khanna
> Cc: catalin.marinas at arm.com; robin.murphy at arm.com; Suzuki.Poulose at arm.com; linux-arm-kernel at lists.infradead.org; Alexander Van Brunt; Bo Yan; will.deacon at arm.com
> Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

[...]

> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -617,6 +617,9 @@
> >  #define MVFR1_FPDNAN_SHIFT           4
> >  #define MVFR1_FPFTZ_SHIFT            0
> >
> > +/* SYS_CTR_EL0 */
> > +#define SYS_CTR_ISIZE_SHIFT          0
> > +#define SYS_CTR_DSIZE_SHIFT          16
> 
> We already have CTR_DMINLINE_SHIFT in <asm/cache.h>
> 
> Can we please add CTR_IMINLIN_SHIFT there too?
> 
> Maybe those should be moved into sysreg.h, but that can be a separate cleanup.
> 
> [RK] -  <asm/cache.h> doesnt contain CTR_DMINLINE_SHIFT.

It's on line 23 of arch/arm64/include/asm/cache.h, looking at v4.17.

[...]

> > +     /* use sanitised value of ctr_el0 rather than raw value from CPU */
> > +     ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> > +     /* size in bytes */
> > +     d_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> > +                     SYS_CTR_DSIZE_SHIFT);
> > +     i_size = cpuid_feature_extract_unsigned_field(ctr_el0,
> > +                     SYS_CTR_ISIZE_SHIFT);
> 
> This isn't the size in bytes. Each is log2 the number of (4-byte) words.
> 
> i.e. the size in bytes is (xMinLine << 2).
> [RK] - This doesnt seem right. For eg if IMinLine = 4 or 0b100
>            then with above formula ICacheSize in Bytes = 4 << 2 = 16
>            The correct formula should be (4 << xMinLine). 
>             So in case IMinLine = 4 or 0b100,
>             ICacheSizeBytes = 4 << 4 = 64B             

Whoops. You are correct with 4 << xMinLine.

[...]

> > +     d_start = (u64)start & ~(d_size - 1);
> > +     while (d_start <= (u64)end) {
> > +             /* Use civac instead of cvau. This is required
> > +              * due to ARM errata 826319, 827319, 824069,
> > +              * 819472 on A53
> > +              */
> > +             asm volatile("dc civac, %0" : : "r" (d_start));
> 
> Either this needs a memory clobber, or we need barrier() first, to ensure that
> the compiler doesn't re-order this against some of the patching code, however
> unlikely that may be.
> [RK] - So add barrier() before calling clean_dcache_range_nopatch() ?

I'd put it before the loop here so that it's clearly associated with the DC CIVAC.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-06 15:44             ` Will Deacon
@ 2018-06-06 16:16               ` Alexander Van Brunt
  0 siblings, 0 replies; 20+ messages in thread
From: Alexander Van Brunt @ 2018-06-06 16:16 UTC (permalink / raw)
  To: linux-arm-kernel

> You know we already do this on boot, right? If it's a real issue, I'd argue
> that it's the hypervisor's problem to solve.

I am very aware. I would like to fix the rest of boot and change KVM to trap when the guest does something like that.

But, if you don't want the patch without using an invalidate all, I would rather get Rohit's patch in with an invalidate all than have a system that fails to boot.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-05 17:07           ` Alexander Van Brunt
@ 2018-06-06 15:44             ` Will Deacon
  2018-06-06 16:16               ` Alexander Van Brunt
  0 siblings, 1 reply; 20+ messages in thread
From: Will Deacon @ 2018-06-06 15:44 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jun 05, 2018 at 05:07:54PM +0000, Alexander Van Brunt wrote:
> > 1. Boot. This happens once, and we end up putting *all* secondary cores into
> >    a tight loop anyway, so I don't see that the performance of
> >    __flush_icache_all is relevant
> 
> Native boot happens once. But, each VM that boots will slow down all of
> the other VM's on the system. A VM boot can happen thousands of times.
> 
> I really don't want to cause the whole system to hiccup for a millisecond
> or more when there are only a few cache lines that need to be invalidated.

You know we already do this on boot, right? If it's a real issue, I'd argue
that it's the hypervisor's problem to solve.

Anyway, please can you back this up with some real numbers that show the
impact on a real use-case? It feels like we've quickly getting into
hypothetical territory here because you have a instinctive dislike for full
I-cache invalidation.

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-05 16:55         ` Will Deacon
@ 2018-06-05 17:07           ` Alexander Van Brunt
  2018-06-06 15:44             ` Will Deacon
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Van Brunt @ 2018-06-05 17:07 UTC (permalink / raw)
  To: linux-arm-kernel

> 1. Boot. This happens once, and we end up putting *all* secondary cores into
>    a tight loop anyway, so I don't see that the performance of
>    __flush_icache_all is relevant

Native boot happens once. But each VM that boots will slow down all of the other VMs on the system. A VM boot can happen thousands of times.

I really don't want to cause the whole system to hiccup for a millisecond or more when there are only a few cache lines that need to be invalidated.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-04 19:34       ` Alexander Van Brunt
@ 2018-06-05 16:55         ` Will Deacon
  2018-06-05 17:07           ` Alexander Van Brunt
  0 siblings, 1 reply; 20+ messages in thread
From: Will Deacon @ 2018-06-05 16:55 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Alex,

On Mon, Jun 04, 2018 at 07:34:10PM +0000, Alexander Van Brunt wrote:
> __flush_icache_all is slow in large systems. It flushes the
> instruction caches in the inner-shareable domain. That includes
> other NUMA nodes in multi-socket systems. The CPU issuing the invalidate
> has to wait for all of the other CPUs to complete the invalidate
> instruction. The remote CPUs responding to the request all need to slow
> down.
> 
> In contrast, flushes by range can be checked in a snoop filter to see if
> the addresses are relevant to the CPU. So, it scales to systems with more
> than two clusters much better.
> 
> The flush ignores the VMID. So, one guest OS can slow down others. That
> creates a big covert channel between guests unless the hypervisor traps
> and emulates it by invalidating an entire VMID. By flushing by VA range,
> the hardware only flushes lines associated with the VMID, ASID, and VA
> associated with the line.
> 
> Selfishly, NVIDIA's Denver CPU's are even more sensitive because the
> optimized code stored in DRAM is effectively a very large (on the order of
> 64 MB) instruction cache. "ic ialluis" can result in the entire
> optimization cache for all guests to be invalidated.
> 
> I am aware that the arguments I made apply to TLB invalidates and the
> other places that Linux calls __flush_icache_all. But, that doesn't mean I
> don't like those either. For now, I just don't want more calls to
> __flush_icache_all.

Sure, but there are only two cases to consider here:

1. Boot. This happens once, and we end up putting *all* secondary cores into
   a tight loop anyway, so I don't see that the performance of
   __flush_icache_all is relevant

2. On module load. This happens right before we start remapping the module
   areas and sending out TLB invalidation so, again, not sure I see why this
   is such a big deal. In fact, it looks like the core module loading code
   does invalidation by range, so if we remove it from __apply_alternatives
   then we can avoid doing it twice.

In other words, something like the diff below.

Will

--->8

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index a91933b1e2e6..934f99215bd4 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -28,7 +28,12 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
 				 __le32 *origptr, __le32 *updptr, int nr_inst);
 
 void __init apply_alternatives_all(void);
-void apply_alternatives(void *start, size_t length);
+
+#ifdef CONFIG_MODULES
+void apply_alternatives_module(void *start, size_t length);
+#else
+void apply_alternatives_module(void *start, size_t length) { }
+#endif
 
 #define ALTINSTR_ENTRY(feature,cb)					      \
 	" .word 661b - .\n"				/* label           */ \
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 9bbffc7a301f..627d62ff2ddd 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -21,6 +21,7 @@
 #define CTR_L1IP_SHIFT		14
 #define CTR_L1IP_MASK		3
 #define CTR_DMINLINE_SHIFT	16
+#define CTR_IMINLINE_SHIFT	0
 #define CTR_ERG_SHIFT		20
 #define CTR_CWG_SHIFT		24
 #define CTR_CWG_MASK		15
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5c4bce4ac381..6c813f58824b 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -122,7 +122,29 @@ static void patch_alternative(struct alt_instr *alt,
 	}
 }
 
-static void __apply_alternatives(void *alt_region, bool use_linear_alias)
+static void clean_dcache_range_nopatch(u64 start, u64 end)
+{
+	u64 cur, d_size, i_size, ctr_el0;
+
+	/* use sanitised value of ctr_el0 rather than raw value from CPU */
+	ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+	/* size in bytes */
+	d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
+			CTR_DMINLINE_SHIFT);
+	i_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
+			CTR_IMINLINE_SHIFT);
+
+	cur = start & ~(d_size - 1);
+	do {
+		/*
+		 * We must clean+invalidate in order to avoid Cortex-A53
+		 * errata 826319, 827319, 824069 and 819472.
+		 */
+		asm volatile("dc civac, %0" : : "r" (cur) : "memory");
+	} while (cur += d_size, cur < end);
+}
+
+static void __apply_alternatives(void *alt_region, bool is_module)
 {
 	struct alt_instr *alt;
 	struct alt_region *region = alt_region;
@@ -145,7 +167,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 		pr_info_once("patching kernel code\n");
 
 		origptr = ALT_ORIG_PTR(alt);
-		updptr = use_linear_alias ? lm_alias(origptr) : origptr;
+		updptr = is_module ? origptr : lm_alias(origptr);
 		nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
 
 		if (alt->cpufeature < ARM64_CB_PATCH)
@@ -155,9 +177,23 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
-		flush_icache_range((uintptr_t)origptr,
-				   (uintptr_t)(origptr + nr_inst));
+		if (is_module)
+			continue;
+
+		clean_dcache_range_nopatch((u64)origptr,
+					   (u64)(origptr + nr_inst));
 	}
+
+	/*
+	 * The core module code takes care of cache maintenance in
+	 * flush_module_icache().
+	 */
+	if (is_module)
+		return;
+
+	dsb(ish);
+	__flush_icache_all();
+	isb();
 }
 
 /*
@@ -178,7 +214,7 @@ static int __apply_alternatives_multi_stop(void *unused)
 		isb();
 	} else {
 		BUG_ON(alternatives_applied);
-		__apply_alternatives(&region, true);
+		__apply_alternatives(&region, false);
 		/* Barriers provided by the cache flushing */
 		WRITE_ONCE(alternatives_applied, 1);
 	}
@@ -192,12 +228,14 @@ void __init apply_alternatives_all(void)
 	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
 }
 
-void apply_alternatives(void *start, size_t length)
+#ifdef CONFIG_MODULES
+void apply_alternatives_module(void *start, size_t length)
 {
 	struct alt_region region = {
 		.begin	= start,
 		.end	= start + length,
 	};
 
-	__apply_alternatives(&region, false);
+	__apply_alternatives(&region, true);
 }
+#endif
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 155fd91e78f4..f0f27aeefb73 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -448,9 +448,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
-		if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
-			apply_alternatives((void *)s->sh_addr, s->sh_size);
-		}
+		if (strcmp(".altinstructions", secstrs + s->sh_name) == 0)
+			apply_alternatives_module((void *)s->sh_addr, s->sh_size);
 #ifdef CONFIG_ARM64_MODULE_PLTS
 		if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
 		    !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-06-04  9:16     ` Will Deacon
@ 2018-06-04 19:34       ` Alexander Van Brunt
  2018-06-05 16:55         ` Will Deacon
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Van Brunt @ 2018-06-04 19:34 UTC (permalink / raw)
  To: linux-arm-kernel

__flush_icache_all is slow in large systems. It flushes the instruction caches in the inner-shareable domain. That includes other NUMA nodes in multi-socket systems. The CPU issuing the invalidate has to wait for all of the other CPUs to complete the invalidate instruction. The remote CPUs responding to the request all need to slow down.

In contrast, flushes by range can be checked in a snoop filter to see if the addresses are relevant to the CPU. So, it scales to systems with more than two clusters much better.

The flush ignores the VMID. So, one guest OS can slow down others. That creates a big covert channel between guests unless the hypervisor traps and emulates it by invalidating an entire VMID. By flushing by VA range, the hardware only flushes lines associated with the VMID, ASID, and VA associated with the line.

Selfishly, NVIDIA's Denver CPUs are even more sensitive because the optimized code stored in DRAM is effectively a very large (on the order of 64 MB) instruction cache. "ic ialluis" can result in the entire optimization cache for all guests being invalidated.

I am aware that the arguments I made apply to TLB invalidates and the other places that Linux calls __flush_icache_all. But, that doesn't mean I don't like those either. For now, I just don't want more calls to __flush_icache_all.

Alex VB
________________________________________
From: Will Deacon <will.deacon@arm.com>
Sent: Monday, June 4, 2018 2:16 AM
To: Rohit Khanna
Cc: catalin.marinas at arm.com; robin.murphy at arm.com; mark.rutland at arm.com; Suzuki.Poulose at arm.com; Bo Yan; Alexander Van Brunt; linux-arm-kernel at lists.infradead.org
Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

On Thu, May 31, 2018 at 05:45:11PM +0000, Rohit Khanna wrote:
> Will, thanks for the comments.
> I will push a new patch set.
> We want to avoid using __flush_icache_all as much as possible and hence
> trying to flush cache by range.

Why? We're going to do it once at boot and once on each module load, so I
can't see it being a performance concern.

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-31 17:45   ` Rohit Khanna
@ 2018-06-04  9:16     ` Will Deacon
  2018-06-04 19:34       ` Alexander Van Brunt
  0 siblings, 1 reply; 20+ messages in thread
From: Will Deacon @ 2018-06-04  9:16 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, May 31, 2018 at 05:45:11PM +0000, Rohit Khanna wrote:
> Will, thanks for the comments.
> I will push a new patch set.
> We want to avoid using __flush_icache_all as much as possible and hence
> trying to flush cache by range.

Why? We're going to do it once at boot and once on each module load, so I
can't see it being a performance concern.

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
@ 2018-06-02  0:39 Rohit Khanna
  0 siblings, 0 replies; 20+ messages in thread
From: Rohit Khanna @ 2018-06-02  0:39 UTC (permalink / raw)
  To: linux-arm-kernel

In the current implementation,  __apply_alternatives patches
flush_icache_range and then executes it without invalidating the icache.
Thus, icache can contain some of the old instructions for
flush_icache_range. This can cause unpredictable behavior as during
execution we can get a mix of old and new instructions for
flush_icache_range.

This patch :

1. Adds a new function clean_dcache_range_nopatch for flushing kernel
   memory range. This function uses non hot-patched code and can be
   safely used to flush cache during code patching.

2. Modifies __apply_alternatives so that it uses
   clean_dcache_range_nopatch to flush the cache range after patching code.

Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
---
 arch/arm64/include/asm/cache.h  |  1 +
 arch/arm64/kernel/alternative.c | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 5df5cfe1c143..9211ecd85b15 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -21,6 +21,7 @@
 #define CTR_L1IP_SHIFT		14
 #define CTR_L1IP_MASK		3
 #define CTR_DMINLINE_SHIFT	16
+#define CTR_IMINLINE_SHIFT	0
 #define CTR_ERG_SHIFT		20
 #define CTR_CWG_SHIFT		24
 #define CTR_CWG_MASK		15
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5c4bce4ac381..da5815807aeb 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -122,6 +122,41 @@ static void patch_alternative(struct alt_instr *alt,
 	}
 }
 
+/* This is used for flushing kernel memory range after
+ * __apply_alternatives has patched kernel code
+ */
+static void clean_dcache_range_nopatch(void *start, void *end)
+{
+	u64 cur, d_size, i_size, ctr_el0;
+
+	/* use sanitised value of ctr_el0 rather than raw value from CPU */
+	ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+	/* size in bytes */
+	d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
+			CTR_DMINLINE_SHIFT);
+	i_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
+			CTR_IMINLINE_SHIFT);
+
+	cur = (u64)start & ~(d_size - 1);
+	/* Ensure compiler doesn't reorder this against patching code */
+	barrier();
+	do {
+		/* Use civac instead of cvau. This is required
+		 * due to ARM errata 826319, 827319, 824069,
+		 * 819472 on A53
+		 */
+		asm volatile("dc civac, %0" : : "r" (cur));
+	} while (cur += d_size, cur < (u64)end);
+	dsb(ish);
+
+	cur = (u64)start & ~(i_size - 1);
+	do {
+		asm volatile("ic ivau, %0" : : "r" (cur));
+	} while (cur += i_size, cur < (u64)end);
+	dsb(ish);
+	isb();
+}
+
 static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 {
 	struct alt_instr *alt;
@@ -155,7 +190,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
-		flush_icache_range((uintptr_t)origptr,
+		clean_dcache_range_nopatch((uintptr_t)origptr,
 				   (uintptr_t)(origptr + nr_inst));
 	}
 }
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-30  9:00 ` Will Deacon
@ 2018-05-31 17:45   ` Rohit Khanna
  2018-06-04  9:16     ` Will Deacon
  0 siblings, 1 reply; 20+ messages in thread
From: Rohit Khanna @ 2018-05-31 17:45 UTC (permalink / raw)
  To: linux-arm-kernel

Will, thanks for the comments.
I will push a new patch set.
We want to avoid using __flush_icache_all as much as possible and hence trying to flush cache by range.

Thanks
Rohit
________________________________________
From: Will Deacon <will.deacon@arm.com>
Sent: Wednesday, May 30, 2018 2:00 AM
To: Rohit Khanna
Cc: catalin.marinas at arm.com; robin.murphy at arm.com; mark.rutland at arm.com; Suzuki.Poulose at arm.com; Bo Yan; Alexander Van Brunt; linux-arm-kernel at lists.infradead.org
Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

Hi Rohit,

Please keep me on cc for future versions of this patch. Comments inline.

On Tue, May 29, 2018 at 11:11:28AM -0700, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
>
> This patch :
> 1. Adds a new function flush_cache_kernel_range for flushing kernel
> memory range. This function uses non hot-patched code and can be
> safely used to flush cache during code patching.
>
> 2. Modifies __apply_alternatives so that it uses
> flush_cache_kernel_range to flush the cache range after patching code.
>
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>  arch/arm64/kernel/alternative.c | 31 +++++++++++++++++++++++++++++--
>  1 file changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..e93cfd26a314 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -122,6 +122,33 @@ static void patch_alternative(struct alt_instr *alt,
>       }
>  }
>
> +/* This is used for flushing kernel memory range after
> + * __apply_alternatives has patched kernel code
> + */
> +static void flush_cache_kernel_range(void *start, void *end)
> +{

How about something like clean_dcache_range_nopatch instead?

> +     u64 d_start, i_start, d_size, i_size;
> +
> +     /* use sanitized value of ctr_el0 rather than raw value from CPU */
> +     d_size = 4 << ((arm64_ftr_reg_ctrel0.sys_val >> 0x10) & 0xF); /* bytes */
> +     i_size = 4 << (arm64_ftr_reg_ctrel0.sys_val & 0xF); /* bytes */

You should be able to use read_sanitised_ftr_reg() and
cpuid_feature_extract_unsigned_field() here.

> +     d_start = (u64)start & ~(d_size - 1);
> +     while (d_start <= (u64)end) {

Please add a comment about the A53 erratum this is handling by using
clean+inv.

> +             asm volatile("dc civac, %0" : : "r" (d_start));
> +             d_start += d_size;
> +     }
> +     dsb(ish);
> +
> +     i_start = (u64)start & ~(i_size - 1);
> +     while (i_start <= (u64)end) {
> +             asm volatile("ic ivau, %0" : : "r" (i_start));
> +             i_start += i_size;
> +     }
> +     dsb(ish);
> +     isb();

As I mentioned before, I think it would be simpler just to avoid doing the
I-cache invalidation by range and instead call __flush_icache_all once we've
exiting the loop in __apply_alternatives.

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-29 18:11 Rohit Khanna
@ 2018-05-30  9:00 ` Will Deacon
  2018-05-31 17:45   ` Rohit Khanna
  0 siblings, 1 reply; 20+ messages in thread
From: Will Deacon @ 2018-05-30  9:00 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rohit,

Please keep me on cc for future versions of this patch. Comments inline.

On Tue, May 29, 2018 at 11:11:28AM -0700, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
> 
> This patch :
> 1. Adds a new function flush_cache_kernel_range for flushing kernel
> memory range. This function uses non hot-patched code and can be
> safely used to flush cache during code patching.
> 
> 2. Modifies __apply_alternatives so that it uses
> flush_cache_kernel_range to flush the cache range after patching code.
> 
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>  arch/arm64/kernel/alternative.c | 31 +++++++++++++++++++++++++++++--
>  1 file changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..e93cfd26a314 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -122,6 +122,33 @@ static void patch_alternative(struct alt_instr *alt,
>  	}
>  }
>  
> +/* This is used for flushing kernel memory range after
> + * __apply_alternatives has patched kernel code
> + */
> +static void flush_cache_kernel_range(void *start, void *end)
> +{

How about something like clean_dcache_range_nopatch instead?

> +	u64 d_start, i_start, d_size, i_size;
> +
> +	/* use sanitized value of ctr_el0 rather than raw value from CPU */
> +	d_size = 4 << ((arm64_ftr_reg_ctrel0.sys_val >> 0x10) & 0xF); /* bytes */
> +	i_size = 4 << (arm64_ftr_reg_ctrel0.sys_val & 0xF); /* bytes */

You should be able to use read_sanitised_ftr_reg() and
cpuid_feature_extract_unsigned_field() here.

> +	d_start = (u64)start & ~(d_size - 1);
> +	while (d_start <= (u64)end) {

Please add a comment about the A53 erratum this is handling by using
clean+inv.

> +		asm volatile("dc civac, %0" : : "r" (d_start));
> +		d_start += d_size;
> +	}
> +	dsb(ish);
> +
> +	i_start = (u64)start & ~(i_size - 1);
> +	while (i_start <= (u64)end) {
> +		asm volatile("ic ivau, %0" : : "r" (i_start));
> +		i_start += i_size;
> +	}
> +	dsb(ish);
> +	isb();

As I mentioned before, I think it would be simpler just to avoid doing the
I-cache invalidation by range and instead call __flush_icache_all once we've
exiting the loop in __apply_alternatives.

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
@ 2018-05-29 18:11 Rohit Khanna
  2018-05-30  9:00 ` Will Deacon
  0 siblings, 1 reply; 20+ messages in thread
From: Rohit Khanna @ 2018-05-29 18:11 UTC (permalink / raw)
  To: linux-arm-kernel

In the current implementation,  __apply_alternatives patches
flush_icache_range and then executes it without invalidating the icache.
Thus, icache can contain some of the old instructions for
flush_icache_range. This can cause unpredictable behavior as during
execution we can get a mix of old and new instructions for
flush_icache_range.

This patch :
1. Adds a new function flush_cache_kernel_range for flushing kernel
memory range. This function uses non hot-patched code and can be
safely used to flush cache during code patching.

2. Modifies __apply_alternatives so that it uses
flush_cache_kernel_range to flush the cache range after patching code.

Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
---
 arch/arm64/kernel/alternative.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5c4bce4ac381..e93cfd26a314 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -122,6 +122,33 @@ static void patch_alternative(struct alt_instr *alt,
 	}
 }
 
+/* This is used for flushing kernel memory range after
+ * __apply_alternatives has patched kernel code
+ */
+static void flush_cache_kernel_range(void *start, void *end)
+{
+	u64 d_start, i_start, d_size, i_size;
+
+	/* use sanitized value of ctr_el0 rather than raw value from CPU */
+	d_size = 4 << ((arm64_ftr_reg_ctrel0.sys_val >> 0x10) & 0xF); /* bytes */
+	i_size = 4 << (arm64_ftr_reg_ctrel0.sys_val & 0xF); /* bytes */
+
+	d_start = (u64)start & ~(d_size - 1);
+	while (d_start <= (u64)end) {
+		asm volatile("dc civac, %0" : : "r" (d_start));
+		d_start += d_size;
+	}
+	dsb(ish);
+
+	i_start = (u64)start & ~(i_size - 1);
+	while (i_start <= (u64)end) {
+		asm volatile("ic ivau, %0" : : "r" (i_start));
+		i_start += i_size;
+	}
+	dsb(ish);
+	isb();
+}
+
 static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 {
 	struct alt_instr *alt;
@@ -155,8 +182,8 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
-		flush_icache_range((uintptr_t)origptr,
-				   (uintptr_t)(origptr + nr_inst));
+		flush_cache_kernel_range((void *)origptr,
+				(void *)(origptr + nr_inst));
 	}
 }
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-22 18:07 Rohit Khanna
@ 2018-05-23  9:06 ` Will Deacon
  0 siblings, 0 replies; 20+ messages in thread
From: Will Deacon @ 2018-05-23  9:06 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Rohit,

On Tue, May 22, 2018 at 11:07:31AM -0700, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
> 
> This patch :
> 
> 1. Adds a new function flush_cache_kernel_range for flushing kernel
> memory range. This function is not patched during boot and can be safely
> used to flush cache during code patching.
> 
> 2. Modifies __apply_alternatives so that it uses
> flush_cache_kernel_range to flush the cache range after patching code.
> 
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>  arch/arm64/include/asm/cacheflush.h |  1 +
>  arch/arm64/kernel/alternative.c     |  2 +-
>  arch/arm64/mm/cache.S               | 42 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 44 insertions(+), 1 deletion(-)

Thanks for the patch; this is a horrible bug :)

Comments below.

> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 30334d81b021..1366f00297c3 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -81,6 +81,48 @@ ENDPROC(flush_icache_range)
>  ENDPROC(__flush_cache_user_range)
>  
>  /*
> + *	flush_cache_kernel_range(start,end)
> + *
> + *	Ensure that the I and D caches are coherent within specified kernel
> + *	region.
> + *	This is typically used when code has been written to a kernel memory
> + *	region and will be executed.
> + *
> + *	NOTE - This macro cannot have "alternatives" applied to it as its
> + *	used to update alternatives
> + *
> + *	- start   - virtual start address of region
> + *	- end     - virtual end address of region
> + */
> +ENTRY(flush_cache_kernel_range)
> +	raw_dcache_line_size x2, x3
> +	sub	x3, x2, #1
> +	bic	x4, x0, x3
> +1:
> +	dc	civac, x4	/* Use civac instead of cvau. This is required
> +				 * due to ARM errata 826319, 827319, 824069,
> +				 * 819472 on A53
> +				 */
> +	add	x4, x4, x2
> +	cmp	x4, x1
> +	b.lo	1b
> +	dsb	ish
> +
> +	raw_icache_line_size x2, x3

Won't this be broken on systems that misreport the line size in CTR?

> +	sub	x3, x2, #1
> +	bic	x4, x0, x3
> +1:
> +	ic	ivau, x4			// invalidate I line PoU
> +	add	x4, x4, x2
> +	cmp	x4, x1
> +	b.lo	1b
> +	dsb	ish
> +	isb
> +	mov	x0, #0
> +	ret
> +ENDPROC(flush_cache_kernel_range)

Generally, I don't think we want to expose this function as it's not going
to be the right thing to call outside of the alternatives patching. Instead,
can we:

  1. Code this up in alternative.c, mostly in C code and always using the
     sanitised feature register value for the CTR

  2. Just call __flush_icache_all once after patching is complete (and
     add a comment to __flush_icache_all saying that it's used by the
     patching code)

That way, we're keeping the implementation private and simple so we can
hopefully avoid running into this problem again.

Cheers,

Will

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-22 15:09 ` Suzuki K Poulose
@ 2018-05-22 18:08   ` Rohit Khanna
  0 siblings, 0 replies; 20+ messages in thread
From: Rohit Khanna @ 2018-05-22 18:08 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Suzuki, I have modified the patch and will send it for review.

Thanks
Rohit
________________________________________
From: Suzuki K Poulose <Suzuki.Poulose@arm.com>
Sent: Tuesday, May 22, 2018 8:09 AM
To: Rohit Khanna; catalin.marinas at arm.com; robin.murphy at arm.com; mark.rutland at arm.com
Cc: Bo Yan; Alexander Van Brunt; linux-arm-kernel at lists.infradead.org
Subject: Re: [PATCH] arm64: alternative:flush cache with unpatched code

Rohit,

On 22/05/18 02:27, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
>
> This patch :
>
> 1. Adds a new function flush_cache_kernel_range for flushing kernel
> memory range. This function is not patched during boot and can be safely
> used to flush cache during code patching.
>
> 2. Modifies __apply_alternatives so that it uses
> flush_cache_kernel_range to flush the cache range after patching code.
>
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>   arch/arm64/include/asm/cacheflush.h |  1 +
>   arch/arm64/kernel/alternative.c     |  2 +-
>   arch/arm64/mm/cache.S               | 42 +++++++++++++++++++++++++++++++++++++
>   3 files changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index 0094c6653b06..54692fabdf74 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -73,6 +73,7 @@
>    */
>   extern void flush_icache_range(unsigned long start, unsigned long end);
>   extern int  invalidate_icache_range(unsigned long start, unsigned long end);
> +extern void flush_cache_kernel_range(unsigned long start, unsigned long end);
>   extern void __flush_dcache_area(void *addr, size_t len);
>   extern void __inval_dcache_area(void *addr, size_t len);
>   extern void __clean_dcache_area_poc(void *addr, size_t len);
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..a5408a3c297a 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -155,7 +155,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>
>               alt_cb(alt, origptr, updptr, nr_inst);
>
> -             flush_icache_range((uintptr_t)origptr,
> +             flush_cache_kernel_range((uintptr_t)origptr,
>                                  (uintptr_t)(origptr + nr_inst));
>       }
>   }
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 30334d81b021..4dd09352a044 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -81,6 +81,48 @@ ENDPROC(flush_icache_range)
>   ENDPROC(__flush_cache_user_range)
>
>   /*
> + *   flush_cache_kernel_range(start,end)
> + *
> + *   Ensure that the I and D caches are coherent within specified kernel
> + *   region.
> + *   This is typically used when code has been written to a kernel memory
> + *   region and will be executed.
> + *
> + *   NOTE - This macro cannot have "alternatives" applied to it as its
> + *   used to update alternatives
> + *
> + *   - start   - virtual start address of region
> + *   - end     - virtual end address of region
> + */
> +ENTRY(flush_cache_kernel_range)
> +     dcache_line_size x2, x3
...

> +     icache_line_size x2, x3

You must use raw_{d,i}cache_line_size helpers above to avoid
using hot-patched code. The above helpers are patched if you
have mismatched cache line sizes.

Suzuki

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
@ 2018-05-22 18:07 Rohit Khanna
  2018-05-23  9:06 ` Will Deacon
  0 siblings, 1 reply; 20+ messages in thread
From: Rohit Khanna @ 2018-05-22 18:07 UTC (permalink / raw)
  To: linux-arm-kernel

In the current implementation,  __apply_alternatives patches
flush_icache_range and then executes it without invalidating the icache.
Thus, icache can contain some of the old instructions for
flush_icache_range. This can cause unpredictable behavior as during
execution we can get a mix of old and new instructions for
flush_icache_range.

This patch :

1. Adds a new function flush_cache_kernel_range for flushing kernel
memory range. This function is not patched during boot and can be safely
used to flush cache during code patching.

2. Modifies __apply_alternatives so that it uses
flush_cache_kernel_range to flush the cache range after patching code.

Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/arm64/kernel/alternative.c     |  2 +-
 arch/arm64/mm/cache.S               | 42 +++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 0094c6653b06..54692fabdf74 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -73,6 +73,7 @@
  */
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern int  invalidate_icache_range(unsigned long start, unsigned long end);
+extern void flush_cache_kernel_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
 extern void __inval_dcache_area(void *addr, size_t len);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5c4bce4ac381..a5408a3c297a 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -155,7 +155,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
-		flush_icache_range((uintptr_t)origptr,
+		flush_cache_kernel_range((uintptr_t)origptr,
 				   (uintptr_t)(origptr + nr_inst));
 	}
 }
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 30334d81b021..1366f00297c3 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -81,6 +81,48 @@ ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
 
 /*
+ *	flush_cache_kernel_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified kernel
+ *	region.
+ *	This is typically used when code has been written to a kernel memory
+ *	region and will be executed.
+ *
+ *	NOTE - This macro cannot have "alternatives" applied to it as its
+ *	used to update alternatives
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(flush_cache_kernel_range)
+	raw_dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+	dc	civac, x4	/* Use civac instead of cvau. This is required
+				 * due to ARM errata 826319, 827319, 824069,
+				 * 819472 on A53
+				 */
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	ish
+
+	raw_icache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+	ic	ivau, x4			// invalidate I line PoU
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	ish
+	isb
+	mov	x0, #0
+	ret
+ENDPROC(flush_cache_kernel_range)
+
+/*
  *	invalidate_icache_range(start,end)
  *
  *	Ensure that the I cache is invalid within specified region.
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
  2018-05-22  1:27 Rohit Khanna
@ 2018-05-22 15:09 ` Suzuki K Poulose
  2018-05-22 18:08   ` Rohit Khanna
  0 siblings, 1 reply; 20+ messages in thread
From: Suzuki K Poulose @ 2018-05-22 15:09 UTC (permalink / raw)
  To: linux-arm-kernel



Rohit,

On 22/05/18 02:27, Rohit Khanna wrote:
> In the current implementation,  __apply_alternatives patches
> flush_icache_range and then executes it without invalidating the icache.
> Thus, icache can contain some of the old instructions for
> flush_icache_range. This can cause unpredictable behavior as during
> execution we can get a mix of old and new instructions for
> flush_icache_range.
> 
> This patch :
> 
> 1. Adds a new function flush_cache_kernel_range for flushing kernel
> memory range. This function is not patched during boot and can be safely
> used to flush cache during code patching.
> 
> 2. Modifies __apply_alternatives so that it uses
> flush_cache_kernel_range to flush the cache range after patching code.
> 
> Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
> ---
>   arch/arm64/include/asm/cacheflush.h |  1 +
>   arch/arm64/kernel/alternative.c     |  2 +-
>   arch/arm64/mm/cache.S               | 42 +++++++++++++++++++++++++++++++++++++
>   3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index 0094c6653b06..54692fabdf74 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -73,6 +73,7 @@
>    */
>   extern void flush_icache_range(unsigned long start, unsigned long end);
>   extern int  invalidate_icache_range(unsigned long start, unsigned long end);
> +extern void flush_cache_kernel_range(unsigned long start, unsigned long end);
>   extern void __flush_dcache_area(void *addr, size_t len);
>   extern void __inval_dcache_area(void *addr, size_t len);
>   extern void __clean_dcache_area_poc(void *addr, size_t len);
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 5c4bce4ac381..a5408a3c297a 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -155,7 +155,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>   
>   		alt_cb(alt, origptr, updptr, nr_inst);
>   
> -		flush_icache_range((uintptr_t)origptr,
> +		flush_cache_kernel_range((uintptr_t)origptr,
>   				   (uintptr_t)(origptr + nr_inst));
>   	}
>   }
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index 30334d81b021..4dd09352a044 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -81,6 +81,48 @@ ENDPROC(flush_icache_range)
>   ENDPROC(__flush_cache_user_range)
>   
>   /*
> + *	flush_cache_kernel_range(start,end)
> + *
> + *	Ensure that the I and D caches are coherent within specified kernel
> + *	region.
> + *	This is typically used when code has been written to a kernel memory
> + *	region and will be executed.
> + *
> + *	NOTE - This macro cannot have "alternatives" applied to it as its
> + *	used to update alternatives
> + *
> + *	- start   - virtual start address of region
> + *	- end     - virtual end address of region
> + */
> +ENTRY(flush_cache_kernel_range)
> +	dcache_line_size x2, x3
...

> +	icache_line_size x2, x3

You must use raw_{d,i}cache_line_size helpers above to avoid
using hot-patched code. The above helpers are patched if you
have mismatched cache line sizes.

Suzuki

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] arm64: alternative:flush cache with unpatched code
@ 2018-05-22  1:27 Rohit Khanna
  2018-05-22 15:09 ` Suzuki K Poulose
  0 siblings, 1 reply; 20+ messages in thread
From: Rohit Khanna @ 2018-05-22  1:27 UTC (permalink / raw)
  To: linux-arm-kernel

In the current implementation, __apply_alternatives patches
flush_icache_range and then executes it without invalidating the icache.
Thus, icache can contain some of the old instructions for
flush_icache_range. This can cause unpredictable behavior as during
execution we can get a mix of old and new instructions for
flush_icache_range.

This patch:

1. Adds a new function flush_cache_kernel_range for flushing kernel
memory range. This function is not patched during boot and can be safely
used to flush cache during code patching.

2. Modifies __apply_alternatives so that it uses
flush_cache_kernel_range to flush the cache range after patching code.

Signed-off-by: Rohit Khanna <rokhanna@nvidia.com>
---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/arm64/kernel/alternative.c     |  2 +-
 arch/arm64/mm/cache.S               | 42 +++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 0094c6653b06..54692fabdf74 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -73,6 +73,7 @@
  */
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern int  invalidate_icache_range(unsigned long start, unsigned long end);
+extern void flush_cache_kernel_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
 extern void __inval_dcache_area(void *addr, size_t len);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5c4bce4ac381..a5408a3c297a 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -155,7 +155,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
-		flush_icache_range((uintptr_t)origptr,
+		flush_cache_kernel_range((uintptr_t)origptr,
 				   (uintptr_t)(origptr + nr_inst));
 	}
 }
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 30334d81b021..4dd09352a044 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -81,6 +81,48 @@ ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
 
 /*
+ *	flush_cache_kernel_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified kernel
+ *	region.
+ *	This is typically used when code has been written to a kernel memory
+ *	region and will be executed.
+ *
+ *	NOTE - This macro cannot have "alternatives" applied to it as it's
+ *	used to update alternatives
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(flush_cache_kernel_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+	dc	civac, x4	/* Use civac instead of cvau. This is required
+				 * due to ARM errata 826319, 827319, 824069,
+				 * 819472 on A53
+				 */
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	ish
+
+	icache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+	ic	ivau, x4			// invalidate I line PoU
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	ish
+	isb
+	mov	x0, #0
+	ret
+ENDPROC(flush_cache_kernel_range)
+
+/*
  *	invalidate_icache_range(start,end)
  *
  *	Ensure that the I cache is invalid within specified region.
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2018-06-06 16:16 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-31 20:37 [PATCH] arm64: alternative:flush cache with unpatched code Rohit Khanna
2018-06-01  9:03 ` Mark Rutland
2018-06-01 19:52   ` Rohit Khanna
2018-06-01 21:43     ` Rohit Khanna
2018-06-04  9:01     ` Mark Rutland
  -- strict thread matches above, loose matches on Subject: below --
2018-06-02  0:39 Rohit Khanna
2018-05-29 18:11 Rohit Khanna
2018-05-30  9:00 ` Will Deacon
2018-05-31 17:45   ` Rohit Khanna
2018-06-04  9:16     ` Will Deacon
2018-06-04 19:34       ` Alexander Van Brunt
2018-06-05 16:55         ` Will Deacon
2018-06-05 17:07           ` Alexander Van Brunt
2018-06-06 15:44             ` Will Deacon
2018-06-06 16:16               ` Alexander Van Brunt
2018-05-22 18:07 Rohit Khanna
2018-05-23  9:06 ` Will Deacon
2018-05-22  1:27 Rohit Khanna
2018-05-22 15:09 ` Suzuki K Poulose
2018-05-22 18:08   ` Rohit Khanna

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.