Currently we issue an MFENCE before and after flushing a range. This means that if we flush a bunch of single page ranges -- like with the cpa array -- we issue a whole bunch of superfluous MFENCEs. Reorganize the code a little to avoid this. Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/mm/pageattr.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -251,15 +251,7 @@ static unsigned long __cpa_addr(struct c * Flushing functions */ -/** - * clflush_cache_range - flush a cache range with clflush - * @vaddr: virtual start address - * @size: number of bytes to flush - * - * clflushopt is an unordered instruction which needs fencing with mfence or - * sfence to avoid ordering issues. - */ -void clflush_cache_range(void *vaddr, unsigned int size) +static void clflush_cache_range_opt(void *vaddr, unsigned int size) { const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); @@ -268,11 +260,22 @@ void clflush_cache_range(void *vaddr, un if (p >= vend) return; - mb(); - for (; p < vend; p += clflush_size) clflushopt(p); +} +/** + * clflush_cache_range - flush a cache range with clflush + * @vaddr: virtual start address + * @size: number of bytes to flush + * + * clflushopt is an unordered instruction which needs fencing with mfence or + * sfence to avoid ordering issues. 
+ */ +void clflush_cache_range(void *vaddr, unsigned int size) +{ + mb(); + clflush_cache_range_opt(vaddr, size); mb(); } EXPORT_SYMBOL_GPL(clflush_cache_range); @@ -339,6 +342,7 @@ static void cpa_flush(struct cpa_data *d if (!cache) return; + mb(); for (i = 0; i < cpa->numpages; i++) { unsigned long addr = __cpa_addr(cpa, i); unsigned int level; @@ -349,8 +353,9 @@ static void cpa_flush(struct cpa_data *d * Only flush present addresses: */ if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range((void *)addr, PAGE_SIZE); + clflush_cache_range_opt((void *)addr, PAGE_SIZE); } + mb(); } static bool overlaps(unsigned long r1_start, unsigned long r1_end,