Re: [PATCH v2 1/2] riscv: Align shared mappings to SHMLBA

From: Palmer Dabbelt <palmerdabbelt@google.com>
To: consult-mg@gstardust.com
Cc: linux-riscv@lists.infradead.org, consult-mg@gstardust.com,
	aou@eecs.berkeley.edu, Paul Walmsley <paul.walmsley@sifive.com>
Subject: Re: [PATCH v2 1/2] riscv: Align shared mappings to SHMLBA
Date: Thu, 05 Dec 2019 15:03:12 -0800 (PST)	[thread overview]
Message-ID: <mhng-97bbfd0b-0d80-4e3a-8129-2e671b51157a@palmerdabbelt-glaptop> (raw)
In-Reply-To: <20191126224446.15145-2-consult-mg@gstardust.com>

On Tue, 26 Nov 2019 14:44:45 PST (-0800), consult-mg@gstardust.com wrote:
> Align shared mappings according to SHMLBA for VIPT cache performance.
>
> arch_get_unmapped_area() and arch_get_unmapped_area_topdown() are
> essentially copies of their default implementations in mm/mmap.c,
> modified to align the address to SHMLBA for shared mappings, i.e.
> where MAP_SHARED is specified or a file pointer is provided.
>
> Allow MAP_FIXED to request unaligned shared mappings.  Although this
> may potentially reduce performance, very little software does this, as
> it is not portable across architectures that enforce alignment.
>
> Signed-off-by: Marc Gauthier <consult-mg@gstardust.com>
> ---
>  arch/riscv/include/asm/pgtable.h |   4 ++
>  arch/riscv/kernel/sys_riscv.c    | 113 +++++++++++++++++++++++++++++++
>  2 files changed, 117 insertions(+)
>
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index d3221017194d..7d1cc47ac5f9 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -436,6 +436,10 @@ extern void *dtb_early_va;
>  extern void setup_bootmem(void);
>  extern void paging_init(void);
>
> +/* We provide arch_get_unmapped_area to handle VIPT caches efficiently. */
> +#define HAVE_ARCH_UNMAPPED_AREA
> +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
> +
>  /*
>   * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
>   * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index f3619f59d85c..3e739b30b1f7 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -3,11 +3,15 @@
>   * Copyright (C) 2012 Regents of the University of California
>   * Copyright (C) 2014 Darius Rad <darius@bluespec.com>
>   * Copyright (C) 2017 SiFive
> + * Copyright (C) 2019 Aril Inc
>   */
>
>  #include <linux/syscalls.h>
>  #include <asm/unistd.h>
>  #include <asm/cacheflush.h>
> +#include <linux/shm.h>
> +#include <linux/mman.h>
> +#include <linux/security.h>
>
>  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>  			   unsigned long prot, unsigned long flags,
> @@ -65,3 +69,112 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
>
>  	return 0;
>  }
> +
> +/*
> + * RISC-V requires implementations to function correctly in the presence
> + * of cache aliasing, regardless of page alignment.  It says nothing about
> + * performance.  To ensure healthy performance with commonly implemented
> + * VIPT caches, the following code avoids most cases of cache aliasing by
> + * aligning shared mappings such that all mappings of a given physical
> + * page of an object are at a multiple of SHMLBA bytes from each other.
> + *
> + * It does not enforce alignment.  Using MAP_FIXED to request unaligned
> + * shared mappings is not common, and may perform poorly with VIPT caches.
> + */
> +unsigned long
> +arch_get_unmapped_area(struct file *filp, unsigned long addr,
> +			unsigned long len, unsigned long pgoff,
> +			unsigned long flags)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *vma, *prev;
> +	struct vm_unmapped_area_info info;
> +	const unsigned long pgoffset = pgoff << PAGE_SHIFT;
> +	int do_align = (filp || (flags & MAP_SHARED));
> +
> +	if (len > TASK_SIZE - mmap_min_addr)
> +		return -ENOMEM;
> +
> +	if (flags & MAP_FIXED)
> +		return addr;
> +
> +	if (addr) {
> +		if (do_align)
> +			addr = ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1));
> +		else
> +			addr = PAGE_ALIGN(addr);
> +		vma = find_vma_prev(mm, addr, &prev);
> +		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
> +		    (!vma || addr + len <= vm_start_gap(vma)) &&
> +		    (!prev || addr >= vm_end_gap(prev)))
> +			return addr;
> +	}
> +
> +	info.flags = 0;
> +	info.length = len;
> +	info.low_limit = mm->mmap_base;
> +	info.high_limit = TASK_SIZE;
> +	info.align_mask = do_align ? SHMLBA - 1 : 0;
> +	info.align_offset = pgoffset;
> +	return vm_unmapped_area(&info);
> +}
> +
> +/*
> + * Similar to arch_get_unmapped_area(), but allocating top-down from below the
> + * stack's low limit (the base).
> + */
> +unsigned long
> +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
> +				unsigned long len, unsigned long pgoff,
> +				unsigned long flags)
> +{
> +	struct vm_area_struct *vma, *prev;
> +	struct mm_struct *mm = current->mm;
> +	struct vm_unmapped_area_info info;
> +	const unsigned long pgoffset = pgoff << PAGE_SHIFT;
> +	int do_align = (filp || (flags & MAP_SHARED));
> +
> +	/* requested length too big for entire address space */
> +	if (len > TASK_SIZE - mmap_min_addr)
> +		return -ENOMEM;
> +
> +	if (flags & MAP_FIXED)
> +		return addr;
> +
> +	/* requesting a specific address */
> +	if (addr) {
> +		if (do_align)
> +			addr = ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1));
> +		else
> +			addr = PAGE_ALIGN(addr);
> +		vma = find_vma_prev(mm, addr, &prev);
> +		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
> +				(!vma || addr + len <= vm_start_gap(vma)) &&
> +				(!prev || addr >= vm_end_gap(prev)))
> +			return addr;
> +	}
> +
> +	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
> +	info.length = len;
> +	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
> +	info.high_limit = mm->mmap_base;
> +	info.align_mask = do_align ? SHMLBA - 1 : 0;
> +	info.align_offset = pgoffset;
> +	addr = vm_unmapped_area(&info);
> +
> +	/*
> +	 * A failed mmap() very likely causes application failure,
> +	 * so fall back to the bottom-up function here. This scenario
> +	 * can happen with large stack limits and large mmap()
> +	 * allocations.
> +	 */
> +	if (offset_in_page(addr)) {
> +		VM_BUG_ON(addr != -ENOMEM);
> +		info.flags = 0;
> +		info.low_limit = TASK_UNMAPPED_BASE;
> +		info.high_limit = TASK_SIZE;
> +		addr = vm_unmapped_area(&info);
> +	}
> +
> +	return addr;
> +}

It really feels like this should be generic code to me.  It looks like the only
major difference between this and the routines in arch/arm/mm/mmap.c is whether
or not MAP_FIXED alignment is enforced, so we could probably just make the
arch-specific code be arch_cache_is_vipt_aliasing() which on RISC-V would
always be false and on ARM would be the current cache_is_vipt_aliasing().

ARM is also using a different alignment expression, but I think they may be
equivilant.  They have

    #define COLOUR_ALIGN(addr,pgoff)                \
            ((((addr)+SHMLBA-1)&~(SHMLBA-1)) +      \
             (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))

LMK if you're OK doing this, or if you want me to take over the patch set.