linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: Christophe Leroy <christophe.leroy@c-s.fr>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	aneesh.kumar@linux.vnet.ibm.com
Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH v4 16/20] powerpc/mm: Extend pte_fragment functionality to nohash/32
Date: Wed, 19 Sep 2018 08:33:39 +0530	[thread overview]
Message-ID: <89038b3e-f1cd-a4a0-481b-46cef2b5e388@linux.ibm.com> (raw)
In-Reply-To: <a2f88be3bdaddc54a5836d6049c616b1739aef42.1537288312.git.christophe.leroy@c-s.fr>

On 9/18/18 10:27 PM, Christophe Leroy wrote:
> In order to allow the 8xx to handle pte_fragments, this patch
> extends the use of pte_fragments to nohash/32 platforms.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>   arch/powerpc/include/asm/mmu-40x.h           |  1 +
>   arch/powerpc/include/asm/mmu-44x.h           |  1 +
>   arch/powerpc/include/asm/mmu-8xx.h           |  1 +
>   arch/powerpc/include/asm/mmu-book3e.h        |  1 +
>   arch/powerpc/include/asm/mmu_context.h       |  2 +-
>   arch/powerpc/include/asm/nohash/32/pgalloc.h | 43 +++++++++++-----------------
>   arch/powerpc/include/asm/nohash/32/pgtable.h |  7 +++--
>   arch/powerpc/include/asm/page.h              |  6 +---
>   arch/powerpc/include/asm/pgtable.h           |  8 ++++++
>   arch/powerpc/mm/Makefile                     |  3 ++
>   arch/powerpc/mm/mmu_context_nohash.c         |  1 +
>   arch/powerpc/mm/pgtable-frag.c               |  6 ++++
>   arch/powerpc/mm/pgtable_32.c                 |  8 ++++--
>   13 files changed, 51 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mmu-40x.h b/arch/powerpc/include/asm/mmu-40x.h
> index 74f4edb5916e..7c77ceed71d6 100644
> --- a/arch/powerpc/include/asm/mmu-40x.h
> +++ b/arch/powerpc/include/asm/mmu-40x.h
> @@ -58,6 +58,7 @@ typedef struct {
>   	unsigned int	id;
>   	unsigned int	active;
>   	unsigned long	vdso_base;
> +	void *pte_frag;
>   } mm_context_t;
> 
>   #endif /* !__ASSEMBLY__ */
> diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
> index 295b3dbb2698..3d72e889ae7b 100644
> --- a/arch/powerpc/include/asm/mmu-44x.h
> +++ b/arch/powerpc/include/asm/mmu-44x.h
> @@ -109,6 +109,7 @@ typedef struct {
>   	unsigned int	id;
>   	unsigned int	active;
>   	unsigned long	vdso_base;
> +	void *pte_frag;
>   } mm_context_t;
> 
>   #endif /* !__ASSEMBLY__ */
> diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
> index fa05aa566ece..750cef6f65e3 100644
> --- a/arch/powerpc/include/asm/mmu-8xx.h
> +++ b/arch/powerpc/include/asm/mmu-8xx.h
> @@ -179,6 +179,7 @@ typedef struct {
>   	unsigned int id;
>   	unsigned int active;
>   	unsigned long vdso_base;
> +	void *pte_frag;
>   #ifdef CONFIG_PPC_MM_SLICES
>   	u16 user_psize;		/* page size index */
>   	unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
> diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
> index e20072972e35..8e8aad5172ab 100644
> --- a/arch/powerpc/include/asm/mmu-book3e.h
> +++ b/arch/powerpc/include/asm/mmu-book3e.h
> @@ -230,6 +230,7 @@ typedef struct {
>   	unsigned int	id;
>   	unsigned int	active;
>   	unsigned long	vdso_base;
> +	void *pte_frag;
>   } mm_context_t;
> 
>   /* Page size definitions, common between 32 and 64-bit
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index b2f89b621b15..7f2c37a3f99d 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -222,7 +222,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
>   	return 0;
>   }
> 
> -#ifndef CONFIG_PPC_BOOK3S_64
> +#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_BOOK3S_32)
>   static inline void arch_exit_mmap(struct mm_struct *mm)
>   {
>   }
> diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> index f3fec9052f31..e69423ad8e2e 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
> @@ -27,6 +27,9 @@ extern void __bad_pte(pmd_t *pmd);
>   extern struct kmem_cache *pgtable_cache[];
>   #define PGT_CACHE(shift) pgtable_cache[shift]
> 
> +pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel);
> +void pte_fragment_free(unsigned long *table, int kernel);
> +
>   static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>   {
>   	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
> @@ -58,11 +61,10 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
>   static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   				pgtable_t pte_page)
>   {
> -	*pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER |
> -		      _PMD_PRESENT);
> +	*pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
>   }
> 
> -#define pmd_pgtable(pmd) pmd_page(pmd)
> +#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
>   #else
> 
>   static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
> @@ -74,49 +76,38 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
>   static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
>   				pgtable_t pte_page)
>   {
> -	*pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
> +	*pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
>   }
> 
> -#define pmd_pgtable(pmd) pmd_page(pmd)
> +#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
>   #endif
> 
> -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
> +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> +					  unsigned long address)
>   {
> -	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
> +	return (pte_t *)pte_fragment_alloc(mm, address, 1);
>   }
> 
> -static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
> +static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> +				      unsigned long address)
>   {
> -	struct page *ptepage;
> -
> -	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
> -
> -	ptepage = alloc_pages(flags, 0);
> -	if (!ptepage)
> -		return NULL;
> -	if (!pgtable_page_ctor(ptepage)) {
> -		__free_page(ptepage);
> -		return NULL;
> -	}
> -	return ptepage;
> +	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
>   }
> 
>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>   {
> -	free_page((unsigned long)pte);
> +	pte_fragment_free((unsigned long *)pte, 1);
>   }
> 
>   static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
>   {
> -	pgtable_page_dtor(ptepage);
> -	__free_page(ptepage);
> +	pte_fragment_free((unsigned long *)ptepage, 0);
>   }
> 
>   static inline void pgtable_free(void *table, unsigned index_size)
>   {
>   	if (!index_size) {
> -		pgtable_page_dtor(virt_to_page(table));
> -		free_page((unsigned long)table);
> +		pte_fragment_free((unsigned long *)table, 0);
>   	} else {
>   		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
>   		kmem_cache_free(PGT_CACHE(index_size), table);
> @@ -155,6 +146,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
>   				  unsigned long address)
>   {
>   	tlb_flush_pgtable(tlb, address);
> -	pgtable_free_tlb(tlb, page_address(table), 0);
> +	pgtable_free_tlb(tlb, table, 0);
>   }
>   #endif /* _ASM_POWERPC_PGALLOC_32_H */
> diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
> index d2908a8038e8..73e2b1fbdb36 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@ -336,12 +336,12 @@ static inline int pte_young(pte_t pte)
>    */
>   #ifndef CONFIG_BOOKE
>   #define pmd_page_vaddr(pmd)	\
> -	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
> +	((unsigned long)__va(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
>   #define pmd_page(pmd)		\
>   	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
>   #else
>   #define pmd_page_vaddr(pmd)	\
> -	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
> +	((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
>   #define pmd_page(pmd)		\
>   	pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
>   #endif
> @@ -360,7 +360,8 @@ static inline int pte_young(pte_t pte)
>   	(pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \
>   				  pte_index(addr))
>   #define pte_offset_map(dir, addr)		\
> -	((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
> +	((pte_t *)(kmap_atomic(pmd_page(*(dir))) + \
> +		   (pmd_page_vaddr(*(dir)) & ~PAGE_MASK)) + pte_index(addr))
>   #define pte_unmap(pte)		kunmap_atomic(pte)
> 
>   /*
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index f6a1265face2..27d1c16601ee 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -335,7 +335,7 @@ void arch_free_page(struct page *page, int order);
>   #endif
> 
>   struct vm_area_struct;
> -#ifdef CONFIG_PPC_BOOK3S_64
> +#if !defined(CONFIG_PPC_BOOK3E_64) && !defined(CONFIG_PPC_BOOK3S_32)
>   /*
>    * For BOOK3s 64 with 4k and 64K linux page size
>    * we want to use pointers, because the page table
> @@ -343,12 +343,8 @@ struct vm_area_struct;
>    */
>   typedef pte_t *pgtable_t;
>   #else
> -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
> -typedef pte_t *pgtable_t;
> -#else
>   typedef struct page *pgtable_t;
>   #endif
> -#endif
> 


Now that is getting complicated. Is there a way to move that to a platform 
header instead of that complicated #if?

>   #include <asm-generic/memory_model.h>
>   #endif /* __ASSEMBLY__ */
> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
> index 8b38f7730211..1865a3e4ab8c 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -94,12 +94,20 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr);
>   void pgtable_cache_add(unsigned int shift);
>   void pgtable_cache_init(void);
> 
> +pte_t *early_alloc_pte(void);
> +
>   #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
>   void mark_initmem_nx(void);
>   #else
>   static inline void mark_initmem_nx(void) { }
>   #endif
> 
> +#ifndef PTE_FRAG_NR
> +#define PTE_FRAG_NR		1
> +#define PTE_FRAG_SIZE_SHIFT	PAGE_SHIFT
> +#define PTE_FRAG_SIZE		PAGE_SIZE
> +#endif
> +

IMHO we should avoid that. The challenge with #ifndef is that we must 
always make sure the header inclusion order is correct so that platform 
headers get included first. Why not move it to the platforms that want 
to use pte fragmentation?


>   #endif /* __ASSEMBLY__ */
> 
>   #endif /* _ASM_POWERPC_PGTABLE_H */
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index bd43b3ee52cb..e1deb15fe85e 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
>   obj-$(CONFIG_PPC_BOOK3S_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o \
>   				   $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o \
>   				   pgtable-frag.o
> +ifndef CONFIG_PPC_BOOK3S_32
> +obj-$(CONFIG_PPC32)		+= pgtable-frag.o
> +endif
>   obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
>   obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
>   obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(BITS).o
> diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
> index 4d80239ef83c..98f0ef463dc8 100644
> --- a/arch/powerpc/mm/mmu_context_nohash.c
> +++ b/arch/powerpc/mm/mmu_context_nohash.c
> @@ -385,6 +385,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
>   #endif
>   	mm->context.id = MMU_NO_CONTEXT;
>   	mm->context.active = 0;
> +	mm->context.pte_frag = NULL;
>   	return 0;
>   }
> 
> diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
> index ab4910e92aaf..d554a1cbc56d 100644
> --- a/arch/powerpc/mm/pgtable-frag.c
> +++ b/arch/powerpc/mm/pgtable-frag.c
> @@ -30,6 +30,7 @@ static void pte_frag_destroy(void *pte_frag)
>   	}
>   }
> 
> +#ifdef CONFIG_PPC_BOOK3S_64
>   static void pmd_frag_destroy(void *pmd_frag)
>   {
>   	int count;
> @@ -44,6 +45,7 @@ static void pmd_frag_destroy(void *pmd_frag)
>   		__free_page(page);
>   	}
>   }
> +#endif
> 
>   static void destroy_pagetable_cache(struct mm_struct *mm)
>   {
> @@ -53,15 +55,18 @@ static void destroy_pagetable_cache(struct mm_struct *mm)
>   	if (frag)
>   		pte_frag_destroy(frag);
> 
> +#ifdef CONFIG_PPC_BOOK3S_64
>   	frag = mm->context.pmd_frag;
>   	if (frag)
>   		pmd_frag_destroy(frag);
> +#endif
>   }
> 
>   void arch_exit_mmap(struct mm_struct *mm)
>   {
>   	destroy_pagetable_cache(mm);
> 
> +#ifdef CONFIG_PPC_BOOK3S_64
>   	if (radix_enabled()) {
>   		/*
>   		 * Radix doesn't have a valid bit in the process table
> @@ -79,6 +84,7 @@ void arch_exit_mmap(struct mm_struct *mm)
>   		 */
>   		process_tb[mm->context.id].prtb0 = 0;
>   	}
> +#endif
>   }
> 

Is there a way to avoid all that #ifdef? Maybe redo the frag code such 
that we have a few helpers that are platform independent?

>   static pte_t *get_pte_from_cache(struct mm_struct *mm)
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index 7900b613e6e5..81e6b18d1955 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -195,12 +195,16 @@ EXPORT_SYMBOL(iounmap);
>   static __init pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
>   {
>   	if (!pmd_present(*pmdp)) {
> -		pte_t *ptep = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
> +		pte_t *ptep = __va(memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE));
> 
>   		if (!ptep)
>   			return NULL;
> 
> -		clear_page(ptep);
> +		if (PTE_FRAG_SIZE == PAGE_SIZE)
> +			clear_page(ptep);
> +		else
> +			memset(ptep, 0, PTE_FRAG_SIZE);
> +
>   		pmd_populate_kernel(&init_mm, pmdp, ptep);
>   	}
>   	return pte_offset_kernel(pmdp, va);
> 


  reply	other threads:[~2018-09-19  3:35 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-18 16:57 [PATCH v4 00/20] Implement use of HW assistance on TLB table walk on 8xx Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 01/20] Revert "powerpc/8xx: Use L1 entry APG to handle _PAGE_ACCESSED for CONFIG_SWAP" Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 02/20] powerpc/code-patching: add a helper to get the address of a patch_site Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 03/20] powerpc/8xx: Use patch_site for memory setup patching Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 04/20] powerpc/8xx: Use patch_site for perf counters setup Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 05/20] powerpc/8xx: Move SW perf counters in first 32kb of memory Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 06/20] powerpc/8xx: Temporarily disable 16k pages and 512k hugepages Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 07/20] powerpc/mm: Use hardware assistance in TLB handlers on the 8xx Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 08/20] powerpc/mm: Enable 512k hugepage support with HW assistance " Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 09/20] powerpc/8xx: don't use r12/SPRN_SPRG_SCRATCH2 in TLB Miss handlers Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 10/20] powerpc/8xx: regroup TLB handler routines Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 11/20] powerpc/mm: don't use pte_alloc_one_kernel() before slab is available Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 12/20] powerpc/mm: inline pte_alloc_one() and pte_alloc_one_kernel() in PPC32 Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 13/20] powerpc/book3s32: Remove CONFIG_BOOKE dependent code Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 14/20] powerpc/mm: Move pte_fragment_alloc() to a common location Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 15/20] powerpc/mm: Avoid useless lock with single page fragments Christophe Leroy
2018-09-19  2:56   ` Aneesh Kumar K.V
2018-09-25 16:49     ` Christophe LEROY
2018-09-18 16:57 ` [PATCH v4 16/20] powerpc/mm: Extend pte_fragment functionality to nohash/32 Christophe Leroy
2018-09-19  3:03   ` Aneesh Kumar K.V [this message]
2018-09-25 16:48     ` Christophe LEROY
2018-09-18 16:57 ` [PATCH v4 17/20] powerpc/8xx: Remove PTE_ATOMIC_UPDATES Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 18/20] powerpc/mm: reintroduce 16K pages with HW assistance on 8xx Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 19/20] powerpc/nohash32: allow setting GUARDED attribute in the PMD directly Christophe Leroy
2018-09-18 16:57 ` [PATCH v4 20/20] powerpc/8xx: set " Christophe Leroy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=89038b3e-f1cd-a4a0-481b-46cef2b5e388@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=christophe.leroy@c-s.fr \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).