All of lore.kernel.org
 help / color / mirror / Atom feed
From: Balbir Singh <bsingharora@gmail.com>
To: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: benh@kernel.crashing.org, paulus@samba.org, mpe@ellerman.id.au,
	Michael Neuling <mikey@neuling.org>,
	linuxppc-dev@lists.ozlabs.org
Subject: Re: [RFC PATCH V1 03/33] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table
Date: Wed, 13 Jan 2016 19:52:10 +1100	[thread overview]
Message-ID: <20160113195210.47618436@cotter.ozlabs.ibm.com> (raw)
In-Reply-To: <1452582968-22669-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

On Tue, 12 Jan 2016 12:45:38 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> This is needed so that we can support both hash and radix page table
> using single kernel. Radix kernel uses a 4 level table.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/Kconfig                          |  1 +
>  arch/powerpc/include/asm/book3s/64/hash-4k.h  | 33
> +--------------------------
> arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +++++++++-------
> arch/powerpc/include/asm/book3s/64/hash.h     |  8 +++++++
> arch/powerpc/include/asm/book3s/64/pgtable.h  | 25
> +++++++++++++++++++- arch/powerpc/include/asm/pgalloc-64.h         |
> 24 ++++++++++++++++--- arch/powerpc/include/asm/pgtable-types.h
> | 13 +++++++---- arch/powerpc/mm/init_64.c                     | 21
> ++++++++++++----- 8 files changed, 90 insertions(+), 55 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 378f1127ca98..618afea4c9fc 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -303,6 +303,7 @@ config ZONE_DMA32

snip
> -
>  #define PTE_INDEX_SIZE  8
> -#define PMD_INDEX_SIZE  10
> -#define PUD_INDEX_SIZE	0
> +#define PMD_INDEX_SIZE  5
> +#define PUD_INDEX_SIZE	5
>  #define PGD_INDEX_SIZE  12
>  

OK, so the 10-bit PMD index is now split into 5 bits each for the PMD and
PUD? What is the plan for huge pages? I saw you mentioned it was a TODO.

>  #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
>  #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
> +#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
>  #define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
>  
>  /* With 4k base page size, hugepage PTEs go at the PMD level */
> @@ -20,8 +19,13 @@
>  #define PMD_SIZE	(1UL << PMD_SHIFT)
>  #define PMD_MASK	(~(PMD_SIZE-1))
>  
> +/* PUD_SHIFT determines what a third-level page table entry can map
> */ +#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
> +#define PUD_SIZE	(1UL << PUD_SHIFT)
> +#define PUD_MASK	(~(PUD_SIZE-1))
> +
>  /* PGDIR_SHIFT determines what a third-level page table entry can
> map */ -#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
> +#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
>  #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
>  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
>  
> @@ -61,6 +65,8 @@
>  #define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
>  /* Bits to mask out from a PGD/PUD to get to the PMD page */

The comment looks like it applies to the PMD and not the PUD.
>  #define PUD_MASKED_BITS		0x1ff

Given that PUD is now 5 bits, this should be 0x1f?

> +/* FIXME!! check this */
> +#define PGD_MASKED_BITS		0
>  

PGD_MASKED_BITS is 0? Shouldn't it be 0xfe?

>  #ifndef __ASSEMBLY__
>  
> @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte,
> unsigned long index); #else
>  #define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
>  #endif
> +#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
>  #define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
>  
> -#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
> -#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
> -
>  #ifdef CONFIG_HUGETLB_PAGE
>  /*
>   * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can
> have diff --git a/arch/powerpc/include/asm/book3s/64/hash.h
> b/arch/powerpc/include/asm/book3s/64/hash.h index
> f46974d0134a..9ff1e056acef 100644 ---
> a/arch/powerpc/include/asm/book3s/64/hash.h +++
> b/arch/powerpc/include/asm/book3s/64/hash.h @@ -226,6 +226,7 @@
>  #define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
>  
>  #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) &
> (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >>
> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address)
> (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define
> pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) 
> @@ -354,8 +355,15 @@ static inline void __ptep_set_access_flags(pte_t
> *ptep, pte_t entry) :"cc");
>  }
>  
> +static inline int pgd_bad(pgd_t pgd)
> +{
> +	return (pgd_val(pgd) == 0);
> +}
> +
>  #define __HAVE_ARCH_PTE_SAME
>  #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) &
> ~_PAGE_HPTEFLAGS) == 0) +#define pgd_page_vaddr(pgd)
> (pgd_val(pgd) & ~PGD_MASKED_BITS) +
>  
>  /* Generic accessors to PTE bits */
>  static inline int pte_write(pte_t pte)
> { return !!(pte_val(pte) & _PAGE_RW);} diff --git
> a/arch/powerpc/include/asm/book3s/64/pgtable.h
> b/arch/powerpc/include/asm/book3s/64/pgtable.h index
> e7162dba987e..8f639401c7ba 100644 ---
> a/arch/powerpc/include/asm/book3s/64/pgtable.h +++
> b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -111,6 +111,26 @@
> static inline void pgd_set(pgd_t *pgdp, unsigned long val) *pgdp =
> __pgd(val); } 
> +static inline void pgd_clear(pgd_t *pgdp)
> +{
> +	*pgdp = __pgd(0);
> +}
> +
> +#define pgd_none(pgd)		(!pgd_val(pgd))
> +#define pgd_present(pgd)	(!pgd_none(pgd))
> +
> +static inline pte_t pgd_pte(pgd_t pgd)
> +{
> +	return __pte(pgd_val(pgd));
> +}
> +
> +static inline pgd_t pte_pgd(pte_t pte)
> +{
> +	return __pgd(pte_val(pte));
> +}
> +
> +extern struct page *pgd_page(pgd_t pgd);
> +
>  /*
>   * Find an entry in a page-table-directory.  We combine the address
> region
>   * (the high order N bits) and the pgd portion of the address.
> @@ -118,9 +138,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned
> long val) 
>  #define pgd_offset(mm, address)	 ((mm)->pgd +
> pgd_index(address)) 
> +#define pud_offset(pgdp, addr)	\
> +	(((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
>  #define pmd_offset(pudp,addr) \
>  	(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
> -
>  #define pte_offset_kernel(dir,addr) \
>  	(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
>  
> @@ -135,6 +156,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned
> long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__,
> pte_val(e)) #define pmd_ERROR(e) \
>  	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__,
> pmd_val(e)) +#define pud_ERROR(e) \
> +	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__,
> pud_val(e)) #define pgd_ERROR(e) \
>  	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__,
> pgd_val(e)) 
> diff --git a/arch/powerpc/include/asm/pgalloc-64.h
> b/arch/powerpc/include/asm/pgalloc-64.h index
> 69ef28a81733..014489a619d0 100644 ---
> a/arch/powerpc/include/asm/pgalloc-64.h +++
> b/arch/powerpc/include/asm/pgalloc-64.h @@ -171,7 +171,25 @@ extern
> void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int
> shift); extern void __tlb_remove_table(void *_table); #endif
>  
> -#define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned
> long)pmd) +#ifndef __PAGETABLE_PUD_FOLDED
> +/* book3s 64 is 4 level page table */
> +#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, PUD)
> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned
> long addr) +{
> +	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
> +				GFP_KERNEL|__GFP_REPEAT);
> +}
> +
> +static inline void pud_free(struct mm_struct *mm, pud_t *pud)
> +{
> +	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
> +}
> +#endif
> +
> +static inline void pud_populate(struct mm_struct *mm, pud_t *pud,
> pmd_t *pmd) +{
> +	pud_set(pud, (unsigned long)pmd);
> +}
>  
>  static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t
> *pmd, pte_t *pte)
> @@ -233,11 +251,11 @@ static inline void pmd_free(struct mm_struct
> *mm, pmd_t *pmd) 
>  #define __pmd_free_tlb(tlb, pmd, addr)		      \
>  	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
> -#ifndef CONFIG_PPC_64K_PAGES
> +#ifndef __PAGETABLE_PUD_FOLDED
>  #define __pud_free_tlb(tlb, pud, addr)		      \
>  	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
>  
> -#endif /* CONFIG_PPC_64K_PAGES */
> +#endif /* __PAGETABLE_PUD_FOLDED */
>  
>  #define check_pgt_cache()	do { } while (0)
>  
> diff --git a/arch/powerpc/include/asm/pgtable-types.h
> b/arch/powerpc/include/asm/pgtable-types.h index
> 71487e1ca638..43140f8b0592 100644 ---
> a/arch/powerpc/include/asm/pgtable-types.h +++
> b/arch/powerpc/include/asm/pgtable-types.h @@ -21,15 +21,18 @@ static
> inline unsigned long pmd_val(pmd_t x) return x.pmd;
>  }
>  
> -/* PUD level exusts only on 4k pages */
> -#ifndef CONFIG_PPC_64K_PAGES
> +/*
> + * 64 bit hash always use 4 level table. Everybody else use 4 level
> + * only for 4K page size.
> + */
> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
>  typedef struct { unsigned long pud; } pud_t;

>  #define __pud(x)	((pud_t) { (x) })
>  static inline unsigned long pud_val(pud_t x)
>  {
>  	return x.pud;
>  }
> -#endif /* !CONFIG_PPC_64K_PAGES */
> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
>  #endif /* CONFIG_PPC64 */
>  
>  /* PGD level */
> @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd)
>  	return pmd;
>  }
>  
> -#ifndef CONFIG_PPC_64K_PAGES
> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
>  typedef unsigned long pud_t;



>  #define __pud(x)	(x)
>  static inline unsigned long pud_val(pud_t pud)
>  {
>  	return pud;
>  }
> -#endif /* !CONFIG_PPC_64K_PAGES */
> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
>  #endif /* CONFIG_PPC64 */
>  
>  typedef unsigned long pgd_t;
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index 379a6a90644b..8ce1ec24d573 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr)
>  	memset(addr, 0, PGD_TABLE_SIZE);
>  }
>  
> +static void pud_ctor(void *addr)
> +{
> +	memset(addr, 0, PUD_TABLE_SIZE);
> +}
> +
>  static void pmd_ctor(void *addr)
>  {
>  	memset(addr, 0, PMD_TABLE_SIZE);
> @@ -138,14 +143,18 @@ void pgtable_cache_init(void)
>  {
>  	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
>  	pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
> +	/*
> +	 * In all current configs, when the PUD index exists it's the
> +	 * same size as either the pgd or pmd index except with THP
> enabled
> +	 * on book3s 64
> +	 */
> +	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
> +		pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
> +
>  	if (!PGT_CACHE(PGD_INDEX_SIZE)
> || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable
> caches");
> -	/* In all current configs, when the PUD index exists it's the
> -	 * same size as either the pgd or pmd index.  Verify that the
> -	 * initialization above has also created a PUD cache.  This
> -	 * will need re-examiniation if we add new possibilities for
> -	 * the pagetable layout. */
> -	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
> +	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
> +		panic("Couldn't allocate pud pgtable caches");
>  }
>  
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP

  reply	other threads:[~2016-01-13  8:52 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-12  7:15 [RFC PATCH V1 00/33] Book3s abstraction in preparation for new MMU model Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 01/33] powerpc/mm: add _PAGE_HASHPTE similar to 4K hash Aneesh Kumar K.V
2016-01-13  2:48   ` Balbir Singh
2016-01-13  6:02     ` Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 02/33] powerpc/mm: Split pgtable types to separate header Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 03/33] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table Aneesh Kumar K.V
2016-01-13  8:52   ` Balbir Singh [this message]
2016-01-15  0:25   ` Balbir Singh
2016-01-18  7:32     ` Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 04/33] powerpc/mm: Copy pgalloc (part 1) Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 05/33] powerpc/mm: Copy pgalloc (part 2) Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 06/33] powerpc/mm: Copy pgalloc (part 3) Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 07/33] mm: arch hook for vm_get_page_prot Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 08/33] mm: Some arch may want to use HPAGE_PMD related values as variables Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 09/33] powerpc/mm: Hugetlbfs is book3s_64 and fsl_book3e (32 or 64) Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 10/33] powerpc/mm: free_hugepd_range split to hash and nonhash Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 11/33] powerpc/mm: Use helper instead of opencoding Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 12/33] powerpc/mm: Move hash64 specific defintions to seperate header Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 13/33] powerpc/mm: Move swap related definition ot hash64 header Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 14/33] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
2016-01-12  7:42   ` Denis Kirjanov
2016-01-13  3:57     ` Benjamin Herrenschmidt
2016-01-13  6:07       ` Aneesh Kumar K.V
2016-01-13  7:18         ` Benjamin Herrenschmidt
2016-01-12  7:15 ` [RFC PATCH V1 15/33] powerpc/mm: Use helper for finding pte filter mask for gup Aneesh Kumar K.V
2016-01-13  8:13   ` Denis Kirjanov
2016-01-12  7:15 ` [RFC PATCH V1 16/33] powerpc/mm: Move hash page table related functions to pgtable-hash64.c Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 17/33] mm: Change pmd_huge_pte type in mm_struct Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 18/33] powerpc/mm: Add helper for update page flags during ioremap Aneesh Kumar K.V
2016-01-12  7:45   ` Denis Kirjanov
2016-01-12  7:15 ` [RFC PATCH V1 19/33] powerpc/mm: Rename hash specific page table bits (_PAGE* -> H_PAGE*) Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 20/33] powerpc/mm: Use flush_tlb_page in ptep_clear_flush_young Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 21/33] powerpc/mm: THP is only available on hash64 as of now Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 22/33] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 23/33] powerpc/mm: Create a new headers for tlbflush for hash64 Aneesh Kumar K.V
2016-01-12  7:15 ` [RFC PATCH V1 24/33] powerpc/mm: Hash linux abstraction for page table accessors Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 25/33] powerpc/mm: Hash linux abstraction for functions in pgtable-hash.c Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 26/33] powerpc/mm: Hash linux abstraction for mmu context handling code Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 27/33] powerpc/mm: Move hash related mmu-*.h headers to book3s/ Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 28/33] powerpc/mm: Hash linux abstractions for early init routines Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 29/33] powerpc/mm: Hash linux abstraction for THP Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 30/33] powerpc/mm: Hash linux abstraction for HugeTLB Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 31/33] powerpc/mm: Hash linux abstraction for page table allocator Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 32/33] powerpc/mm: Hash linux abstraction for tlbflush routines Aneesh Kumar K.V
2016-01-12  7:16 ` [RFC PATCH V1 33/33] powerpc/mm: Hash linux abstraction for pte swap encoding Aneesh Kumar K.V

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160113195210.47618436@cotter.ozlabs.ibm.com \
    --to=bsingharora@gmail.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mikey@neuling.org \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.