* [PATCH] sparc64: Add 16GB hugepage support
@ 2017-05-25 0:29 Nitin Gupta
2017-05-25 3:34 ` Paul Gortmaker
0 siblings, 1 reply; 4+ messages in thread
From: Nitin Gupta @ 2017-05-25 0:29 UTC (permalink / raw)
To: David S. Miller
Cc: Nitin Gupta, David S. Miller, Mike Kravetz, Kirill A. Shutemov,
Tom Hromatka, Michal Hocko, Ingo Molnar, Babu Moger, bob picco,
Paul Gortmaker, Thomas Tai, Pavel Tatashin, Atish Patra,
sparclinux, linux-kernel
Orabug: 25362942
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
---
arch/sparc/include/asm/page_64.h | 3 +-
arch/sparc/include/asm/pgtable_64.h | 5 +++
arch/sparc/include/asm/tsb.h | 35 +++++++++++++++++-
arch/sparc/kernel/tsb.S | 2 +-
arch/sparc/mm/hugetlbpage.c | 74 ++++++++++++++++++++++++++-----------
arch/sparc/mm/init_64.c | 41 ++++++++++++++++----
6 files changed, 128 insertions(+), 32 deletions(-)
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 5961b2d..8ee1f97 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -17,6 +17,7 @@
#define HPAGE_SHIFT 23
#define REAL_HPAGE_SHIFT 22
+#define HPAGE_16GB_SHIFT 34
#define HPAGE_2GB_SHIFT 31
#define HPAGE_256MB_SHIFT 28
#define HPAGE_64K_SHIFT 16
@@ -28,7 +29,7 @@
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
-#define HUGE_MAX_HSTATE 4
+#define HUGE_MAX_HSTATE 5
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 6fbd931..2444b02 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -414,6 +414,11 @@ static inline bool is_hugetlb_pmd(pmd_t pmd)
return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
}
+static inline bool is_hugetlb_pud(pud_t pud)
+{
+ return !!(pud_val(pud) & _PAGE_PUD_HUGE);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 32258e0..fbd8da7 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -195,6 +195,36 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
nop; \
699:
+ /* PUD has been loaded into REG1, interpret the value, seeing
+ * if it is a HUGE PUD or a normal one. If it is not valid
+ * then jump to FAIL_LABEL. If it is a HUGE PUD, and it
+ * translates to a valid PTE, branch to PTE_LABEL.
+ *
+ * We have to propagate bits [32:22] from the virtual address
+ * to resolve at 4M granularity.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ sethi %uhi(_PAGE_PUD_HUGE), REG2; \
+ sllx REG2, 32, REG2; \
+ andcc REG1, REG2, %g0; \
+ be,pt %xcc, 700f; \
+ sethi %hi(0x1ffc0000), REG2; \
+ brgez,pn REG1, FAIL_LABEL; \
+ sllx REG2, 1, REG2; \
+ brgez,pn REG1, FAIL_LABEL; \
+ andn REG1, REG2, REG1; \
+ and VADDR, REG2, REG2; \
+ brlz,pt REG1, PTE_LABEL; \
+ or REG1, REG2, REG1; \
+700:
+#else
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ nop;
+#endif
+
/* PMD has been loaded into REG1, interpret the value, seeing
* if it is a HUGE PMD or a normal one. If it is not valid
* then jump to FAIL_LABEL. If it is a HUGE PMD, and it
@@ -209,14 +239,14 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
sethi %uhi(_PAGE_PMD_HUGE), REG2; \
sllx REG2, 32, REG2; \
andcc REG1, REG2, %g0; \
- be,pt %xcc, 700f; \
+ be,pt %xcc, 701f; \
sethi %hi(4 * 1024 * 1024), REG2; \
brgez,pn REG1, FAIL_LABEL; \
andn REG1, REG2, REG1; \
and VADDR, REG2, REG2; \
brlz,pt REG1, PTE_LABEL; \
or REG1, REG2, REG1; \
-700:
+701:
#else
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
brz,pn REG1, FAIL_LABEL; \
@@ -242,6 +272,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index 10689cf..a0a5a13 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -117,7 +117,7 @@ tsb_miss_page_table_walk_sun4v_fastpath:
/* Valid PTE is now in %g5. */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- sethi %uhi(_PAGE_PMD_HUGE), %g7
+ sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
sllx %g7, 32, %g7
andcc %g5, %g7, %g0
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 7c29d38..62c1e62 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
switch (shift) {
+ case HPAGE_16GB_SHIFT:
+ hugepage_size = _PAGE_SZ16GB_4V;
+ pte_val(entry) |= _PAGE_PUD_HUGE;
+ break;
case HPAGE_2GB_SHIFT:
hugepage_size = _PAGE_SZ2GB_4V;
pte_val(entry) |= _PAGE_PMD_HUGE;
@@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
unsigned int shift;
switch (tte_szbits) {
+ case _PAGE_SZ16GB_4V:
+ shift = HPAGE_16GB_SHIFT;
+ break;
case _PAGE_SZ2GB_4V:
shift = HPAGE_2GB_SHIFT;
break;
@@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- if (pud) {
+ if (!pud)
+ return NULL;
+
+ if (sz >= PUD_SIZE)
+ pte = (pte_t *)pud;
+ else {
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return NULL;
@@ -288,12 +300,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
if (!pgd_none(*pgd)) {
pud = pud_offset(pgd, addr);
if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- if (is_hugetlb_pmd(*pmd))
- pte = (pte_t *)pmd;
- else
- pte = pte_offset_map(pmd, addr);
+ if (is_hugetlb_pud(*pud))
+ pte = (pte_t *)pud;
+ else {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ if (is_hugetlb_pmd(*pmd))
+ pte = (pte_t *)pmd;
+ else
+ pte = pte_offset_map(pmd, addr);
+ }
}
}
}
@@ -304,12 +320,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
- unsigned int i, nptes, orig_shift, shift;
- unsigned long size;
+ unsigned int nptes, orig_shift, shift;
+ unsigned long i, size;
pte_t orig;
size = huge_tte_to_size(entry);
- shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
+
+ shift = PAGE_SHIFT;
+ if (size >= PUD_SIZE)
+ shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ shift = PMD_SHIFT;
+ else
+ shift = PAGE_SHIFT;
+
nptes = size >> shift;
if (!pte_present(*ptep) && pte_present(entry))
@@ -332,19 +356,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- unsigned int i, nptes, hugepage_shift;
+ unsigned int i, nptes, orig_shift, shift;
unsigned long size;
pte_t entry;
entry = *ptep;
size = huge_tte_to_size(entry);
- if (size >= HPAGE_SIZE)
- nptes = size >> PMD_SHIFT;
+
+ shift = PAGE_SHIFT;
+ if (size >= PUD_SIZE)
+ shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ shift = PMD_SHIFT;
else
- nptes = size >> PAGE_SHIFT;
+ shift = PAGE_SHIFT;
- hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
- huge_tte_to_shift(entry);
+ nptes = size >> shift;
+ orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
if (pte_present(entry))
mm->context.hugetlb_pte_count -= nptes;
@@ -353,11 +381,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
for (i = 0; i < nptes; i++)
ptep[i] = __pte(0UL);
- maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
+ maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
if (size == HPAGE_SIZE)
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
- hugepage_shift);
+ orig_shift);
return entry;
}
@@ -370,7 +398,8 @@ int pmd_huge(pmd_t pmd)
int pud_huge(pud_t pud)
{
- return 0;
+ return !pud_none(pud) &&
+ (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
@@ -434,8 +463,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
- ceiling);
+ if (is_hugetlb_pud(*pud))
+ pud_clear(pud);
+ else
+ hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+ ceiling);
} while (pud++, addr = next, addr != end);
start &= PGDIR_MASK;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 0cda653..7c0fe73 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
hugepage_shift = ilog2(hugepage_size);
switch (hugepage_shift) {
+ case HPAGE_16GB_SHIFT:
+ hv_pgsz_mask = HV_PGSZ_MASK_16GB;
+ hv_pgsz_idx = HV_PGSZ_IDX_16GB;
+ break;
case HPAGE_2GB_SHIFT:
hv_pgsz_mask = HV_PGSZ_MASK_2GB;
hv_pgsz_idx = HV_PGSZ_IDX_2GB;
@@ -376,6 +380,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
{
struct mm_struct *mm;
unsigned long flags;
+ bool is_huge_tsb;
pte_t pte = *ptep;
if (tlb_type != hypervisor) {
@@ -393,15 +398,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
spin_lock_irqsave(&mm->context.lock, flags);
+ is_huge_tsb = false;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
- is_hugetlb_pmd(__pmd(pte_val(pte)))) {
- /* We are fabricating 8MB pages using 4MB real hw pages. */
- pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
- __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
- address, pte_val(pte));
- } else
+ if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
+ unsigned long hugepage_size = PAGE_SIZE;
+
+ if (is_vm_hugetlb_page(vma))
+ hugepage_size = huge_page_size(hstate_vma(vma));
+
+ if (hugepage_size >= PUD_SIZE) {
+ unsigned long mask = 0x1ffc00000UL;
+
+ /* Transfer bits [32:22] from address to resolve
+ * at 4M granularity.
+ */
+ pte_val(pte) &= ~mask;
+ pte_val(pte) |= (address & mask);
+ } else if (hugepage_size >= PMD_SIZE) {
+ /* We are fabricating 8MB pages using 4MB
+ * real hw pages.
+ */
+ pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
+ }
+
+ if (hugepage_size >= PMD_SIZE) {
+ __update_mmu_tsb_insert(mm, MM_TSB_HUGE,
+ REAL_HPAGE_SHIFT, address, pte_val(pte));
+ is_huge_tsb = true;
+ }
+ }
#endif
+ if (!is_huge_tsb)
__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
address, pte_val(pte));
--
2.9.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] sparc64: Add 16GB hugepage support
2017-05-25 0:29 [PATCH] sparc64: Add 16GB hugepage support Nitin Gupta
@ 2017-05-25 3:34 ` Paul Gortmaker
2017-05-25 3:45 ` David Miller
0 siblings, 1 reply; 4+ messages in thread
From: Paul Gortmaker @ 2017-05-25 3:34 UTC (permalink / raw)
To: Nitin Gupta
Cc: David S. Miller, Mike Kravetz, Kirill A. Shutemov, Tom Hromatka,
Michal Hocko, Ingo Molnar, Babu Moger, bob picco, Thomas Tai,
Pavel Tatashin, Atish Patra, sparclinux, linux-kernel
[[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
> Orabug: 25362942
>
> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
If this wasn't an accidental git send-email misfire, then there should
be a long log indicating the use case, the performance increase, the
testing that was done, etc. etc.
Normally I'd not notice but since I was Cc'd I figured it was worth a
mention -- for example the vendor ID above doesn't mean a thing to
all the rest of us, hence why I suspect it was a git send-email misfire;
sadly, I think we've all accidentally done that at least once....
Paul.
--
> ---
> arch/sparc/include/asm/page_64.h | 3 +-
> arch/sparc/include/asm/pgtable_64.h | 5 +++
> arch/sparc/include/asm/tsb.h | 35 +++++++++++++++++-
> arch/sparc/kernel/tsb.S | 2 +-
> arch/sparc/mm/hugetlbpage.c | 74 ++++++++++++++++++++++++++-----------
> arch/sparc/mm/init_64.c | 41 ++++++++++++++++----
> 6 files changed, 128 insertions(+), 32 deletions(-)
>
> diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
> index 5961b2d..8ee1f97 100644
> --- a/arch/sparc/include/asm/page_64.h
> +++ b/arch/sparc/include/asm/page_64.h
> @@ -17,6 +17,7 @@
>
> #define HPAGE_SHIFT 23
> #define REAL_HPAGE_SHIFT 22
> +#define HPAGE_16GB_SHIFT 34
> #define HPAGE_2GB_SHIFT 31
> #define HPAGE_256MB_SHIFT 28
> #define HPAGE_64K_SHIFT 16
> @@ -28,7 +29,7 @@
> #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
> #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
> #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
> -#define HUGE_MAX_HSTATE 4
> +#define HUGE_MAX_HSTATE 5
> #endif
>
> #ifndef __ASSEMBLY__
> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
> index 6fbd931..2444b02 100644
> --- a/arch/sparc/include/asm/pgtable_64.h
> +++ b/arch/sparc/include/asm/pgtable_64.h
> @@ -414,6 +414,11 @@ static inline bool is_hugetlb_pmd(pmd_t pmd)
> return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
> }
>
> +static inline bool is_hugetlb_pud(pud_t pud)
> +{
> + return !!(pud_val(pud) & _PAGE_PUD_HUGE);
> +}
> +
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> static inline pmd_t pmd_mkhuge(pmd_t pmd)
> {
> diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
> index 32258e0..fbd8da7 100644
> --- a/arch/sparc/include/asm/tsb.h
> +++ b/arch/sparc/include/asm/tsb.h
> @@ -195,6 +195,36 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> nop; \
> 699:
>
> + /* PUD has been loaded into REG1, interpret the value, seeing
> + * if it is a HUGE PUD or a normal one. If it is not valid
> + * then jump to FAIL_LABEL. If it is a HUGE PUD, and it
> + * translates to a valid PTE, branch to PTE_LABEL.
> + *
> + * We have to propagate bits [32:22] from the virtual address
> + * to resolve at 4M granularity.
> + */
> +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> + brz,pn REG1, FAIL_LABEL; \
> + sethi %uhi(_PAGE_PUD_HUGE), REG2; \
> + sllx REG2, 32, REG2; \
> + andcc REG1, REG2, %g0; \
> + be,pt %xcc, 700f; \
> + sethi %hi(0x1ffc0000), REG2; \
> + brgez,pn REG1, FAIL_LABEL; \
> + sllx REG2, 1, REG2; \
> + brgez,pn REG1, FAIL_LABEL; \
> + andn REG1, REG2, REG1; \
> + and VADDR, REG2, REG2; \
> + brlz,pt REG1, PTE_LABEL; \
> + or REG1, REG2, REG1; \
> +700:
> +#else
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> + brz,pn REG1, FAIL_LABEL; \
> + nop;
> +#endif
> +
> /* PMD has been loaded into REG1, interpret the value, seeing
> * if it is a HUGE PMD or a normal one. If it is not valid
> * then jump to FAIL_LABEL. If it is a HUGE PMD, and it
> @@ -209,14 +239,14 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> sethi %uhi(_PAGE_PMD_HUGE), REG2; \
> sllx REG2, 32, REG2; \
> andcc REG1, REG2, %g0; \
> - be,pt %xcc, 700f; \
> + be,pt %xcc, 701f; \
> sethi %hi(4 * 1024 * 1024), REG2; \
> brgez,pn REG1, FAIL_LABEL; \
> andn REG1, REG2, REG1; \
> and VADDR, REG2, REG2; \
> brlz,pt REG1, PTE_LABEL; \
> or REG1, REG2, REG1; \
> -700:
> +701:
> #else
> #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> brz,pn REG1, FAIL_LABEL; \
> @@ -242,6 +272,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
> srlx REG2, 64 - PAGE_SHIFT, REG2; \
> andn REG2, 0x7, REG2; \
> ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
> + USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
> brz,pn REG1, FAIL_LABEL; \
> sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
> srlx REG2, 64 - PAGE_SHIFT, REG2; \
> diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
> index 10689cf..a0a5a13 100644
> --- a/arch/sparc/kernel/tsb.S
> +++ b/arch/sparc/kernel/tsb.S
> @@ -117,7 +117,7 @@ tsb_miss_page_table_walk_sun4v_fastpath:
> /* Valid PTE is now in %g5. */
>
> #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> - sethi %uhi(_PAGE_PMD_HUGE), %g7
> + sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
> sllx %g7, 32, %g7
>
> andcc %g5, %g7, %g0
> diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
> index 7c29d38..62c1e62 100644
> --- a/arch/sparc/mm/hugetlbpage.c
> +++ b/arch/sparc/mm/hugetlbpage.c
> @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
> pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
>
> switch (shift) {
> + case HPAGE_16GB_SHIFT:
> + hugepage_size = _PAGE_SZ16GB_4V;
> + pte_val(entry) |= _PAGE_PUD_HUGE;
> + break;
> case HPAGE_2GB_SHIFT:
> hugepage_size = _PAGE_SZ2GB_4V;
> pte_val(entry) |= _PAGE_PMD_HUGE;
> @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
> unsigned int shift;
>
> switch (tte_szbits) {
> + case _PAGE_SZ16GB_4V:
> + shift = HPAGE_16GB_SHIFT;
> + break;
> case _PAGE_SZ2GB_4V:
> shift = HPAGE_2GB_SHIFT;
> break;
> @@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>
> pgd = pgd_offset(mm, addr);
> pud = pud_alloc(mm, pgd, addr);
> - if (pud) {
> + if (!pud)
> + return NULL;
> +
> + if (sz >= PUD_SIZE)
> + pte = (pte_t *)pud;
> + else {
> pmd = pmd_alloc(mm, pud, addr);
> if (!pmd)
> return NULL;
> @@ -288,12 +300,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
> if (!pgd_none(*pgd)) {
> pud = pud_offset(pgd, addr);
> if (!pud_none(*pud)) {
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_none(*pmd)) {
> - if (is_hugetlb_pmd(*pmd))
> - pte = (pte_t *)pmd;
> - else
> - pte = pte_offset_map(pmd, addr);
> + if (is_hugetlb_pud(*pud))
> + pte = (pte_t *)pud;
> + else {
> + pmd = pmd_offset(pud, addr);
> + if (!pmd_none(*pmd)) {
> + if (is_hugetlb_pmd(*pmd))
> + pte = (pte_t *)pmd;
> + else
> + pte = pte_offset_map(pmd, addr);
> + }
> }
> }
> }
> @@ -304,12 +320,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
> void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep, pte_t entry)
> {
> - unsigned int i, nptes, orig_shift, shift;
> - unsigned long size;
> + unsigned int nptes, orig_shift, shift;
> + unsigned long i, size;
> pte_t orig;
>
> size = huge_tte_to_size(entry);
> - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
> +
> + shift = PAGE_SHIFT;
> + if (size >= PUD_SIZE)
> + shift = PUD_SHIFT;
> + else if (size >= PMD_SIZE)
> + shift = PMD_SHIFT;
> + else
> + shift = PAGE_SHIFT;
> +
> nptes = size >> shift;
>
> if (!pte_present(*ptep) && pte_present(entry))
> @@ -332,19 +356,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep)
> {
> - unsigned int i, nptes, hugepage_shift;
> + unsigned int i, nptes, orig_shift, shift;
> unsigned long size;
> pte_t entry;
>
> entry = *ptep;
> size = huge_tte_to_size(entry);
> - if (size >= HPAGE_SIZE)
> - nptes = size >> PMD_SHIFT;
> +
> + shift = PAGE_SHIFT;
> + if (size >= PUD_SIZE)
> + shift = PUD_SHIFT;
> + else if (size >= PMD_SIZE)
> + shift = PMD_SHIFT;
> else
> - nptes = size >> PAGE_SHIFT;
> + shift = PAGE_SHIFT;
>
> - hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
> - huge_tte_to_shift(entry);
> + nptes = size >> shift;
> + orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
>
> if (pte_present(entry))
> mm->context.hugetlb_pte_count -= nptes;
> @@ -353,11 +381,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> for (i = 0; i < nptes; i++)
> ptep[i] = __pte(0UL);
>
> - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
> + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
> /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
> if (size == HPAGE_SIZE)
> maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
> - hugepage_shift);
> + orig_shift);
>
> return entry;
> }
> @@ -370,7 +398,8 @@ int pmd_huge(pmd_t pmd)
>
> int pud_huge(pud_t pud)
> {
> - return 0;
> + return !pud_none(pud) &&
> + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
> }
>
> static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> @@ -434,8 +463,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
> next = pud_addr_end(addr, end);
> if (pud_none_or_clear_bad(pud))
> continue;
> - hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> - ceiling);
> + if (is_hugetlb_pud(*pud))
> + pud_clear(pud);
> + else
> + hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> + ceiling);
> } while (pud++, addr = next, addr != end);
>
> start &= PGDIR_MASK;
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 0cda653..7c0fe73 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
> hugepage_shift = ilog2(hugepage_size);
>
> switch (hugepage_shift) {
> + case HPAGE_16GB_SHIFT:
> + hv_pgsz_mask = HV_PGSZ_MASK_16GB;
> + hv_pgsz_idx = HV_PGSZ_IDX_16GB;
> + break;
> case HPAGE_2GB_SHIFT:
> hv_pgsz_mask = HV_PGSZ_MASK_2GB;
> hv_pgsz_idx = HV_PGSZ_IDX_2GB;
> @@ -376,6 +380,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
> {
> struct mm_struct *mm;
> unsigned long flags;
> + bool is_huge_tsb;
> pte_t pte = *ptep;
>
> if (tlb_type != hypervisor) {
> @@ -393,15 +398,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
>
> spin_lock_irqsave(&mm->context.lock, flags);
>
> + is_huge_tsb = false;
> #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> - if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
> - is_hugetlb_pmd(__pmd(pte_val(pte)))) {
> - /* We are fabricating 8MB pages using 4MB real hw pages. */
> - pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
> - address, pte_val(pte));
> - } else
> + if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
> + unsigned long hugepage_size = PAGE_SIZE;
> +
> + if (is_vm_hugetlb_page(vma))
> + hugepage_size = huge_page_size(hstate_vma(vma));
> +
> + if (hugepage_size >= PUD_SIZE) {
> + unsigned long mask = 0x1ffc00000UL;
> +
> + /* Transfer bits [32:22] from address to resolve
> + * at 4M granularity.
> + */
> + pte_val(pte) &= ~mask;
> + pte_val(pte) |= (address & mask);
> + } else if (hugepage_size >= PMD_SIZE) {
> + /* We are fabricating 8MB pages using 4MB
> + * real hw pages.
> + */
> + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> + }
> +
> + if (hugepage_size >= PMD_SIZE) {
> + __update_mmu_tsb_insert(mm, MM_TSB_HUGE,
> + REAL_HPAGE_SHIFT, address, pte_val(pte));
> + is_huge_tsb = true;
> + }
> + }
> #endif
> + if (!is_huge_tsb)
> __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
> address, pte_val(pte));
>
> --
> 2.9.2
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] sparc64: Add 16GB hugepage support
2017-05-25 3:34 ` Paul Gortmaker
@ 2017-05-25 3:45 ` David Miller
2017-05-25 4:08 ` Nitin Gupta
0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2017-05-25 3:45 UTC (permalink / raw)
To: paul.gortmaker
Cc: nitin.m.gupta, mike.kravetz, kirill.shutemov, tom.hromatka,
mhocko, mingo, babu.moger, bob.picco, thomas.tai, pasha.tatashin,
atish.patra, sparclinux, linux-kernel
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 24 May 2017 23:34:42 -0400
> [[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
>
>> Orabug: 25362942
>>
>> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
>
> If this wasn't an accidental git send-email misfire, then there should
> be a long log indicating the use case, the performance increase, the
> testing that was done, etc. etc.
>
> Normally I'd not notice but since I was Cc'd I figured it was worth a
> mention -- for example the vendor ID above doesn't mean a thing to
> all the rest of us, hence why I suspect it was a git send-email misfire;
> sadly, I think we've all accidentally done that at least once....
Agreed.
No commit message whatsoever is basically unacceptable for something
like this.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] sparc64: Add 16GB hugepage support
2017-05-25 3:45 ` David Miller
@ 2017-05-25 4:08 ` Nitin Gupta
0 siblings, 0 replies; 4+ messages in thread
From: Nitin Gupta @ 2017-05-25 4:08 UTC (permalink / raw)
To: David Miller, paul.gortmaker
Cc: mike.kravetz, kirill.shutemov, tom.hromatka, mhocko, mingo,
babu.moger, bob.picco, thomas.tai, pasha.tatashin, atish.patra,
sparclinux, linux-kernel
On 5/24/17 8:45 PM, David Miller wrote:
> From: Paul Gortmaker <paul.gortmaker@windriver.com>
> Date: Wed, 24 May 2017 23:34:42 -0400
>
>> [[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
>>
>>> Orabug: 25362942
>>>
>>> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
>>
>> If this wasn't an accidental git send-email misfire, then there should
>> be a long log indicating the use case, the performance increase, the
>> testing that was done, etc. etc.
>>
>> Normally I'd not notice but since I was Cc'd I figured it was worth a
>> mention -- for example the vendor ID above doesn't mean a thing to
>> all the rest of us, hence why I suspect it was a git send-email misfire;
>> sadly, I think we've all accidentally done that at least once....
>
> Agreed.
>
> No commit message whatsoever is basically unacceptable for something
> like this.
>
Ok, I will include usage, testing notes, performance numbers etc., in
v2 patch. Still, I do try to include "Orabug" for better tracking of
bugs internally; I hope that's okay.
Thanks,
Nitin
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-05-25 4:09 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-25 0:29 [PATCH] sparc64: Add 16GB hugepage support Nitin Gupta
2017-05-25 3:34 ` Paul Gortmaker
2017-05-25 3:45 ` David Miller
2017-05-25 4:08 ` Nitin Gupta
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).