* [PATCH v2 1/2] sparc64: Trim page tables for 8M hugepages
From: Nitin Gupta @ 2016-06-23 22:05 UTC
  To: David S. Miller
  Cc: Nitin Gupta, David S. Miller, Andrew Morton, Kirill A. Shutemov,
	Julian Calaby, Aneesh Kumar K.V, Hugh Dickins, Adam Buchbinder,
	Minchan Kim, David Ahern, Stephen Rothwell, Chris Hyser,
	Khalid Aziz, Toshi Kani, Tony Luck, sparclinux, linux-kernel

For PMD-aligned (8M) hugepages, we currently allocate
all four page table levels, which is wasteful. We now
allocate only down to the PMD level, which reduces the
memory used for page tables.
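
For context (an illustration, not part of the patch, and assuming the
usual sparc64 configuration of 8 KB base pages with an 8 MB HPAGE_SIZE),
the per-mapping saving looks roughly like this:

	/*
	 * before: pgd -> pud -> pmd -> pte page
	 *         (1 << HUGETLB_PAGE_ORDER = 1024 entries * 8 bytes = 8 KB)
	 * after:  pgd -> pud -> pmd slot holds the huge PTE directly
	 *
	 * i.e. one 8 KB PTE page is no longer allocated per 8M mapping.
	 */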

Orabug: 22630259

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
---

Changelog v2 vs v1:
 - Move the sparc-specific declaration of hugetlb_free_pgd_range
   to the arch-specific hugetlb.h header.

 arch/sparc/include/asm/pgtable_64.h |    7 +++-
 arch/sparc/include/asm/tsb.h        |    2 +-
 arch/sparc/mm/fault_64.c            |    4 +-
 arch/sparc/mm/hugetlbpage.c         |   68 +++++++---------------------------
 arch/sparc/mm/init_64.c             |    5 ++-
 5 files changed, 27 insertions(+), 59 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index e7d8280..1fb317f 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -395,7 +395,7 @@ static inline unsigned long __pte_huge_mask(void)
 
 static inline pte_t pte_mkhuge(pte_t pte)
 {
-	return __pte(pte_val(pte) | __pte_huge_mask());
+	return __pte(pte_val(pte) | _PAGE_PMD_HUGE | __pte_huge_mask());
 }
 
 static inline bool is_hugetlb_pte(pte_t pte)
@@ -403,6 +403,11 @@ static inline bool is_hugetlb_pte(pte_t pte)
 	return !!(pte_val(pte) & __pte_huge_mask());
 }
 
+static inline bool is_hugetlb_pmd(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index c6a155c..32258e0 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -203,7 +203,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 * We have to propagate the 4MB bit of the virtual address
 	 * because we are fabricating 8MB pages using 4MB hw pages.
 	 */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
 	brz,pn		REG1, FAIL_LABEL;		\
 	 sethi		%uhi(_PAGE_PMD_HUGE), REG2;	\
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index cb841a3..ff3f9f9 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -111,8 +111,8 @@ static unsigned int get_user_insn(unsigned long tpc)
 	if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
 		goto out_irq_enable;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (pmd_trans_huge(*pmdp)) {
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	if (is_hugetlb_pmd(*pmdp)) {
 		pa  = pmd_pfn(*pmdp) << PAGE_SHIFT;
 		pa += tpc & ~HPAGE_MASK;
 
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index ba52e64..cafb5ca 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -131,23 +131,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	pte_t *pte = NULL;
 
-	/* We must align the address, because our caller will run
-	 * set_huge_pte_at() on whatever we return, which writes out
-	 * all of the sub-ptes for the hugepage range.  So we have
-	 * to give it the first such sub-pte.
-	 */
-	addr &= HPAGE_MASK;
-
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		pmd = pmd_alloc(mm, pud, addr);
-		if (pmd)
-			pte = pte_alloc_map(mm, pmd, addr);
-	}
+	if (pud)
+		pte = (pte_t *)pmd_alloc(mm, pud, addr);
+
 	return pte;
 }
 
@@ -155,19 +145,13 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	pte_t *pte = NULL;
 
-	addr &= HPAGE_MASK;
-
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_none(*pgd)) {
 		pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd))
-				pte = pte_offset_map(pmd, addr);
-		}
+		if (!pud_none(*pud))
+			pte = (pte_t *)pmd_offset(pud, addr);
 	}
 	return pte;
 }
@@ -175,67 +159,43 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {
-	int i;
-	pte_t orig[2];
-	unsigned long nptes;
+	pte_t orig;
 
 	if (!pte_present(*ptep) && pte_present(entry))
 		mm->context.huge_pte_count++;
 
 	addr &= HPAGE_MASK;
-
-	nptes = 1 << HUGETLB_PAGE_ORDER;
-	orig[0] = *ptep;
-	orig[1] = *(ptep + nptes / 2);
-	for (i = 0; i < nptes; i++) {
-		*ptep = entry;
-		ptep++;
-		addr += PAGE_SIZE;
-		pte_val(entry) += PAGE_SIZE;
-	}
+	orig = *ptep;
+	*ptep = entry;
 
 	/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
-	addr -= REAL_HPAGE_SIZE;
-	ptep -= nptes / 2;
-	maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0);
-	addr -= REAL_HPAGE_SIZE;
-	ptep -= nptes / 2;
-	maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0);
+	maybe_tlb_batch_add(mm, addr, ptep, orig, 0);
+	maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0);
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep)
 {
 	pte_t entry;
-	int i;
-	unsigned long nptes;
 
 	entry = *ptep;
 	if (pte_present(entry))
 		mm->context.huge_pte_count--;
 
 	addr &= HPAGE_MASK;
-	nptes = 1 << HUGETLB_PAGE_ORDER;
-	for (i = 0; i < nptes; i++) {
-		*ptep = __pte(0UL);
-		addr += PAGE_SIZE;
-		ptep++;
-	}
+	*ptep = __pte(0UL);
 
 	/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
-	addr -= REAL_HPAGE_SIZE;
-	ptep -= nptes / 2;
-	maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
-	addr -= REAL_HPAGE_SIZE;
-	ptep -= nptes / 2;
 	maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
+	maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0);
 
 	return entry;
 }
 
 int pmd_huge(pmd_t pmd)
 {
-	return 0;
+	return !pmd_none(pmd) &&
+		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
 }
 
 int pud_huge(pud_t pud)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 14bb0d5..2d94fed 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -346,9 +346,12 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 	spin_lock_irqsave(&mm->context.lock, flags);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
+	if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) {
+		/* We are fabricating 8MB pages using 4MB real hw pages.  */
+		pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
 		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
 					address, pte_val(pte));
+	}
 	else
 #endif
 		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
-- 
1.7.1

* [PATCH v2 2/2] sparc64: Fix pagetable freeing for hugepage regions
From: Nitin Gupta @ 2016-06-23 22:05 UTC
  To: David S. Miller
  Cc: Nitin Gupta, David S. Miller, Andrew Morton, Dominik Dingel,
	Martin Schwidefsky, Kirill A. Shutemov, sparclinux, linux-kernel

8M hugepages now allocate page tables down to the PMD
level only. So, when freeing page tables for an 8M
hugepage-backed region, make sure we don't try to access
the non-existent PTE level.
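
For context, the generic unmap path frees hugetlb page tables through
this hook; a rough sketch of the caller (free_pgtables() in mm/memory.c,
not part of this patch):

	if (is_vm_hugetlb_page(vma))
		hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
				       floor, next ? next->vm_start : ceiling);
	else
		free_pgd_range(tlb, addr, vma->vm_end,
			       floor, next ? next->vm_start : ceiling);

With the sparc64 implementation added below, the hugetlb branch clears
huge PMD entries directly instead of assuming a PTE page hangs off each
one.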

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
---
 arch/sparc/include/asm/hugetlb.h |   12 ++---
 arch/sparc/mm/hugetlbpage.c      |   98 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 139e711..dcbf985 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -31,14 +31,6 @@ static inline int prepare_hugepage_range(struct file *file,
 	return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
@@ -82,4 +74,8 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
 
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+			    unsigned long end, unsigned long floor,
+			    unsigned long ceiling);
+
 #endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index cafb5ca..494c390 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -12,6 +12,7 @@
 
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
@@ -202,3 +203,100 @@ int pud_huge(pud_t pud)
 {
 	return 0;
 }
+
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+			   unsigned long addr)
+{
+	pgtable_t token = pmd_pgtable(*pmd);
+
+	pmd_clear(pmd);
+	pte_free_tlb(tlb, token, addr);
+	atomic_long_dec(&tlb->mm->nr_ptes);
+}
+
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd))
+			continue;
+		if (is_hugetlb_pmd(*pmd))
+			pmd_clear(pmd);
+		else
+			hugetlb_free_pte_range(tlb, pmd, addr);
+	} while (pmd++, addr = next, addr != end);
+
+	start &= PUD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PUD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pmd = pmd_offset(pud, start);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd, start);
+	mm_dec_nr_pmds(tlb->mm);
+}
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+				       ceiling);
+	} while (pud++, addr = next, addr != end);
+
+	start &= PGDIR_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PGDIR_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(pgd, start);
+	pgd_clear(pgd);
+	pud_free_tlb(tlb, pud, start);
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+			    unsigned long addr, unsigned long end,
+			    unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(tlb->mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+	} while (pgd++, addr = next, addr != end);
+}
-- 
1.7.1

* Re: [PATCH v2 2/2] sparc64: Fix pagetable freeing for hugepage regions
From: David Miller @ 2016-07-29  4:00 UTC
  To: nitin.m.gupta
  Cc: akpm, dingel, schwidefsky, kirill.shutemov, sparclinux, linux-kernel

From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Thu, 23 Jun 2016 15:05:36 -0700

> 8M hugepages now allocate page tables down to the PMD
> level only. So, when freeing page tables for an 8M
> hugepage-backed region, make sure we don't try to access
> the non-existent PTE level.
> 
> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>

The manner in which you have submitted this change breaks
bisectability.

The "problem" you are fixing here is introduced by you in patch #1.

You need to combine these two changes; otherwise there is
an intermediate state where the kernel is broken by your changes.
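
For reference, one way to fold the fix into the first patch before
resending (illustrative commands, not taken from this thread) is an
interactive rebase:

	$ git rebase -i HEAD~2
	# in the todo list, change the second commit's "pick" to "squash"
	# (or "fixup" to drop its log message), then save and exit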
