* [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
@ 2016-11-23 11:09 Aneesh Kumar K.V
  2016-11-23 11:09 ` [PATCH v5 2/7] powerpc/mm: Rename hugetlb-radix.h to hugetlb.h Aneesh Kumar K.V
                   ` (7 more replies)
  0 siblings, 8 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:09 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

When we are updating a pte, we just need to flush the tlb mapping
for that pte. Right now we do a full mm flush because we don't track
the page size. Update the interface to track the page size and use
that to do the right tlb flush.
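
On the POWER9 DD1 path this in effect replaces the PID-wide flush with
a va-based one (a simplified sketch, using the helpers from the diff
below; psize is derived from the newly passed page_size):

	/* before: flush every translation for the mm */
	radix__flush_tlb_mm(mm);

	/* after: flush only the affected va, at the right page size */
	psize = radix_get_mmu_psize(page_size);
	radix__flush_tlb_page_psize(mm, address, psize);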

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 +++-
 arch/powerpc/include/asm/book3s/64/pgtable.h |  7 +++++--
 arch/powerpc/include/asm/book3s/64/radix.h   | 14 +++++++-------
 arch/powerpc/include/asm/nohash/32/pgtable.h |  4 +++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  4 +++-
 arch/powerpc/mm/pgtable-book3s64.c           |  3 ++-
 arch/powerpc/mm/pgtable-radix.c              | 16 ++++++++++++++++
 arch/powerpc/mm/pgtable.c                    | 10 ++++++++--
 arch/powerpc/mm/tlb-radix.c                  | 15 ---------------
 9 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 6b8b2d57fdc8..cd835e74e633 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -224,7 +224,9 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 
 
 static inline void __ptep_set_access_flags(struct mm_struct *mm,
-					   pte_t *ptep, pte_t entry)
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   unsigned long page_size)
 {
 	unsigned long set = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 86870c11917b..761622ec7f2a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -580,10 +580,13 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
  */
 
 static inline void __ptep_set_access_flags(struct mm_struct *mm,
-					   pte_t *ptep, pte_t entry)
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   unsigned long page_size)
 {
 	if (radix_enabled())
-		return radix__ptep_set_access_flags(mm, ptep, entry);
+		return radix__ptep_set_access_flags(mm, ptep, entry,
+						    address, page_size);
 	return hash__ptep_set_access_flags(ptep, entry);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 2a46dea8e1b1..e104004bf2b1 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -110,6 +110,7 @@
 #define RADIX_PUD_TABLE_SIZE	(sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
 #define RADIX_PGD_TABLE_SIZE	(sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
 
+extern int radix_get_mmu_psize(unsigned long page_size);
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
@@ -167,7 +168,9 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
  * function doesn't need to invalidate tlb.
  */
 static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
-						pte_t *ptep, pte_t entry)
+						pte_t *ptep, pte_t entry,
+						unsigned long address,
+						unsigned long page_size)
 {
 
 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
@@ -175,6 +178,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
 
 	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
 
+		int psize;
 		unsigned long old_pte, new_pte;
 
 		old_pte = __radix_pte_update(ptep, ~0, 0);
@@ -183,12 +187,8 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
 		 * new value of pte
 		 */
 		new_pte = old_pte | set;
-
-		/*
-		 * For now let's do heavy pid flush
-		 * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
-		 */
-		radix__flush_tlb_mm(mm);
+		psize = radix_get_mmu_psize(page_size);
+		radix__flush_tlb_page_psize(mm, address, psize);
 
 		__radix_pte_update(ptep, 0, new_pte);
 	} else
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index c219ef7be53b..4153b8e591a4 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -268,7 +268,9 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 
 
 static inline void __ptep_set_access_flags(struct mm_struct *mm,
-					   pte_t *ptep, pte_t entry)
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   unsigned long page_size)
 {
 	unsigned long set = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 653a1838469d..7e42b8195e85 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -301,7 +301,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
  * function doesn't need to flush the hash entry
  */
 static inline void __ptep_set_access_flags(struct mm_struct *mm,
-					   pte_t *ptep, pte_t entry)
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   unsigned long page_size)
 {
 	unsigned long bits = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f4f437cbabf1..5c7c501b7cae 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 #endif
 	changed = !pmd_same(*(pmdp), entry);
 	if (changed) {
-		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
+		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry),
+					address, HPAGE_PMD_SIZE);
 		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	}
 	return changed;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 688b54517655..416918005395 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -222,6 +222,22 @@ static int __init get_idx_from_shift(unsigned int shift)
 	return idx;
 }
 
+int radix_get_mmu_psize(unsigned long page_size)
+{
+	int psize;
+
+	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
+		psize = mmu_virtual_psize;
+	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
+		psize = MMU_PAGE_2M;
+	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
+		psize = MMU_PAGE_1G;
+	else
+		return -1;
+	return psize;
+}
+
+
 static int __init radix_dt_scan_page_sizes(unsigned long node,
 					   const char *uname, int depth,
 					   void *data)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 911fdfb63ec1..503ae9bd3efe 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -219,12 +219,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 			  pte_t *ptep, pte_t entry, int dirty)
 {
 	int changed;
+	unsigned long page_size;
+
 	entry = set_access_flags_filter(entry, vma, dirty);
 	changed = !pte_same(*(ptep), entry);
 	if (changed) {
-		if (!is_vm_hugetlb_page(vma))
+		if (!is_vm_hugetlb_page(vma)) {
+			page_size = PAGE_SIZE;
 			assert_pte_locked(vma->vm_mm, address);
-		__ptep_set_access_flags(vma->vm_mm, ptep, entry);
+		} else
+			page_size = huge_page_size(hstate_vma(vma));
+		__ptep_set_access_flags(vma->vm_mm, ptep, entry,
+					address, page_size);
 		flush_tlb_page(vma, address);
 	}
 	return changed;
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 3493cf4e0452..81a9f6390f64 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -282,21 +282,6 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 }
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
-static int radix_get_mmu_psize(int page_size)
-{
-	int psize;
-
-	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
-		psize = mmu_virtual_psize;
-	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
-		psize = MMU_PAGE_2M;
-	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
-		psize = MMU_PAGE_1G;
-	else
-		return -1;
-	return psize;
-}
-
 void radix__tlb_flush(struct mmu_gather *tlb)
 {
 	int psize = 0;
-- 
2.10.2


* [PATCH v5 2/7] powerpc/mm: Rename hugetlb-radix.h to hugetlb.h
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
@ 2016-11-23 11:09 ` Aneesh Kumar K.V
  2016-11-23 11:09 ` [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config Aneesh Kumar K.V
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:09 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We will start moving some book3s-specific hugetlb functions there.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/{hugetlb-radix.h => hugetlb.h} | 4 ++--
 arch/powerpc/include/asm/hugetlb.h                                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename arch/powerpc/include/asm/book3s/64/{hugetlb-radix.h => hugetlb.h} (90%)

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
similarity index 90%
rename from arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
rename to arch/powerpc/include/asm/book3s/64/hugetlb.h
index c45189aa7476..499268045306 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
-#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
+#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H
 /*
  * For radix we want generic code to handle hugetlb. But then if we want
  * both hash and radix to be enabled together we need to workaround the
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index c5517f463ec7..c03e0a3dd4d8 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -9,7 +9,7 @@ extern struct kmem_cache *hugepte_cache;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
-#include <asm/book3s/64/hugetlb-radix.h>
+#include <asm/book3s/64/hugetlb.h>
 /*
  * This should work for other subarchs too. But right now we use the
  * new format only for 64bit book3s
-- 
2.10.2


* [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
  2016-11-23 11:09 ` [PATCH v5 2/7] powerpc/mm: Rename hugetlb-radix.h to hugetlb.h Aneesh Kumar K.V
@ 2016-11-23 11:09 ` Aneesh Kumar K.V
  2016-11-23 14:08   ` Balbir Singh
  2016-11-23 11:10 ` [PATCH v5 4/7] powerpc/mm/hugetlb: Make copy of huge_ptep_get_and_clear to different platform headers Aneesh Kumar K.V
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:09 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

W.r.t. the hash page table config, we support 16MB and 16GB as the
hugepage sizes. Update hstate_get_psize to handle 16M and 16G.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 499268045306..d9c283f95e05 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -21,6 +21,10 @@ static inline int hstate_get_psize(struct hstate *hstate)
 		return MMU_PAGE_2M;
 	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
 		return MMU_PAGE_1G;
+	else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift)
+		return MMU_PAGE_16M;
+	else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift)
+		return MMU_PAGE_16G;
 	else {
 		WARN(1, "Wrong huge page shift\n");
 		return mmu_virtual_psize;
-- 
2.10.2


* [PATCH v5 4/7] powerpc/mm/hugetlb: Make copy of huge_ptep_get_and_clear to different platform headers
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
  2016-11-23 11:09 ` [PATCH v5 2/7] powerpc/mm: Rename hugetlb-radix.h to hugetlb.h Aneesh Kumar K.V
  2016-11-23 11:09 ` [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config Aneesh Kumar K.V
@ 2016-11-23 11:10 ` Aneesh Kumar K.V
  2016-11-23 11:10 ` [PATCH v5 5/7] powerpc/mm/hugetlb: Switch hugetlb update to use huge_pte_update Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:10 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

In a subsequent patch we will change the book3s 64 implementation. This
also avoids an #ifdef in the code.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  5 +++++
 arch/powerpc/include/asm/book3s/64/hugetlb.h |  6 ++++++
 arch/powerpc/include/asm/hugetlb.h           | 10 ----------
 arch/powerpc/include/asm/nohash/32/pgtable.h |  6 ++++++
 arch/powerpc/include/asm/nohash/64/pgtable.h |  7 +++++++
 5 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index cd835e74e633..3994c403ad9a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -222,6 +222,11 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	ptep_set_wrprotect(mm, addr, ptep);
 }
 
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(ptep, ~0UL, 0));
+}
 
 static inline void __ptep_set_access_flags(struct mm_struct *mm,
 					   pte_t *ptep, pte_t entry,
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index d9c283f95e05..8fc04d2ac86f 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -30,4 +30,10 @@ static inline int hstate_get_psize(struct hstate *hstate)
 		return mmu_virtual_psize;
 	}
 }
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
+}
 #endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index c03e0a3dd4d8..8b39806851c0 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -132,16 +132,6 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	set_pte_at(mm, addr, ptep, pte);
 }
 
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-#ifdef CONFIG_PPC64
-	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
-#else
-	return __pte(pte_update(ptep, ~0UL, 0));
-#endif
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 4153b8e591a4..8569673682dd 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -266,6 +266,12 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	ptep_set_wrprotect(mm, addr, ptep);
 }
 
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(ptep, ~0UL, 0));
+}
+
 
 static inline void __ptep_set_access_flags(struct mm_struct *mm,
 					   pte_t *ptep, pte_t entry,
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 7e42b8195e85..f7a973d4b509 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -266,6 +266,13 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
 }
 
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
+}
+
+
 /*
  * We currently remove entries from the hashtable regardless of whether
  * the entry was young or dirty. The generic routines only flush if the
-- 
2.10.2


* [PATCH v5 5/7] powerpc/mm/hugetlb: Switch hugetlb update to use huge_pte_update
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2016-11-23 11:10 ` [PATCH v5 4/7] powerpc/mm/hugetlb: Make copy of huge_ptep_get_and_clear to different platform headers Aneesh Kumar K.V
@ 2016-11-23 11:10 ` Aneesh Kumar K.V
  2016-11-23 11:10 ` [PATCH v5 6/7] powerpc/mm: update pte_update to not do full mm tlb flush Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:10 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We want to switch pte_update to use a va-based tlb flush. In order to do
that we need to track the page size. With hugetlb we currently don't have
the page size available in these functions. Hence switch hugetlb to use
separate functions for the update. In a later patch we will update the
hugetlb functions to take a vm_area_struct, from which we can derive the
page size. After that we will switch this back to using pte_update.
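
A rough sketch of the later vma-based form described above (hypothetical,
not part of this patch; it assumes the page-size-taking pte_update()
introduced later in this series):

	static inline unsigned long huge_pte_update(struct vm_area_struct *vma,
						    unsigned long addr, pte_t *ptep,
						    unsigned long clr, unsigned long set)
	{
		/* the vma gives us the page size needed for the va flush */
		unsigned long page_size = huge_page_size(hstate_vma(vma));

		return pte_update(vma->vm_mm, addr, ptep, clr, set, page_size);
	}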

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hugetlb.h | 43 +++++++++++++++++++++++++++-
 arch/powerpc/include/asm/book3s/64/pgtable.h |  9 ------
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 8fc04d2ac86f..586236625117 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -31,9 +31,50 @@ static inline int hstate_get_psize(struct hstate *hstate)
 	}
 }
 
+static inline unsigned long huge_pte_update(struct mm_struct *mm, unsigned long addr,
+					    pte_t *ptep, unsigned long clr,
+					    unsigned long set)
+{
+	if (radix_enabled()) {
+		unsigned long old_pte;
+
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+
+			unsigned long new_pte;
+
+			old_pte = __radix_pte_update(ptep, ~0, 0);
+			asm volatile("ptesync" : : : "memory");
+			/*
+			 * new value of pte
+			 */
+			new_pte = (old_pte | set) & ~clr;
+			/*
+			 * For now let's do heavy pid flush
+			 * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
+			 */
+			radix__flush_tlb_mm(mm);
+
+			__radix_pte_update(ptep, 0, new_pte);
+		} else
+			old_pte = __radix_pte_update(ptep, clr, set);
+		asm volatile("ptesync" : : : "memory");
+		return old_pte;
+	}
+	return hash__pte_update(mm, addr, ptep, clr, set, true);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
+		return;
+
+	huge_pte_update(mm, addr, ptep, _PAGE_WRITE, 0);
+}
+
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 {
-	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
+	return __pte(huge_pte_update(mm, addr, ptep, ~0UL, 0));
 }
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 761622ec7f2a..4a2bd260eec0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -346,15 +346,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
 }
 
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
-}
-
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long addr, pte_t *ptep)
-- 
2.10.2


* [PATCH v5 6/7] powerpc/mm: update pte_update to not do full mm tlb flush
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2016-11-23 11:10 ` [PATCH v5 5/7] powerpc/mm/hugetlb: Switch hugetlb update to use huge_pte_update Aneesh Kumar K.V
@ 2016-11-23 11:10 ` Aneesh Kumar K.V
  2016-11-23 11:10 ` [PATCH v5 7/7] powerpc/mm: Batch tlb flush when invalidating pte entries Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:10 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

When we are updating a pte, we just need to flush the tlb mapping
for that pte. Right now we do a full mm flush because we don't track
the page size. Update the interface to track the page size and use
that to do the right tlb flush.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 16 ++++++++++------
 arch/powerpc/include/asm/book3s/64/radix.h   | 19 ++++++++-----------
 arch/powerpc/mm/pgtable-radix.c              |  2 +-
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4a2bd260eec0..7789ce64beb1 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -301,12 +301,16 @@ extern unsigned long pci_io_base;
 
 static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
 				       pte_t *ptep, unsigned long clr,
-				       unsigned long set, int huge)
+				       unsigned long set,
+				       unsigned long page_size)
 {
+	bool huge = (page_size != PAGE_SIZE);
+
 	if (radix_enabled())
-		return radix__pte_update(mm, addr, ptep, clr, set, huge);
+		return radix__pte_update(mm, addr, ptep, clr, set, page_size);
 	return hash__pte_update(mm, addr, ptep, clr, set, huge);
 }
+
 /*
  * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
  * We currently remove entries from the hashtable regardless of whether
@@ -324,7 +328,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 
 	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
 		return 0;
-	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, PAGE_SIZE);
 	return (old & _PAGE_ACCESSED) != 0;
 }
 
@@ -343,21 +347,21 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, PAGE_SIZE);
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long addr, pte_t *ptep)
 {
-	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, PAGE_SIZE);
 	return __pte(old);
 }
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 			     pte_t * ptep)
 {
-	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+	pte_update(mm, addr, ptep, ~0UL, 0, PAGE_SIZE);
 }
 
 static inline int pte_write(pte_t pte)
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index e104004bf2b1..da94bdae1f88 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -129,15 +129,16 @@ static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 
 
 static inline unsigned long radix__pte_update(struct mm_struct *mm,
-					unsigned long addr,
-					pte_t *ptep, unsigned long clr,
-					unsigned long set,
-					int huge)
+					      unsigned long addr,
+					      pte_t *ptep, unsigned long clr,
+					      unsigned long set,
+					      unsigned long page_size)
 {
 	unsigned long old_pte;
 
 	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
 
+		int psize;
 		unsigned long new_pte;
 
 		old_pte = __radix_pte_update(ptep, ~0, 0);
@@ -146,18 +147,14 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
 		 * new value of pte
 		 */
 		new_pte = (old_pte | set) & ~clr;
-
-		/*
-		 * For now let's do heavy pid flush
-		 * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
-		 */
-		radix__flush_tlb_mm(mm);
+		psize = radix_get_mmu_psize(page_size);
+		radix__flush_tlb_page_psize(mm, addr, psize);
 
 		__radix_pte_update(ptep, 0, new_pte);
 	} else
 		old_pte = __radix_pte_update(ptep, clr, set);
 	asm volatile("ptesync" : : : "memory");
-	if (!huge)
+	if (page_size == PAGE_SIZE)
 		assert_pte_locked(mm, addr);
 
 	return old_pte;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 416918005395..2fc7336619b3 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -486,7 +486,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
 	assert_spin_locked(&mm->page_table_lock);
 #endif
 
-	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
+	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, HPAGE_PMD_SIZE);
 	trace_hugepage_update(addr, old, clr, set);
 
 	return old;
-- 
2.10.2


* [PATCH v5 7/7] powerpc/mm: Batch tlb flush when invalidating pte entries
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2016-11-23 11:10 ` [PATCH v5 6/7] powerpc/mm: update pte_update to not do full mm tlb flush Aneesh Kumar K.V
@ 2016-11-23 11:10 ` Aneesh Kumar K.V
  2016-11-23 11:23 ` [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Balbir Singh
  2016-11-25  2:48 ` Paul Mackerras
  7 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:10 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This will improve the task exit case by batching tlb invalidates.
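
For example, on the pte_clear()/exit path clr is ~0UL and set is 0, so
new_pte is 0 and the per-pte flush can be skipped; the mmu_gather code
then issues one batched invalidate instead. In sketch form (matching
the diff below):

	if (new_pte) {
		/* changing a live pte: flush this va before the update */
		asm volatile("ptesync" : : : "memory");
		psize = radix_get_mmu_psize(page_size);
		radix__flush_tlb_page_psize(mm, addr, psize);
		__radix_pte_update(ptep, 0, new_pte);
	}
	/* new_pte == 0: leave the flush to the mmu_gather batching */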

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index da94bdae1f88..2c3b93a628d7 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -142,15 +142,21 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
 		unsigned long new_pte;
 
 		old_pte = __radix_pte_update(ptep, ~0, 0);
-		asm volatile("ptesync" : : : "memory");
 		/*
 		 * new value of pte
 		 */
 		new_pte = (old_pte | set) & ~clr;
-		psize = radix_get_mmu_psize(page_size);
-		radix__flush_tlb_page_psize(mm, addr, psize);
-
-		__radix_pte_update(ptep, 0, new_pte);
+		/*
+		 * If we are trying to clear the pte, we can skip
+		 * the below sequence and batch the tlb flush. The
+		 * tlb flush batching is done by mmu gather code
+		 */
+		if (new_pte) {
+			asm volatile("ptesync" : : : "memory");
+			psize = radix_get_mmu_psize(page_size);
+			radix__flush_tlb_page_psize(mm, addr, psize);
+			__radix_pte_update(ptep, 0, new_pte);
+		}
 	} else
 		old_pte = __radix_pte_update(ptep, clr, set);
 	asm volatile("ptesync" : : : "memory");
-- 
2.10.2


* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
                   ` (5 preceding siblings ...)
  2016-11-23 11:10 ` [PATCH v5 7/7] powerpc/mm: Batch tlb flush when invalidating pte entries Aneesh Kumar K.V
@ 2016-11-23 11:23 ` Balbir Singh
  2016-11-23 11:53   ` Aneesh Kumar K.V
  2016-11-25  2:48 ` Paul Mackerras
  7 siblings, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2016-11-23 11:23 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 23/11/16 22:09, Aneesh Kumar K.V wrote:
> When we are updating a pte, we just need to flush the tlb mapping
> for that pte. Right now we do a full mm flush because we don't track
> the page size. Update the interface to track the page size and use
> that to do the right tlb flush.
> 

Could you also clarify the scope -- this seems to be _radix_ only.
The problem statement is not very clear. Why doesn't the flush_tlb_page()
following ptep_set_access_flags() work? What else do we need to do?


> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/32/pgtable.h |  4 +++-
>  arch/powerpc/include/asm/book3s/64/pgtable.h |  7 +++++--
>  arch/powerpc/include/asm/book3s/64/radix.h   | 14 +++++++-------
>  arch/powerpc/include/asm/nohash/32/pgtable.h |  4 +++-
>  arch/powerpc/include/asm/nohash/64/pgtable.h |  4 +++-
>  arch/powerpc/mm/pgtable-book3s64.c           |  3 ++-
>  arch/powerpc/mm/pgtable-radix.c              | 16 ++++++++++++++++
>  arch/powerpc/mm/pgtable.c                    | 10 ++++++++--
>  arch/powerpc/mm/tlb-radix.c                  | 15 ---------------
>  9 files changed, 47 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 6b8b2d57fdc8..cd835e74e633 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -224,7 +224,9 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
>  
>  
>  static inline void __ptep_set_access_flags(struct mm_struct *mm,
> -					   pte_t *ptep, pte_t entry)
> +					   pte_t *ptep, pte_t entry,
> +					   unsigned long address,
> +					   unsigned long page_size)
>  {
>  	unsigned long set = pte_val(entry) &
>  		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 86870c11917b..761622ec7f2a 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -580,10 +580,13 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
>   */
>  
>  static inline void __ptep_set_access_flags(struct mm_struct *mm,
> -					   pte_t *ptep, pte_t entry)
> +					   pte_t *ptep, pte_t entry,
> +					   unsigned long address,
> +					   unsigned long page_size)
>  {
>  	if (radix_enabled())
> -		return radix__ptep_set_access_flags(mm, ptep, entry);
> +		return radix__ptep_set_access_flags(mm, ptep, entry,
> +						    address, page_size);
>  	return hash__ptep_set_access_flags(ptep, entry);
>  }
>  
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 2a46dea8e1b1..e104004bf2b1 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -110,6 +110,7 @@
>  #define RADIX_PUD_TABLE_SIZE	(sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
>  #define RADIX_PGD_TABLE_SIZE	(sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
>  
> +extern int radix_get_mmu_psize(unsigned long page_size);
>  static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
>  					       unsigned long set)
>  {
> @@ -167,7 +168,9 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
>   * function doesn't need to invalidate tlb.
>   */
>  static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
> -						pte_t *ptep, pte_t entry)
> +						pte_t *ptep, pte_t entry,
> +						unsigned long address,
> +						unsigned long page_size)
>  {
>  
>  	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
> @@ -175,6 +178,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
>  
>  	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
>  
> +		int psize;
>  		unsigned long old_pte, new_pte;
>  
>  		old_pte = __radix_pte_update(ptep, ~0, 0);
> @@ -183,12 +187,8 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
>  		 * new value of pte
>  		 */
>  		new_pte = old_pte | set;
> -
> -		/*
> -		 * For now let's do heavy pid flush
> -		 * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
> -		 */
> -		radix__flush_tlb_mm(mm);
> +		psize = radix_get_mmu_psize(page_size);
> +		radix__flush_tlb_page_psize(mm, address, psize);
>  
>  		__radix_pte_update(ptep, 0, new_pte);
>  	} else
> diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
> index c219ef7be53b..4153b8e591a4 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@ -268,7 +268,9 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
>  
>  
>  static inline void __ptep_set_access_flags(struct mm_struct *mm,
> -					   pte_t *ptep, pte_t entry)
> +					   pte_t *ptep, pte_t entry,
> +					   unsigned long address,
> +					   unsigned long page_size)
>  {
>  	unsigned long set = pte_val(entry) &
>  		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
> diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
> index 653a1838469d..7e42b8195e85 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
> @@ -301,7 +301,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
>   * function doesn't need to flush the hash entry
>   */
>  static inline void __ptep_set_access_flags(struct mm_struct *mm,
> -					   pte_t *ptep, pte_t entry)
> +					   pte_t *ptep, pte_t entry,
> +					   unsigned long address,
> +					   unsigned long page_size)
>  {
>  	unsigned long bits = pte_val(entry) &
>  		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index f4f437cbabf1..5c7c501b7cae 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>  #endif
>  	changed = !pmd_same(*(pmdp), entry);
>  	if (changed) {
> -		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
> +		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry),
> +					address, HPAGE_PMD_SIZE);
>  		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>  	}
>  	return changed;
> diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
> index 688b54517655..416918005395 100644
> --- a/arch/powerpc/mm/pgtable-radix.c
> +++ b/arch/powerpc/mm/pgtable-radix.c
> @@ -222,6 +222,22 @@ static int __init get_idx_from_shift(unsigned int shift)
>  	return idx;
>  }
>  
> +int radix_get_mmu_psize(unsigned long page_size)
> +{
> +	int psize;
> +
> +	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
> +		psize = mmu_virtual_psize;
> +	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
> +		psize = MMU_PAGE_2M;
> +	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
> +		psize = MMU_PAGE_1G;
> +	else
> +		return -1;
> +	return psize;
> +}
> +
> +
>  static int __init radix_dt_scan_page_sizes(unsigned long node,
>  					   const char *uname, int depth,
>  					   void *data)
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 911fdfb63ec1..503ae9bd3efe 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -219,12 +219,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>  			  pte_t *ptep, pte_t entry, int dirty)
>  {
>  	int changed;
> +	unsigned long page_size;
> +
>  	entry = set_access_flags_filter(entry, vma, dirty);
>  	changed = !pte_same(*(ptep), entry);
>  	if (changed) {
> -		if (!is_vm_hugetlb_page(vma))
> +		if (!is_vm_hugetlb_page(vma)) {
> +			page_size = PAGE_SIZE;
>  			assert_pte_locked(vma->vm_mm, address);
> -		__ptep_set_access_flags(vma->vm_mm, ptep, entry);
> +		} else
> +			page_size = huge_page_size(hstate_vma(vma));
> +		__ptep_set_access_flags(vma->vm_mm, ptep, entry,
> +					address, page_size);
>  		flush_tlb_page(vma, address);

Can't we use this flush_tlb_page() or a modification inside it to do the job?

Thanks,
Balbir


* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-23 11:23 ` [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Balbir Singh
@ 2016-11-23 11:53   ` Aneesh Kumar K.V
  2016-11-23 14:05     ` Balbir Singh
  0 siblings, 1 reply; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 11:53 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> On 23/11/16 22:09, Aneesh Kumar K.V wrote:
>> When we are updating a pte, we just need to flush the tlb mapping
>> for that pte. Right now we do a full mm flush because we don't track
>> the page size. Update the interface to track the page size and use
>> that to do the right tlb flush.
>> 
>
> Could you also clarify the scope -- this seems to be _radix_ only.
> The problem statement is not very clear. Why doesn't the flush_tlb_page()
> following ptep_set_access_flags() work? What else do we need to do?

Yes, it modifies only the radix part. I don't understand the
flush_tlb_page() part of the comment above. We are modifying the tlb
flush that we need to do in the pte update sequence for DD1, i.e. we
need to do the flush before we can set the pte to its new value.

Also, in this specific case we could ideally drop that flush_tlb_page(),
because relaxing an access really doesn't need a tlb flush from a
generic architecture point of view. I left it there to make sure we
measure and get the invalidate path correct before going ahead with
that optimization.
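
For reference, the DD1 update sequence being described, in sketch form
(helper names as in the patch; psize is the mmu page size index):

	old_pte = __radix_pte_update(ptep, ~0, 0);	  /* clear the pte */
	asm volatile("ptesync" : : : "memory");		  /* order the clear */
	new_pte = old_pte | set;			  /* relaxed pte value */
	psize = radix_get_mmu_psize(page_size);
	radix__flush_tlb_page_psize(mm, address, psize);  /* flush old xlate */
	__radix_pte_update(ptep, 0, new_pte);		  /* install new pte */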


>
>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/32/pgtable.h |  4 +++-
>>  arch/powerpc/include/asm/book3s/64/pgtable.h |  7 +++++--
>>  arch/powerpc/include/asm/book3s/64/radix.h   | 14 +++++++-------
>>  arch/powerpc/include/asm/nohash/32/pgtable.h |  4 +++-
>>  arch/powerpc/include/asm/nohash/64/pgtable.h |  4 +++-
>>  arch/powerpc/mm/pgtable-book3s64.c           |  3 ++-
>>  arch/powerpc/mm/pgtable-radix.c              | 16 ++++++++++++++++
>>  arch/powerpc/mm/pgtable.c                    | 10 ++++++++--
>>  arch/powerpc/mm/tlb-radix.c                  | 15 ---------------
>>  9 files changed, 47 insertions(+), 30 deletions(-)
>> 

-aneesh


* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-23 11:53   ` Aneesh Kumar K.V
@ 2016-11-23 14:05     ` Balbir Singh
  2016-11-23 14:36       ` Aneesh Kumar K.V
  0 siblings, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2016-11-23 14:05 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 23/11/16 22:53, Aneesh Kumar K.V wrote:
> Balbir Singh <bsingharora@gmail.com> writes:
> 
>> On 23/11/16 22:09, Aneesh Kumar K.V wrote:
>>> When we are updating a pte, we just need to flush the tlb mapping
>>> for that pte. Right now we do a full mm flush because we don't track
>>> the page size. Update the interface to track the page size and use
>>> that to do the right tlb flush.
>>>
>>
>> Could you also clarify the scope -- this seems to be _radix_ only.
>> The problem statement is not very clear. Why doesn't the flush_tlb_page()
>> following ptep_set_access_flags() work? What else do we need to do?
> 
> Yes, it modifies only the radix part. I don't understand the
> flush_tlb_page() part of the comment above. We are modifying the tlb
> flush that we need to do in the pte update sequence for DD1, i.e. we
> need to do the flush before we can set the pte to its new value.
> 
> Also, in this specific case we could ideally drop that flush_tlb_page(),
> because relaxing an access really doesn't need a tlb flush from a
> generic architecture point of view. I left it there to make sure we
> measure and get the invalidate path correct before going ahead with
> that optimization.
> 

OK, here is my untested solution; I've only compiled it.
It breaks the 64/hash/radix abstractions, but it makes the
changes much simpler.

Signed-off-by: Balbir Singh <bsingharora@gmail.com>

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 2a46dea..2454217 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -162,6 +162,30 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
 	return old_pte;
 }
 
+static inline void radix__ptep_dd1_set_access_flags(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t entry,
+						unsigned long page_size)
+{
+
+	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
+					      _PAGE_RW | _PAGE_EXEC);
+
+	unsigned long old_pte, new_pte;
+
+	old_pte = __radix_pte_update(ptep, ~0, 0);
+	asm volatile("ptesync" : : : "memory");
+	/*
+	 * new value of pte
+	 */
+	new_pte = old_pte | set;
+
+	radix__flush_tlb_page_psize(mm, addr, radix_get_mmu_psize(page_size));
+	__radix_pte_update(ptep, 0, new_pte);
+
+	asm volatile("ptesync" : : : "memory");
+}
+
 /*
  * Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to invalidate tlb.
@@ -173,26 +197,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
 					      _PAGE_RW | _PAGE_EXEC);
 
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-
-		unsigned long old_pte, new_pte;
-
-		old_pte = __radix_pte_update(ptep, ~0, 0);
-		asm volatile("ptesync" : : : "memory");
-		/*
-		 * new value of pte
-		 */
-		new_pte = old_pte | set;
-
-		/*
-		 * For now let's do heavy pid flush
-		 * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
-		 */
-		radix__flush_tlb_mm(mm);
-
-		__radix_pte_update(ptep, 0, new_pte);
-	} else
-		__radix_pte_update(ptep, 0, set);
+	__radix_pte_update(ptep, 0, set);
 	asm volatile("ptesync" : : : "memory");
 }
 
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f4f437c..0c7ee0e 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -7,12 +7,14 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/kernel.h>
 #include <linux/sched.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
 #include "mmu_decl.h"
 #include <trace/events/thp.h>
+#include <linux/hugetlb.h>
 
 int (*register_process_table)(unsigned long base, unsigned long page_size,
 			      unsigned long tbl_size);
@@ -35,7 +37,15 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 #endif
 	changed = !pmd_same(*(pmdp), entry);
 	if (changed) {
-		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
+		if (radix_enabled() && cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+			unsigned long page_size;
+			/* pmd entries always map huge pages */
+			page_size = HPAGE_PMD_SIZE;
+
+			radix__ptep_dd1_set_access_flags(vma->vm_mm, address,
+					pmdp_ptep(pmdp), pmd_pte(entry), page_size);
+		} else
+			__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
 		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	}
 	return changed;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 911fdfb..ebb464e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,7 +224,16 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	if (changed) {
 		if (!is_vm_hugetlb_page(vma))
 			assert_pte_locked(vma->vm_mm, address);
-		__ptep_set_access_flags(vma->vm_mm, ptep, entry);
+		/* Workaround to call radix update directly for DD1 */
+		if (radix_enabled() && cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+			unsigned long page_size;
+			page_size = is_vm_hugetlb_page(vma) ?
+					huge_page_size(hstate_vma(vma)) : PAGE_SIZE;
+
+			radix__ptep_dd1_set_access_flags(vma->vm_mm, address,
+					ptep, entry, page_size);
+		} else
+			__ptep_set_access_flags(vma->vm_mm, ptep, entry);
 		flush_tlb_page(vma, address);
 	}
 	return changed;


* Re: [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config
  2016-11-23 11:09 ` [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config Aneesh Kumar K.V
@ 2016-11-23 14:08   ` Balbir Singh
  2016-11-23 14:30     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2016-11-23 14:08 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 23/11/16 22:09, Aneesh Kumar K.V wrote:
> W.r.t. the hash page table config, we support 16MB and 16GB as the
> hugepage sizes. Update hstate_get_psize to handle 16M and 16G.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
> index 499268045306..d9c283f95e05 100644
> --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
> +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
> @@ -21,6 +21,10 @@ static inline int hstate_get_psize(struct hstate *hstate)
>  		return MMU_PAGE_2M;
>  	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
>  		return MMU_PAGE_1G;
> +	else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift)
> +		return MMU_PAGE_16M;
> +	else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift)
> +		return MMU_PAGE_16G;
>  	else {
>  		WARN(1, "Wrong huge page shift\n");
>  		return mmu_virtual_psize;
> 

Is this related to this patch series? Radix can't do these sizes.

Balbir


* Re: [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config
  2016-11-23 14:08   ` Balbir Singh
@ 2016-11-23 14:30     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 14:30 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> On 23/11/16 22:09, Aneesh Kumar K.V wrote:
>> W.r.t. the hash page table config, we support 16MB and 16GB as the
>> hugepage sizes. Update hstate_get_psize to handle 16M and 16G.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 ++++
>>  1 file changed, 4 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
>> index 499268045306..d9c283f95e05 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
>> @@ -21,6 +21,10 @@ static inline int hstate_get_psize(struct hstate *hstate)
>>  		return MMU_PAGE_2M;
>>  	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
>>  		return MMU_PAGE_1G;
>> +	else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift)
>> +		return MMU_PAGE_16M;
>> +	else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift)
>> +		return MMU_PAGE_16G;
>>  	else {
>>  		WARN(1, "Wrong huge page shift\n");
>>  		return mmu_virtual_psize;
>> 
>
> Is this related to this patch series? Radix can't do these sizes.
>

The code returns the psize (the index value of the page size) from the
hstate. It doesn't do any verification. I added the hash details here
because this header is now supposed to contain generic functions, not
radix-specific ones.
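
A minimal usage sketch (hypothetical caller, for illustration only):

	struct hstate *h = hstate_vma(vma);	/* hugetlb state of the vma */
	int psize = hstate_get_psize(h);	/* e.g. MMU_PAGE_16M on hash */
	/* note: just an index lookup, with no check that the active
	 * MMU mode (hash vs radix) actually supports this size */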

-aneesh


* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-23 14:05     ` Balbir Singh
@ 2016-11-23 14:36       ` Aneesh Kumar K.V
  2016-11-23 15:21         ` Balbir Singh
  0 siblings, 1 reply; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-23 14:36 UTC (permalink / raw)
  To: Balbir Singh, benh, paulus, mpe; +Cc: linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> On 23/11/16 22:53, Aneesh Kumar K.V wrote:
>> Balbir Singh <bsingharora@gmail.com> writes:
>> 
>>> On 23/11/16 22:09, Aneesh Kumar K.V wrote:
>>>> When we are updating a pte, we just need to flush the tlb mapping
>>>> for that pte. Right now we do a full mm flush because we don't track
>>>> the page size. Update the interface to track the page size and use
>>>> that to do the right tlb flush.
>>>>
>>>
>>> Could you also clarify the scope -- this seems to be _radix_ only.
>>> The problem statement is not very clear. Why doesn't the flush_tlb_page()
>>> following ptep_set_access_flags() work? What else do we need to do?
>> 
>> Yes, it modifies only the radix part. I don't understand the
>> flush_tlb_page() part of the comment above. We are modifying the tlb
>> flush that we need to do in the pte update sequence for DD1, i.e. we
>> need to do the flush before we can set the pte to its new value.
>> 
>> Also, in this specific case we could ideally drop that flush_tlb_page(),
>> because relaxing an access really doesn't need a tlb flush from a
>> generic architecture point of view. I left it there to make sure we
>> measure and get the invalidate path correct before going ahead with
>> that optimization.
>> 
>
> OK, here is my untested solution; I've only compiled it.
> It breaks the 64/hash/radix abstractions, but it makes the
> changes much simpler.
>
> Signed-off-by: Balbir Singh <bsingharora@gmail.com>

I find the below one more confusing and complicated, spreading the
details of DD1 around the code. I am not sure what more I could have
done to simplify the code. We have done the arch pte updates such that
most of the updates use the pte_update() interface, and the ones which
relax the access bits go through ptep_set_access_flags(). All the pte
update rules are contained there. What you did below is move the dd1
sequence out to a place where the page size is available. What I did in
my patch is to pass the page size around. IMHO it is a matter of style.
I also want to pass the page size around so that we keep
huge_pte_update, pte_update and ptep_set_access_flags all similar.


>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 2a46dea..2454217 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -162,6 +162,30 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
>  	return old_pte;
>  }
>
> +static inline void radix__ptep_dd1_set_access_flags(struct mm_struct *mm,
> +						unsigned long addr,
> +						pte_t *ptep, pte_t entry,
> +						unsigned long page_size)
> +{
> +
> +	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
> +					      _PAGE_RW | _PAGE_EXEC);
> +
> +	unsigned long old_pte, new_pte;
> +
> +	old_pte = __radix_pte_update(ptep, ~0, 0);
> +	asm volatile("ptesync" : : : "memory");
> +	/*
> +	 * new value of pte
> +	 */
> +	new_pte = old_pte | set;
> +
> +	radix__flush_tlb_page_psize(mm, addr, radix_get_mmu_psize(page_size));
> +	__radix_pte_update(ptep, 0, new_pte);
> +
> +	asm volatile("ptesync" : : : "memory");
> +}
> +
>  /*
>   * Set the dirty and/or accessed bits atomically in a linux PTE, this
>   * function doesn't need to invalidate tlb.
> @@ -173,26 +197,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
>  	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
>  					      _PAGE_RW | _PAGE_EXEC);
>
> -	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
> -
> -		unsigned long old_pte, new_pte;
> -
> -		old_pte = __radix_pte_update(ptep, ~0, 0);
> -		asm volatile("ptesync" : : : "memory");
> -		/*
> -		 * new value of pte
> -		 */
> -		new_pte = old_pte | set;
> -
> -		/*
> -		 * For now let's do heavy pid flush
> -		 * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
> -		 */
> -		radix__flush_tlb_mm(mm);
> -
> -		__radix_pte_update(ptep, 0, new_pte);
> -	} else
> -		__radix_pte_update(ptep, 0, set);
> +	__radix_pte_update(ptep, 0, set);
>  	asm volatile("ptesync" : : : "memory");
>  }
>
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index f4f437c..0c7ee0e 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -7,12 +7,14 @@
>   * 2 of the License, or (at your option) any later version.
>   */
>
> +#include <linux/kernel.h>
>  #include <linux/sched.h>
>  #include <asm/pgalloc.h>
>  #include <asm/tlb.h>
>
>  #include "mmu_decl.h"
>  #include <trace/events/thp.h>
> +#include <linux/hugetlb.h>
>
>  int (*register_process_table)(unsigned long base, unsigned long page_size,
>  			      unsigned long tbl_size);
> @@ -35,7 +37,15 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>  #endif
>  	changed = !pmd_same(*(pmdp), entry);
>  	if (changed) {
> -		__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
> +		if (radix_enabled() && cpu_has_feature(CPU_FTR_POWER9_DD1)) {
> +			unsigned long page_size;
> +			/* pmd entries always map huge pages */
> +			page_size = HPAGE_PMD_SIZE;
> +
> +			radix__ptep_dd1_set_access_flags(vma->vm_mm, address,
> +					pmdp_ptep(pmdp), pmd_pte(entry), page_size);
> +		} else
> +			__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
>  		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>  	}
>  	return changed;
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 911fdfb..ebb464e 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -224,7 +224,16 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>  	if (changed) {
>  		if (!is_vm_hugetlb_page(vma))
>  			assert_pte_locked(vma->vm_mm, address);
> -		__ptep_set_access_flags(vma->vm_mm, ptep, entry);
> +		/* Workaround to call radix update directly for DD1 */
> +		if (radix_enabled() && cpu_has_feature(CPU_FTR_POWER9_DD1)) {
> +			unsigned long page_size;
> +			page_size = is_vm_hugetlb_page(vma) ?
> +					huge_page_size(hstate_vma(vma)) : PAGE_SIZE;
> +
> +			radix__ptep_dd1_set_access_flags(vma->vm_mm, address,
> +					ptep, entry, page_size);
> +		} else
> +			__ptep_set_access_flags(vma->vm_mm, ptep, entry);
>  		flush_tlb_page(vma, address);
>  	}
>  	return changed;


* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-23 14:36       ` Aneesh Kumar K.V
@ 2016-11-23 15:21         ` Balbir Singh
  0 siblings, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2016-11-23 15:21 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus, mpe; +Cc: linuxppc-dev



On 24/11/16 01:36, Aneesh Kumar K.V wrote:
> Balbir Singh <bsingharora@gmail.com> writes:
> 
>> On 23/11/16 22:53, Aneesh Kumar K.V wrote:
>>> Balbir Singh <bsingharora@gmail.com> writes:
>>>
>>>> On 23/11/16 22:09, Aneesh Kumar K.V wrote:
>>>>> When we are updating pte, we just need to flush the tlb mapping for
>>>>> that pte. Right now we do a full mm flush because we don't track page
>>>>> size. Update the interface to track the page size and use that to
>>>>> do the right tlb flush.
>>>>>
>>>>
>>>> Could you also clarify the scope -- this seems to be _radix_ only.
>>>> The problem statement is not very clear: why doesn't the flush_tlb_page()
>>>> following ptep_set_access_flags() work? What else do we need to do?
>>>
>>> Yes, it modifies only the radix part. I don't understand the
>>> flush_tlb_page() part of the comment above. We are modifying the tlb
>>> flush that we need to do in the pte update sequence for DD1, i.e., we
>>> need to do the flush before we can set the pte to the new value.
>>>
>>> Also, in this specific case we could ideally drop that flush_tlb_page(),
>>> because relaxing access permissions really doesn't need a tlb flush from
>>> the generic architecture point of view. I left it there so that we
>>> measure and get the invalidate path correct before going ahead with that
>>> optimization.
>>>
>>
>> OK, here is my untested solution; I've only compile-tested it.
>> It breaks the 64/hash/radix abstractions, but it makes the
>> changes much simpler.
>>
>> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
> 
> I find the one below more confusing and complicated, spreading the
> details of DD1 around the code. I am not sure what more I could have
> done to simplify the code. We have structured the arch pte updates such
> that most updates use the pte_update() interface and the ones which
> relax the access bits go through ptep_set_access_flags(). All pte
> update rules are contained there. What you did below is move the DD1
> sequence out to a place where the page size is available; what I did in
> my patch is pass the page size around. IMHO it is a matter of style. I
> also want to pass the page size around so that we keep huge_pte_update,
> pte_update and ptep_set_access_flags all similar.
> 

Agreed. The reason I did it that way is that at some point the _dd1_
variants will not need to be supported/maintained at all. It is a
matter of style, and I was wondering whether we need to change the API
to pass address and page_size as a permanent solution.

Balbir Singh.

* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
                   ` (6 preceding siblings ...)
  2016-11-23 11:23 ` [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Balbir Singh
@ 2016-11-25  2:48 ` Paul Mackerras
  2016-11-25  4:19   ` Aneesh Kumar K.V
  7 siblings, 1 reply; 18+ messages in thread
From: Paul Mackerras @ 2016-11-25  2:48 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, mpe, linuxppc-dev

On Wed, Nov 23, 2016 at 04:39:57PM +0530, Aneesh Kumar K.V wrote:
> When we are updating pte, we just need to flush the tlb mapping for
> that pte. Right now we do a full mm flush because we don't track page
> size. Update the interface to track the page size and use that to
> do the right tlb flush.
[...]

> +int radix_get_mmu_psize(unsigned long page_size)
> +{
> +	int psize;
> +
> +	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
> +		psize = mmu_virtual_psize;
> +	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
> +		psize = MMU_PAGE_2M;
> +	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
> +		psize = MMU_PAGE_1G;

Do we actually have support for 1G pages yet?  I couldn't see where
they get instantiated.

> +	else
> +		return -1;
> +	return psize;
> +}
> +
> +
>  static int __init radix_dt_scan_page_sizes(unsigned long node,
>  					   const char *uname, int depth,
>  					   void *data)
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 911fdfb63ec1..503ae9bd3efe 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -219,12 +219,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>  			  pte_t *ptep, pte_t entry, int dirty)
>  {
>  	int changed;
> +	unsigned long page_size;
> +
>  	entry = set_access_flags_filter(entry, vma, dirty);
>  	changed = !pte_same(*(ptep), entry);
>  	if (changed) {
> -		if (!is_vm_hugetlb_page(vma))
> +		if (!is_vm_hugetlb_page(vma)) {
> +			page_size = PAGE_SIZE;
>  			assert_pte_locked(vma->vm_mm, address);
> -		__ptep_set_access_flags(vma->vm_mm, ptep, entry);
> +		} else
> +			page_size = huge_page_size(hstate_vma(vma));

I don't understand how this can work with THP.  You're determining the
page size using only the VMA, but with a THP VMA surely we get
different page sizes at different addresses?

More generally, I'm OK with adding the address parameter to
__ptep_set_access_flags, but I think Ben's suggestion of encoding the
page size in the PTE value is a good one.  I think it is as simple as
the patch below (assuming we only support 2MB large pages for now).
That would simplify things a bit and also it would mean that we are
sure we know the page size correctly even with THP.

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9fd77f8..e4f3581 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -32,7 +32,8 @@
 #define _PAGE_SOFT_DIRTY	0x00000
 #endif
 #define _PAGE_SPECIAL		_RPAGE_SW2 /* software: special page */
-
+#define _PAGE_GIGANTIC		_RPAGE_SW0	/* software: 1GB page */
+#define _PAGE_LARGE		_RPAGE_SW1	/* software: 2MB page */
 
 #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f4f437c..7ff0289 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -86,7 +86,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
 {
 	unsigned long pmdv;
 
-	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+	pmdv = ((pfn << PAGE_SHIFT) & PTE_RPN_MASK) | _PAGE_LARGE;
 	return pmd_set_protbits(__pmd(pmdv), pgprot);
 }
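
With the size encoded in the pte, the flush path can recover it without
consulting the VMA at all. A hypothetical decode helper, just to
illustrate the idea (the name and exact form are not part of the patch
above):

static inline unsigned long radix__pte_page_size(pte_t pte)
{
	/* software bits as set by e.g. pfn_pmd() in the hunk above */
	if (pte_val(pte) & _PAGE_GIGANTIC)
		return 1UL << 30;	/* 1GB */
	if (pte_val(pte) & _PAGE_LARGE)
		return 1UL << 21;	/* 2MB */
	return PAGE_SIZE;
}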
 
Paul.

* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-25  2:48 ` Paul Mackerras
@ 2016-11-25  4:19   ` Aneesh Kumar K.V
  2016-11-25  7:05     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-25  4:19 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, mpe, linuxppc-dev

Paul Mackerras <paulus@ozlabs.org> writes:

> On Wed, Nov 23, 2016 at 04:39:57PM +0530, Aneesh Kumar K.V wrote:
>> When we are updating pte, we just need to flush the tlb mapping for
>> that pte. Right now we do a full mm flush because we don't track page
>> size. Update the interface to track the page size and use that to
>> do the right tlb flush.
> [...]
>
>> +int radix_get_mmu_psize(unsigned long page_size)
>> +{
>> +	int psize;
>> +
>> +	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
>> +		psize = mmu_virtual_psize;
>> +	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
>> +		psize = MMU_PAGE_2M;
>> +	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
>> +		psize = MMU_PAGE_1G;
>
> Do we actually have support for 1G pages yet?  I couldn't see where
> they get instantiated.

We use that for the kernel linear mapping.
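To expand on that: the linear mapping is created at boot by picking the
largest page size the alignment and length of each memory region allow,
which is where the 1G mappings come from. A simplified sketch of that
selection logic (the helper name is made up, and the real code differs
in detail):

static unsigned long best_mapping_size(unsigned long addr,
				       unsigned long remaining)
{
	/* prefer the biggest translation the region can support */
	if (IS_ALIGNED(addr, 1UL << 30) && remaining >= (1UL << 30))
		return 1UL << 30;		/* 1G mapping */
	if (IS_ALIGNED(addr, 1UL << 21) && remaining >= (1UL << 21))
		return 1UL << 21;		/* 2M mapping */
	return PAGE_SIZE;			/* base pages */
}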

>
>> +	else
>> +		return -1;
>> +	return psize;
>> +}
>> +
>> +
>>  static int __init radix_dt_scan_page_sizes(unsigned long node,
>>  					   const char *uname, int depth,
>>  					   void *data)
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index 911fdfb63ec1..503ae9bd3efe 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -219,12 +219,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>>  			  pte_t *ptep, pte_t entry, int dirty)
>>  {
>>  	int changed;
>> +	unsigned long page_size;
>> +
>>  	entry = set_access_flags_filter(entry, vma, dirty);
>>  	changed = !pte_same(*(ptep), entry);
>>  	if (changed) {
>> -		if (!is_vm_hugetlb_page(vma))
>> +		if (!is_vm_hugetlb_page(vma)) {
>> +			page_size = PAGE_SIZE;
>>  			assert_pte_locked(vma->vm_mm, address);
>> -		__ptep_set_access_flags(vma->vm_mm, ptep, entry);
>> +		} else
>> +			page_size = huge_page_size(hstate_vma(vma));
>
> I don't understand how this can work with THP.  You're determining the
> page size using only the VMA, but with a THP VMA surely we get
> different page sizes at different addresses?

That applies only to hugetlb pages, i.e., for a hugetlb vma we use the
vm_area_struct to determine the hugepage size.

For the THP hugepage configuration we end up calling

int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp, pmd_t entry, int dirty)

and for that we do
	__ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry),
				address, HPAGE_PMD_SIZE);


>
> More generally, I'm OK with adding the address parameter to
> __ptep_set_access_flags, but I think Ben's suggestion of encoding the
> page size in the PTE value is a good one.  I think it is as simple as
> the patch below (assuming we only support 2MB large pages for now).
> That would simplify things a bit and also it would mean that we are
> sure we know the page size correctly even with THP.
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 9fd77f8..e4f3581 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -32,7 +32,8 @@
>  #define _PAGE_SOFT_DIRTY	0x00000
>  #endif
>  #define _PAGE_SPECIAL		_RPAGE_SW2 /* software: special page */
> -
> +#define _PAGE_GIGANTIC		_RPAGE_SW0	/* software: 1GB page */
> +#define _PAGE_LARGE		_RPAGE_SW1	/* software: 2MB page */

I already use _RPAGE_SW1 for _PAGE_DEVMAP (pmem/nvdimm; patch set posted
to the list but not merged yet). We are really low on free software bits
in the pte, and I was avoiding using that one.

I was thinking of this series as a simpler cleanup of the different page
table update interfaces to take the page size as an argument.


>
>  #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
>  #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index f4f437c..7ff0289 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -86,7 +86,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
>  {
>  	unsigned long pmdv;
>
> -	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
> +	pmdv = ((pfn << PAGE_SHIFT) & PTE_RPN_MASK) | _PAGE_LARGE;
>  	return pmd_set_protbits(__pmd(pmdv), pgprot);
>  }
>

I will look at this and see if I can make the patch simpler. But do we
really want to use a pte bit for this? Aren't we low on free pte bits?

-aneesh

* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-25  4:19   ` Aneesh Kumar K.V
@ 2016-11-25  7:05     ` Aneesh Kumar K.V
  2016-11-25  8:22       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 18+ messages in thread
From: Aneesh Kumar K.V @ 2016-11-25  7:05 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: benh, mpe, linuxppc-dev

"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> writes:

> Paul Mackerras <paulus@ozlabs.org> writes:
>

>>
>>  #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
>>  #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
>> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
>> index f4f437c..7ff0289 100644
>> --- a/arch/powerpc/mm/pgtable-book3s64.c
>> +++ b/arch/powerpc/mm/pgtable-book3s64.c
>> @@ -86,7 +86,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
>>  {
>>  	unsigned long pmdv;
>>
>> -	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
>> +	pmdv = ((pfn << PAGE_SHIFT) & PTE_RPN_MASK) | _PAGE_LARGE;
>>  	return pmd_set_protbits(__pmd(pmdv), pgprot);
>>  }
>>
>
> I will look at this and see if I can make the patch simpler. But do we
> really want to use a pte bit for this? Aren't we low on free pte bits?


OK, this will work, provided we are OK with taking up two pte bits for
this. We can then use make_huge_pte() to fix up the pte entry for
hugetlb and pmd_mkhuge() to fix up the THP case.

Now the question is how we will support _PAGE_DEVMAP (for nvdimm).
We don't have any free software pte bits left after we make the above
change.

NOTE: We do have 3 reserved bits (ppc bits 4-6); can we use those?
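
With Paul's bit definitions the THP fixup would be a one-liner; a
sketch of what pmd_mkhuge() could become (assuming the radix variant
currently sets only _PAGE_PTE):

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	/* tag the entry as a 2MB leaf so the flush path knows its size */
	return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_LARGE));
}

make_huge_pte() would similarly OR in _PAGE_LARGE or _PAGE_GIGANTIC
depending on the hstate size.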

-aneesh

* Re: [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush
  2016-11-25  7:05     ` Aneesh Kumar K.V
@ 2016-11-25  8:22       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 18+ messages in thread
From: Benjamin Herrenschmidt @ 2016-11-25  8:22 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Paul Mackerras; +Cc: mpe, linuxppc-dev

On Fri, 2016-11-25 at 12:35 +0530, Aneesh Kumar K.V wrote:
> Now the question is how we will support _PAGE_DEVMAP (for nvdimm).
> We don't have any free software pte bits left after we make the above
> change.
> 
> NOTE: We do have 3 reserved bits (ppc bits 4-6); can we use those?

Yes, I think we can, at least as long as it's a DD1-only thing; future
processors might re-use these bits.

Cheers,
Ben.

end of thread

Thread overview: 18+ messages
2016-11-23 11:09 [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Aneesh Kumar K.V
2016-11-23 11:09 ` [PATCH v5 2/7] powerpc/mm: Rename hugetlb-radix.h to hugetlb.h Aneesh Kumar K.V
2016-11-23 11:09 ` [PATCH v5 3/7] powerpc/mm/hugetlb: Handle hugepage size supported by hash config Aneesh Kumar K.V
2016-11-23 14:08   ` Balbir Singh
2016-11-23 14:30     ` Aneesh Kumar K.V
2016-11-23 11:10 ` [PATCH v5 4/7] powerpc/mm/hugetlb: Make copy of huge_ptep_get_and_clear to different platform headers Aneesh Kumar K.V
2016-11-23 11:10 ` [PATCH v5 5/7] powerpc/mm/hugetlb: Switch hugetlb update to use huge_pte_update Aneesh Kumar K.V
2016-11-23 11:10 ` [PATCH v5 6/7] powerpc/mm: update pte_update to not do full mm tlb flush Aneesh Kumar K.V
2016-11-23 11:10 ` [PATCH v5 7/7] powerpc/mm: Batch tlb flush when invalidating pte entries Aneesh Kumar K.V
2016-11-23 11:23 ` [PATCH v5 1/7] powerpc/mm: update ptep_set_access_flag to not do full mm tlb flush Balbir Singh
2016-11-23 11:53   ` Aneesh Kumar K.V
2016-11-23 14:05     ` Balbir Singh
2016-11-23 14:36       ` Aneesh Kumar K.V
2016-11-23 15:21         ` Balbir Singh
2016-11-25  2:48 ` Paul Mackerras
2016-11-25  4:19   ` Aneesh Kumar K.V
2016-11-25  7:05     ` Aneesh Kumar K.V
2016-11-25  8:22       ` Benjamin Herrenschmidt
