linuxppc-dev.lists.ozlabs.org archive mirror
* [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap
@ 2017-03-30 12:03 Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 02/19] powerpc/mm/slice: Update the function prototype Michael Ellerman
                   ` (18 more replies)
  0 siblings, 19 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

In a followup patch we want to increase the VA range, which will
require high_slices to have more than 64 bits. To enable this, convert
high_slices to a bitmap. We keep the number of bits the same in this
patch and increase it to a larger value later.
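
To illustrate the conversion, here is a minimal userspace sketch (not
kernel code; the helpers below are stand-ins for the kernel bitmap API)
showing that a test like "high_slices & (1ul << i)" and a bit test on an
array of unsigned longs agree for all 64 bits, while the array form
extends naturally beyond 64:

/* Userspace model of the u64 -> bitmap conversion (stand-in helpers). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG   (8 * sizeof(unsigned long))
#define BITMAP_WORDS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static void bitmap_set_bit(unsigned long *map, unsigned int bit)
{
        map[bit / BITS_PER_LONG] |= 1UL << (bit % BITS_PER_LONG);
}

static int bitmap_test_bit(const unsigned long *map, unsigned int bit)
{
        return (map[bit / BITS_PER_LONG] >> (bit % BITS_PER_LONG)) & 1;
}

int main(void)
{
        uint64_t old_high_slices = 0;                    /* old u64 field */
        unsigned long new_high_slices[BITMAP_WORDS(64)] = { 0 }; /* new bitmap */
        unsigned int i;

        /* Mark slices 3 and 63 in both representations. */
        old_high_slices |= 1ULL << 3;
        old_high_slices |= 1ULL << 63;
        bitmap_set_bit(new_high_slices, 3);
        bitmap_set_bit(new_high_slices, 63);

        /* For 64 slices the two representations agree bit for bit. */
        for (i = 0; i < 64; i++)
                assert(!!(old_high_slices & (1ULL << i)) ==
                       bitmap_test_bit(new_high_slices, i));

        printf("bitmap matches the u64 for all 64 high slices\n");
        return 0;
}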

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Fold in fix to use bitmap_empty()]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/page_64.h |  15 ++---
 arch/powerpc/mm/slice.c            | 112 +++++++++++++++++++++++++------------
 2 files changed, 81 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 3e83d2a20b6f..bd55ff751938 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -98,19 +98,16 @@ extern u64 ppc64_pft_size;
 #define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
 
+#ifndef __ASSEMBLY__
 /*
- * 1 bit per slice and we have one slice per 1TB
- * Right now we support only 64TB.
- * IF we change this we will have to change the type
- * of high_slices
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to the 4GB range. That gets us 16 low slices. For the rest we track
+ * slices in 1TB increments.
+ * 64 below is actually SLICE_NUM_HIGH, spelled out to avoid compile errors
  */
-#define SLICE_MASK_SIZE 8
-
-#ifndef __ASSEMBLY__
-
 struct slice_mask {
 	u16 low_slices;
-	u64 high_slices;
+	DECLARE_BITMAP(high_slices, 64);
 };
 
 struct mm_struct;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index bf150557dba8..639c7171d174 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -36,11 +36,6 @@
 #include <asm/copro.h>
 #include <asm/hugetlb.h>
 
-/* some sanity checks */
-#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
-#error H_PGTABLE_RANGE exceeds slice_mask high_slices size
-#endif
-
 static DEFINE_SPINLOCK(slice_convert_lock);
 
 
@@ -49,7 +44,7 @@ int _slice_debug = 1;
 
 static void slice_print_mask(const char *label, struct slice_mask mask)
 {
-	char	*p, buf[16 + 3 + 64 + 1];
+	char	*p, buf[SLICE_NUM_LOW + 3 + SLICE_NUM_HIGH + 1];
 	int	i;
 
 	if (!_slice_debug)
@@ -60,8 +55,12 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
 	*(p++) = ' ';
 	*(p++) = '-';
 	*(p++) = ' ';
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		*(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		if (test_bit(i, mask.high_slices))
+			*(p++) = '1';
+		else
+			*(p++) = '0';
+	}
 	*(p++) = 0;
 
 	printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -80,7 +79,10 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 					     unsigned long len)
 {
 	unsigned long end = start + len - 1;
-	struct slice_mask ret = { 0, 0 };
+	struct slice_mask ret;
+
+	ret.low_slices = 0;
+	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
 
 	if (start < SLICE_LOW_TOP) {
 		unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
@@ -89,10 +91,13 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 			- (1u << GET_LOW_SLICE_INDEX(start));
 	}
 
-	if ((start + len) > SLICE_LOW_TOP)
-		ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
-			- (1ul << GET_HIGH_SLICE_INDEX(start));
+	if ((start + len) > SLICE_LOW_TOP) {
+		unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+		unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+		unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
 
+		bitmap_set(ret.high_slices, start_index, count);
+	}
 	return ret;
 }
 
@@ -129,9 +134,12 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 
 static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
 {
-	struct slice_mask ret = { 0, 0 };
+	struct slice_mask ret;
 	unsigned long i;
 
+	ret.low_slices = 0;
+	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (!slice_low_has_vma(mm, i))
 			ret.low_slices |= 1u << i;
@@ -141,7 +149,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
 
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
 		if (!slice_high_has_vma(mm, i))
-			ret.high_slices |= 1ul << i;
+			__set_bit(i, ret.high_slices);
 
 	return ret;
 }
@@ -150,10 +158,13 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
-	struct slice_mask ret = { 0, 0 };
+	struct slice_mask ret;
 	unsigned long i;
 	u64 lpsizes;
 
+	ret.low_slices = 0;
+	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
@@ -164,7 +175,7 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-			ret.high_slices |= 1ul << i;
+			__set_bit(i, ret.high_slices);
 	}
 
 	return ret;
@@ -172,8 +183,13 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 
 static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
 {
+	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+
+	bitmap_and(result, mask.high_slices,
+		   available.high_slices, SLICE_NUM_HIGH);
+
 	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		(mask.high_slices & available.high_slices) == mask.high_slices;
+		bitmap_equal(result, mask.high_slices, SLICE_NUM_HIGH);
 }
 
 static void slice_flush_segments(void *parm)
@@ -220,7 +236,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 	for (i = 0; i < SLICE_NUM_HIGH; i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
-		if (mask.high_slices & (1ul << i))
+		if (test_bit(i, mask.high_slices))
 			hpsizes[index] = (hpsizes[index] &
 					  ~(0xf << (mask_index * 4))) |
 				(((unsigned long)psize) << (mask_index * 4));
@@ -256,7 +272,7 @@ static bool slice_scan_available(unsigned long addr,
 		slice = GET_HIGH_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) ?
 			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-		return !!(available.high_slices & (1ul << slice));
+		return !!test_bit(slice, available.high_slices);
 	}
 }
 
@@ -363,15 +379,24 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 		return slice_find_area_bottomup(mm, len, mask, psize);
 }
 
-#define or_mask(dst, src)	do {			\
-	(dst).low_slices |= (src).low_slices;		\
-	(dst).high_slices |= (src).high_slices;		\
-} while (0)
+static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+{
+	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
 
-#define andnot_mask(dst, src)	do {			\
-	(dst).low_slices &= ~(src).low_slices;		\
-	(dst).high_slices &= ~(src).high_slices;	\
-} while (0)
+	dst->low_slices |= src->low_slices;
+	bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+	bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+}
+
+static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+{
+	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+
+	dst->low_slices &= ~src->low_slices;
+
+	bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+	bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+}
 
 #ifdef CONFIG_PPC_64K_PAGES
 #define MMU_PAGE_BASE	MMU_PAGE_64K
@@ -383,15 +408,28 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 				      unsigned long flags, unsigned int psize,
 				      int topdown)
 {
-	struct slice_mask mask = {0, 0};
+	struct slice_mask mask;
 	struct slice_mask good_mask;
-	struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
-	struct slice_mask compat_mask = {0, 0};
+	struct slice_mask potential_mask;
+	struct slice_mask compat_mask;
 	int fixed = (flags & MAP_FIXED);
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	struct mm_struct *mm = current->mm;
 	unsigned long newaddr;
 
+	/*
+	 * init different masks
+	 */
+	mask.low_slices = 0;
+	bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
+
+	/* silence stupid warning */;
+	potential_mask.low_slices = 0;
+	bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
+
+	compat_mask.low_slices = 0;
+	bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
+
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
 	VM_BUG_ON(radix_enabled());
@@ -449,7 +487,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (psize == MMU_PAGE_64K) {
 		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
 		if (fixed)
-			or_mask(good_mask, compat_mask);
+			slice_or_mask(&good_mask, &compat_mask);
 	}
 #endif
 
@@ -484,7 +522,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * empty and thus can be converted
 	 */
 	potential_mask = slice_mask_for_free(mm);
-	or_mask(potential_mask, good_mask);
+	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
 	if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
@@ -517,7 +555,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
-		or_mask(potential_mask, compat_mask);
+		slice_or_mask(&potential_mask, &compat_mask);
 		addr = slice_find_area(mm, len, potential_mask, psize,
 				       topdown);
 	}
@@ -531,9 +569,9 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	slice_print_mask(" mask", mask);
 
  convert:
-	andnot_mask(mask, good_mask);
-	andnot_mask(mask, compat_mask);
-	if (mask.low_slices || mask.high_slices) {
+	slice_andnot_mask(&mask, &good_mask);
+	slice_andnot_mask(&mask, &compat_mask);
+	if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
 		slice_convert(mm, mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
@@ -700,7 +738,7 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
 		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
-		or_mask(available, compat_mask);
+		slice_or_mask(&available, &compat_mask);
 	}
 #endif
 
-- 
2.7.4

* [PATCH v6 02/19] powerpc/mm/slice: Update the function prototype
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 03/19] powerpc/mm: Move copy_mm_to_paca to paca.c Michael Ellerman
                   ` (17 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This avoids copying the slice_mask struct as a function return value.
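
For illustration, a small userspace sketch of the calling convention
change (hypothetical names, not the kernel functions): the new style
fills caller-provided storage instead of returning the struct by value:

/* Return-by-value vs out-parameter, modelled in plain C. */
#include <stdio.h>
#include <string.h>

struct mask {
        unsigned long low;
        unsigned long high[8];  /* big enough that the copy matters */
};

/* Old style: the whole struct is copied back to the caller. */
static struct mask make_mask_by_value(unsigned long low)
{
        struct mask m;

        memset(&m, 0, sizeof(m));
        m.low = low;
        return m;
}

/* New style: the caller provides the storage, nothing is copied back. */
static void make_mask(unsigned long low, struct mask *ret)
{
        memset(ret, 0, sizeof(*ret));
        ret->low = low;
}

int main(void)
{
        struct mask a = make_mask_by_value(0xffff);
        struct mask b;

        make_mask(0xffff, &b);
        printf("same contents: %d\n", !memcmp(&a, &b, sizeof(a)));
        return 0;
}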

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slice.c | 62 ++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 639c7171d174..d1da357583e3 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -75,19 +75,18 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
 
 #endif
 
-static struct slice_mask slice_range_to_mask(unsigned long start,
-					     unsigned long len)
+static void slice_range_to_mask(unsigned long start, unsigned long len,
+				struct slice_mask *ret)
 {
 	unsigned long end = start + len - 1;
-	struct slice_mask ret;
 
-	ret.low_slices = 0;
-	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+	ret->low_slices = 0;
+	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	if (start < SLICE_LOW_TOP) {
 		unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
 
-		ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+		ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
 			- (1u << GET_LOW_SLICE_INDEX(start));
 	}
 
@@ -96,9 +95,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 		unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
 		unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
 
-		bitmap_set(ret.high_slices, start_index, count);
+		bitmap_set(ret->high_slices, start_index, count);
 	}
-	return ret;
 }
 
 static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
@@ -132,53 +130,47 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 	return !slice_area_is_free(mm, start, end - start);
 }
 
-static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
 {
-	struct slice_mask ret;
 	unsigned long i;
 
-	ret.low_slices = 0;
-	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+	ret->low_slices = 0;
+	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (!slice_low_has_vma(mm, i))
-			ret.low_slices |= 1u << i;
+			ret->low_slices |= 1u << i;
 
 	if (mm->task_size <= SLICE_LOW_TOP)
-		return ret;
+		return;
 
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
 		if (!slice_high_has_vma(mm, i))
-			__set_bit(i, ret.high_slices);
-
-	return ret;
+			__set_bit(i, ret->high_slices);
 }
 
-static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
-	struct slice_mask ret;
 	unsigned long i;
 	u64 lpsizes;
 
-	ret.low_slices = 0;
-	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+	ret->low_slices = 0;
+	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
-			ret.low_slices |= 1u << i;
+			ret->low_slices |= 1u << i;
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < SLICE_NUM_HIGH; i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-			__set_bit(i, ret.high_slices);
+			__set_bit(i, ret->high_slices);
 	}
-
-	return ret;
 }
 
 static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
@@ -460,7 +452,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	good_mask = slice_mask_for_size(mm, psize);
+	slice_mask_for_size(mm, psize, &good_mask);
 	slice_print_mask(" good_mask", good_mask);
 
 	/*
@@ -485,7 +477,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If we support combo pages, we can allow 64k pages in 4k slices */
 	if (psize == MMU_PAGE_64K) {
-		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
 		if (fixed)
 			slice_or_mask(&good_mask, &compat_mask);
 	}
@@ -494,7 +486,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First check hint if it's valid or if we have MAP_FIXED */
 	if (addr != 0 || fixed) {
 		/* Build a mask for the requested range */
-		mask = slice_range_to_mask(addr, len);
+		slice_range_to_mask(addr, len, &mask);
 		slice_print_mask(" mask", mask);
 
 		/* Check if we fit in the good mask. If we do, we just return,
@@ -521,7 +513,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* We don't fit in the good mask, check what other slices are
 	 * empty and thus can be converted
 	 */
-	potential_mask = slice_mask_for_free(mm);
+	slice_mask_for_free(mm, &potential_mask);
 	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
@@ -564,7 +556,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (addr == -ENOMEM)
 		return -ENOMEM;
 
-	mask = slice_range_to_mask(addr, len);
+	slice_range_to_mask(addr, len, &mask);
 	slice_dbg(" found potential area at 0x%lx\n", addr);
 	slice_print_mask(" mask", mask);
 
@@ -696,9 +688,11 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 			   unsigned long len, unsigned int psize)
 {
-	struct slice_mask mask = slice_range_to_mask(start, len);
+	struct slice_mask mask;
 
 	VM_BUG_ON(radix_enabled());
+
+	slice_range_to_mask(start, len, &mask);
 	slice_convert(mm, mask, psize);
 }
 
@@ -731,13 +725,13 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 	if (radix_enabled())
 		return 0;
 
-	mask = slice_range_to_mask(addr, len);
-	available = slice_mask_for_size(mm, psize);
+	slice_range_to_mask(addr, len, &mask);
+	slice_mask_for_size(mm, psize, &available);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
-		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
 		slice_or_mask(&available, &compat_mask);
 	}
 #endif
-- 
2.7.4

* [PATCH v6 03/19] powerpc/mm: Move copy_mm_to_paca to paca.c
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 02/19] powerpc/mm/slice: Update the function prototype Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 04/19] powerpc/mm: Remove checks that TASK_SIZE_USER64 is too small Michael Ellerman
                   ` (16 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We also update the function argument to struct mm_struct. Move the
function out of the header so that it can see the definition of
struct mm_struct. No functional change in this patch.
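
A minimal userspace sketch of the argument change (hypothetical names):
callers now pass the containing struct and the callee derives the
member, instead of every caller passing &mm->context:

#include <stdio.h>

struct context {
        int id;
};

struct mm {
        struct context context;
};

/* Was: copy_to_paca(struct context *ctx). Now takes the mm itself. */
static void copy_to_paca(struct mm *mm)
{
        struct context *ctx = &mm->context;

        printf("copying context id %d\n", ctx->id);
}

int main(void)
{
        struct mm mm = { .context = { .id = 42 } };

        copy_to_paca(&mm);      /* call sites used to pass &mm.context */
        return 0;
}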

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h | 18 +-----------------
 arch/powerpc/kernel/paca.c      | 19 +++++++++++++++++++
 arch/powerpc/mm/hash_utils_64.c |  4 ++--
 arch/powerpc/mm/slb.c           |  2 +-
 arch/powerpc/mm/slice.c         |  2 +-
 5 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 708c3e592eeb..f48c250339fd 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -206,23 +206,7 @@ struct paca_struct {
 #endif
 };
 
-#ifdef CONFIG_PPC_BOOK3S
-static inline void copy_mm_to_paca(mm_context_t *context)
-{
-	get_paca()->mm_ctx_id = context->id;
-#ifdef CONFIG_PPC_MM_SLICES
-	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
-	memcpy(&get_paca()->mm_ctx_high_slices_psize,
-	       &context->high_slices_psize, SLICE_ARRAY_SIZE);
-#else
-	get_paca()->mm_ctx_user_psize = context->user_psize;
-	get_paca()->mm_ctx_sllp = context->sllp;
-#endif
-}
-#else
-static inline void copy_mm_to_paca(mm_context_t *context){}
-#endif
-
+extern void copy_mm_to_paca(struct mm_struct *mm);
 extern struct paca_struct *paca;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index dfc479df9634..e2cf745a4b94 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -245,3 +245,22 @@ void __init free_unused_pacas(void)
 
 	free_lppacas();
 }
+
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_BOOK3S
+	mm_context_t *context = &mm->context;
+
+	get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+	memcpy(&get_paca()->mm_ctx_high_slices_psize,
+	       &context->high_slices_psize, SLICE_ARRAY_SIZE);
+#else /* CONFIG_PPC_MM_SLICES */
+	get_paca()->mm_ctx_user_psize = context->user_psize;
+	get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+#else /* CONFIG_PPC_BOOK3S */
+	return;
+#endif
+}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d0ee17029e99..716255f88bbd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1133,7 +1133,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 	copro_flush_all_slbs(mm);
 	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
 
-		copy_mm_to_paca(&mm->context);
+		copy_mm_to_paca(mm);
 		slb_flush_and_rebolt();
 	}
 }
@@ -1205,7 +1205,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
 {
 	if (user_region) {
 		if (psize != get_paca_psize(ea)) {
-			copy_mm_to_paca(&mm->context);
+			copy_mm_to_paca(mm);
 			slb_flush_and_rebolt();
 		}
 	} else if (get_paca()->vmalloc_sllp !=
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5e01b2ece1d0..98ae810b8c21 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -229,7 +229,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		asm volatile("slbie %0" : : "r" (slbie_data));
 
 	get_paca()->slb_cache_ptr = 0;
-	copy_mm_to_paca(&mm->context);
+	copy_mm_to_paca(mm);
 
 	/*
 	 * preload some userspace segments into the SLB.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index d1da357583e3..175e0dbce89b 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -192,7 +192,7 @@ static void slice_flush_segments(void *parm)
 	if (mm != current->active_mm)
 		return;
 
-	copy_mm_to_paca(&current->active_mm->context);
+	copy_mm_to_paca(current->active_mm);
 
 	local_irq_save(flags);
 	slb_flush_and_rebolt();
-- 
2.7.4

* [PATCH v6 04/19] powerpc/mm: Remove checks that TASK_SIZE_USER64 is too small
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 02/19] powerpc/mm/slice: Update the function prototype Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 03/19] powerpc/mm: Move copy_mm_to_paca to paca.c Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 05/19] powerpc/mm/slice: Move slice_mask struct definition to slice.c Michael Ellerman
                   ` (15 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Remove the checks that TASK_SIZE_USER64 is smaller than H_PGTABLE_RANGE
and USER_VSID_RANGE.

In a following patch we will deliberately add support for a TASK_SIZE
smaller than both ranges, so this will no longer be an error condition.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Keep the check in pgtable_64.c that we don't exceed USER_VSID_RANGE]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/init_64.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 9be992083d2a..8f6f2a173e47 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -71,10 +71,6 @@
 #if H_PGTABLE_RANGE > USER_VSID_RANGE
 #warning Limited user VSID range means pagetable space is wasted
 #endif
-
-#if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
-#warning TASK_SIZE is smaller than it needs to be.
-#endif
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 phys_addr_t memstart_addr = ~0;
-- 
2.7.4

* [PATCH v6 05/19] powerpc/mm/slice: Move slice_mask struct definition to slice.c
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (2 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 04/19] powerpc/mm: Remove checks that TASK_SIZE_USER64 is too small Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 06/19] powerpc/mm/slice: Update slice mask printing to use bitmap printing Michael Ellerman
                   ` (14 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This structure definition need not be in a header, since it is used
only by slice.c. So move it to slice.c. This also allows us to use
SLICE_NUM_HIGH instead of 64.

I also switch the low_slices type from u16 to u64. This doesn't change
the size of the struct, because of the padding added after a u16, and
it lets us use the bitmap printing functions to print the slice mask.
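
As a sanity check on the size claim, a userspace sketch with stand-in
types (assuming a 64-bit ABI): with a u16 first member the compiler
inserts six bytes of padding before the 8-byte-aligned bitmap word, so
both layouts end up the same size:

#include <stdint.h>
#include <stdio.h>

struct mask_u16 {
        uint16_t low_slices;            /* 2 bytes + 6 bytes of padding */
        unsigned long high_slices[1];   /* stand-in for DECLARE_BITMAP(..., 64) */
};

struct mask_u64 {
        uint64_t low_slices;            /* 8 bytes, no padding needed */
        unsigned long high_slices[1];
};

int main(void)
{
        /* Both print 16 on LP64. */
        printf("u16 low_slices: %zu bytes\n", sizeof(struct mask_u16));
        printf("u64 low_slices: %zu bytes\n", sizeof(struct mask_u64));
        return 0;
}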

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/page_64.h | 11 -----------
 arch/powerpc/mm/slice.c            | 10 +++++++++-
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index bd55ff751938..c4d9654bd637 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -99,17 +99,6 @@ extern u64 ppc64_pft_size;
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
 
 #ifndef __ASSEMBLY__
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * up to the 4GB range. That gets us 16 low slices. For the rest we track
- * slices in 1TB increments.
- * 64 below is actually SLICE_NUM_HIGH, spelled out to avoid compile errors
- */
-struct slice_mask {
-	u16 low_slices;
-	DECLARE_BITMAP(high_slices, 64);
-};
-
 struct mm_struct;
 
 extern unsigned long slice_get_unmapped_area(unsigned long addr,
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 175e0dbce89b..7599c2703520 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,7 +37,15 @@
 #include <asm/hugetlb.h>
 
 static DEFINE_SPINLOCK(slice_convert_lock);
-
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to the 4GB range. That gets us 16 low slices. For the rest we track
+ * slices in 1TB increments.
+ */
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
 
 #ifdef DEBUG
 int _slice_debug = 1;
-- 
2.7.4

* [PATCH v6 06/19] powerpc/mm/slice: Update slice mask printing to use bitmap printing.
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (3 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 05/19] powerpc/mm/slice: Move slice_mask struct definition to slice.c Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 07/19] powerpc/mm/hash: Abstract context id allocation for KVM Michael Ellerman
                   ` (13 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We now get output like the following, which is much better.

[    0.935306]  good_mask low_slice: 0-15
[    0.935360]  good_mask high_slice: 0-511

Compared to

[    0.953414]  good_mask:1111111111111111 - 1111111111111.........

I also fixed an error with slice_dbg printing.
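
For reference, a userspace model (not the kernel's %pbl implementation)
of how a set-bit mask gets printed as ranges such as "0-511":

#include <stdio.h>
#include <string.h>

#define BITS_PER_LONG   (8 * sizeof(unsigned long))

static int test_bit(const unsigned long *map, unsigned int i)
{
        return (map[i / BITS_PER_LONG] >> (i % BITS_PER_LONG)) & 1;
}

/* Print the set bits of a bitmap as comma-separated ranges. */
static void print_bit_ranges(const unsigned long *map, unsigned int nbits)
{
        const char *sep = "";
        unsigned int i = 0;

        while (i < nbits) {
                unsigned int start;

                while (i < nbits && !test_bit(map, i))
                        i++;
                if (i == nbits)
                        break;
                start = i;
                while (i < nbits && test_bit(map, i))
                        i++;
                if (start == i - 1)
                        printf("%s%u", sep, start);
                else
                        printf("%s%u-%u", sep, start, i - 1);
                sep = ",";
        }
        printf("\n");
}

int main(void)
{
        unsigned long high[(512 + BITS_PER_LONG - 1) / BITS_PER_LONG];

        memset(high, 0xff, sizeof(high));       /* every high slice set */
        print_bit_ranges(high, 512);            /* prints "0-511" */
        return 0;
}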

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slice.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 7599c2703520..95e5a20b1b6a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -52,29 +52,13 @@ int _slice_debug = 1;
 
 static void slice_print_mask(const char *label, struct slice_mask mask)
 {
-	char	*p, buf[SLICE_NUM_LOW + 3 + SLICE_NUM_HIGH + 1];
-	int	i;
-
 	if (!_slice_debug)
 		return;
-	p = buf;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		*(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
-	*(p++) = ' ';
-	*(p++) = '-';
-	*(p++) = ' ';
-	for (i = 0; i < SLICE_NUM_HIGH; i++) {
-		if (test_bit(i, mask.high_slices))
-			*(p++) = '1';
-		else
-			*(p++) = '0';
-	}
-	*(p++) = 0;
-
-	printk(KERN_DEBUG "%s:%s\n", label, buf);
+	pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
+	pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
 }
 
-#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
 
 #else
 
@@ -243,8 +227,8 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 	}
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
-		  mm->context.low_slices_psize,
-		  mm->context.high_slices_psize);
+		  (unsigned long)mm->context.low_slices_psize,
+		  (unsigned long)mm->context.high_slices_psize);
 
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 
@@ -686,8 +670,8 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
-		  mm->context.low_slices_psize,
-		  mm->context.high_slices_psize);
+		  (unsigned long)mm->context.low_slices_psize,
+		  (unsigned long)mm->context.high_slices_psize);
 
  bail:
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
-- 
2.7.4

* [PATCH v6 07/19] powerpc/mm/hash: Abstract context id allocation for KVM
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (4 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 06/19] powerpc/mm/slice: Update slice mask printing to use bitmap printing Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 08/19] powerpc/mm/hash: Pull hash constants into hash__alloc_context_id() Michael Ellerman
                   ` (12 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

KVM wants to be able to allocate an MMU context id, which it does
currently by calling __init_new_context().

We're about to rework that code, so provide a wrapper for KVM so that
it doesn't need to worry about the details.
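
The wrapper itself is deliberately thin; a userspace sketch of the
pattern with hypothetical names:

#include <stdio.h>

/* Internal helper; free to change shape in later patches. */
static int internal_new_context(void)
{
        static int next_id = 1;

        return next_id++;
}

/* The exported wrapper; its name and behaviour stay stable for callers. */
static int alloc_context_id_wrapper(void)
{
        return internal_new_context();
}

int main(void)
{
        printf("allocated context id %d\n", alloc_context_id_wrapper());
        return 0;
}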

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu_context.h |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c  |  2 +-
 arch/powerpc/mm/mmu_context_book3s64.c | 10 ++++++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0aca261..7d721101ec78 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -51,7 +51,7 @@ static inline void switch_mmu_context(struct mm_struct *prev,
 	return switch_slb(tsk, next);
 }
 
-extern int __init_new_context(void);
+extern int hash__alloc_context_id(void);
 extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
 #else
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd26..b35f44c98d1f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -390,7 +390,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	int err;
 
-	err = __init_new_context();
+	err = hash__alloc_context_id();
 	if (err < 0)
 		return -1;
 	vcpu3s->context_id[0] = err;
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 73bf6e14c3aa..650a498b1de9 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -30,7 +30,7 @@
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
-int __init_new_context(void)
+static int __init_new_context(void)
 {
 	int index;
 	int err;
@@ -57,7 +57,13 @@ int __init_new_context(void)
 
 	return index;
 }
-EXPORT_SYMBOL_GPL(__init_new_context);
+
+int hash__alloc_context_id(void)
+{
+	return __init_new_context();
+}
+EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+
 static int radix__init_new_context(struct mm_struct *mm, int index)
 {
 	unsigned long rts_field;
-- 
2.7.4

* [PATCH v6 08/19] powerpc/mm/hash: Pull hash constants into hash__alloc_context_id()
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (5 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 07/19] powerpc/mm/hash: Abstract context id allocation for KVM Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 09/19] powerpc/mm: Split radix vs hash mm context initialisation Michael Ellerman
                   ` (11 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

The min and max context id values used in alloc_context_id() are
currently the right values for use on hash, and happen to also be safe
for use on radix.

But we need to change that in a subsequent patch, so make the min/max
ids parameters and pull the hash values into hash__alloc_context_id().
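
A minimal userspace model (hypothetical, not the IDA code) of an
allocator that takes the min/max ids as parameters, the way
alloc_context_id() now does:

#include <stdio.h>

#define MAX_IDS 64

static unsigned char used[MAX_IDS + 1];

/* Return the lowest free id in [min_id, max_id], or -1 if none is left. */
static int alloc_id(int min_id, int max_id)
{
        int id;

        for (id = min_id; id <= max_id; id++) {
                if (!used[id]) {
                        used[id] = 1;
                        return id;
                }
        }
        return -1;
}

int main(void)
{
        /* After this patch, hash passes (1, MAX_USER_CONTEXT) here. */
        printf("first id:  %d\n", alloc_id(1, MAX_IDS));        /* 1 */
        printf("second id: %d\n", alloc_id(1, MAX_IDS));        /* 2 */
        printf("narrowed:  %d\n", alloc_id(10, 12));            /* 10 */
        return 0;
}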

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_book3s64.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 650a498b1de9..981c3b02e46a 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -30,17 +30,16 @@
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
-static int __init_new_context(void)
+static int alloc_context_id(int min_id, int max_id)
 {
-	int index;
-	int err;
+	int index, err;
 
 again:
 	if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
 		return -ENOMEM;
 
 	spin_lock(&mmu_context_lock);
-	err = ida_get_new_above(&mmu_context_ida, 1, &index);
+	err = ida_get_new_above(&mmu_context_ida, min_id, &index);
 	spin_unlock(&mmu_context_lock);
 
 	if (err == -EAGAIN)
@@ -48,7 +47,7 @@ static int __init_new_context(void)
 	else if (err)
 		return err;
 
-	if (index > MAX_USER_CONTEXT) {
+	if (index > max_id) {
 		spin_lock(&mmu_context_lock);
 		ida_remove(&mmu_context_ida, index);
 		spin_unlock(&mmu_context_lock);
@@ -60,7 +59,7 @@ static int __init_new_context(void)
 
 int hash__alloc_context_id(void)
 {
-	return __init_new_context();
+	return alloc_context_id(1, MAX_USER_CONTEXT);
 }
 EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
@@ -80,7 +79,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	int index;
 
-	index = __init_new_context();
+	index = hash__alloc_context_id();
 	if (index < 0)
 		return index;
 
-- 
2.7.4

* [PATCH v6 09/19] powerpc/mm: Split radix vs hash mm context initialisation
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (6 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 08/19] powerpc/mm/hash: Pull hash constants into hash__alloc_context_id() Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:03 ` [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel Michael Ellerman
                   ` (10 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

Complete the split of the radix vs hash mm context initialisation.

This is mostly code movement, with the exception that we now limit the
context allocation to PRTB_ENTRIES - 1 on radix.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu.h |  2 +
 arch/powerpc/mm/mmu_context_book3s64.c   | 67 ++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 805d4105e9bb..cce434e7de06 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -65,6 +65,8 @@ extern struct patb_entry *partition_tb;
  * MAX_USER_CONTEXT * 16 bytes of space.
  */
 #define PRTB_SIZE_SHIFT	(CONTEXT_BITS + 4)
+#define PRTB_ENTRIES	(1ul << CONTEXT_BITS)
+
 /*
  * Power9 currently only support 64K partition table size.
  */
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 981c3b02e46a..e5da551edde3 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -63,47 +63,66 @@ int hash__alloc_context_id(void)
 }
 EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
-static int radix__init_new_context(struct mm_struct *mm, int index)
+static int hash__init_new_context(struct mm_struct *mm)
+{
+	int index;
+
+	index = hash__alloc_context_id();
+	if (index < 0)
+		return index;
+
+	/*
+	 * The old code would re-promote on fork, we don't do that when using
+	 * slices as it could cause problem promoting slices that have been
+	 * forced down to 4K.
+	 *
+	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+	 * explicitly against context.id == 0. This ensures that we properly
+	 * initialize context slice details for newly allocated mm's (which will
+	 * have id == 0) and don't alter context slice inherited via fork (which
+	 * will have id != 0).
+	 *
+	 * We should not be calling init_new_context() on init_mm. Hence a
+	 * check against 0 is OK.
+	 */
+	if (mm->context.id == 0)
+		slice_set_user_psize(mm, mmu_virtual_psize);
+
+	subpage_prot_init_new_context(mm);
+
+	return index;
+}
+
+static int radix__init_new_context(struct mm_struct *mm)
 {
 	unsigned long rts_field;
+	int index;
+
+	index = alloc_context_id(1, PRTB_ENTRIES - 1);
+	if (index < 0)
+		return index;
 
 	/*
 	 * set the process table entry,
 	 */
 	rts_field = radix__get_tree_size();
 	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
-	return 0;
+
+	return index;
 }
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	int index;
 
-	index = hash__alloc_context_id();
+	if (radix_enabled())
+		index = radix__init_new_context(mm);
+	else
+		index = hash__init_new_context(mm);
+
 	if (index < 0)
 		return index;
 
-	if (radix_enabled()) {
-		radix__init_new_context(mm, index);
-	} else {
-
-		/* The old code would re-promote on fork, we don't do that
-		 * when using slices as it could cause problem promoting slices
-		 * that have been forced down to 4K
-		 *
-		 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
-		 * explicitly against context.id == 0. This ensures that we
-		 * properly initialize context slice details for newly allocated
-		 * mm's (which will have id == 0) and don't alter context slice
-		 * inherited via fork (which will have id != 0).
-		 *
-		 * We should not be calling init_new_context() on init_mm. Hence a
-		 * check against 0 is ok.
-		 */
-		if (mm->context.id == 0)
-			slice_set_user_psize(mm, mmu_virtual_psize);
-		subpage_prot_init_new_context(mm);
-	}
 	mm->context.id = index;
 #ifdef CONFIG_PPC_ICSWX
 	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
-- 
2.7.4

* [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (7 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 09/19] powerpc/mm: Split radix vs hash mm context initialisation Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-04-28 10:57   ` Michal Suchánek
  2017-03-30 12:03 ` [PATCH v6 11/19] powerpc/mm/hash: Check for non-kernel address in get_kernel_vsid() Michael Ellerman
                   ` (9 subsequent siblings)
  18 siblings, 1 reply; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Currently we use the top 4 context ids (0x7fffc-0x7ffff) for the kernel.
Kernel VSIDs are built using these top context values and the effective
segment ID. In subsequent patches we want to increase the max effective
address to 512TB. We will achieve that by increasing the number of
effective segment IDs, thereby increasing the virtual address range.

We will be switching to a 68-bit virtual address in a following patch.
But platforms like Power4 and Power5 only support a 65-bit virtual
address. We will handle that by limiting the context bits to 16 instead
of 19 on those platforms. That means the max context id will have a
different value on different platforms.

So that we don't have to deal with the kernel context ids changing
between different platforms, move the kernel context ids down to use
context ids 1-4.

We can't use segment 0 of context-id 0, because that maps to VSID 0,
which we want to keep as invalid, so we avoid context-id 0 entirely.
Similarly we can't use the last segment of the maximum context, so we
avoid it too.
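
The kernel context calculation then reduces to simple arithmetic on the
top nibble of the effective address; a userspace sketch using the
offset defined in this patch:

#include <stdio.h>

#define KERNEL_REGION_CONTEXT_OFFSET    (0xc - 1)       /* i.e. 0xb */

static unsigned long long kernel_context(unsigned long long ea)
{
        /* region id (top nibble) minus 0xb gives contexts 1..4 */
        return (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
}

int main(void)
{
        unsigned long long eas[] = {
                0xc000000000000000ULL,  /* linear mapping -> context 1 */
                0xd000000000000000ULL,  /*                -> context 2 */
                0xe000000000000000ULL,  /*                -> context 3 */
                0xf000000000000000ULL,  /* vmemmap        -> context 4 */
        };
        unsigned int i;

        for (i = 0; i < 4; i++)
                printf("ea 0x%016llx -> kernel context %llu\n",
                       eas[i], kernel_context(eas[i]));
        return 0;
}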

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Switch from 0-3 to 1-4 so VSID=0 remains invalid]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 60 ++++++++++++++++-----------
 arch/powerpc/mm/mmu_context_book3s64.c        |  2 +-
 arch/powerpc/mm/slb_low.S                     | 20 +++------
 3 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 52d8d1e4b772..a5ab6f5b8a7f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -491,13 +491,14 @@ extern void slb_set_size(u16 size);
  * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
  * from mmu context id and effective segment id of the address.
  *
- * For user processes max context id is limited to ((1ul << 19) - 5)
- * for kernel space, we use the top 4 context ids to map address as below
+ * For user processes max context id is limited to MAX_USER_CONTEXT.
+ *
+ * For kernel space, we use context ids 1-4 to map addresses as below:
  * NOTE: each context only support 64TB now.
- * 0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * 0x00001 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
+ * 0x00002 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
+ * 0x00003 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
+ * 0x00004 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
  *
  * The proto-VSIDs are then scrambled into real VSIDs with the
  * multiplicative hash:
@@ -511,15 +512,13 @@ extern void slb_set_size(u16 size);
  * robust scattering in the hash table (at least based on some initial
  * results).
  *
- * We also consider VSID 0 special. We use VSID 0 for slb entries mapping
- * bad address. This enables us to consolidate bad address handling in
- * hash_page.
+ * We use VSID 0 to indicate an invalid VSID. The means we can't use context id
+ * 0, because a context id of 0 and an EA of 0 gives a proto-VSID of 0, which
+ * will produce a VSID of 0.
  *
  * We also need to avoid the last segment of the last context, because that
  * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
- * because of the modulo operation in vsid scramble. But the vmemmap
- * (which is what uses region 0xf) will never be close to 64TB in size
- * (it's 56 bytes per page of system memory).
+ * because of the modulo operation in vsid scramble.
  */
 
 #define CONTEXT_BITS		19
@@ -532,12 +531,19 @@ extern void slb_set_size(u16 size);
 /*
  * 256MB segment
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
- * available for user + kernel mapping. The top 4 contexts are used for
- * kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
- * (19 == 37 + 28 - 46).
+ * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
+ * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
+ * context maps 2^46 bytes (64TB).
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
  */
-#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 5)
+#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 2)
+#define MIN_USER_CONTEXT	(5)
+
+/* Would be nice to use KERNEL_REGION_ID here */
+#define KERNEL_REGION_CONTEXT_OFFSET	(0xc - 1)
 
 /*
  * This should be computed such that protovosid * vsid_mulitplier
@@ -671,21 +677,25 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 
 /*
  * This is only valid for addresses >= PAGE_OFFSET
- *
- * For kernel space, we use the top 4 context ids to map address as below
- * 0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
  */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
 	unsigned long context;
 
 	/*
-	 * kernel take the top 4 context from the available range
+	 * For kernel space, we use context ids 1-4 to map the address space as
+	 * below:
+	 *
+	 * 0x00001 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
+	 * 0x00002 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
+	 * 0x00003 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
+	 * 0x00004 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
+	 *
+	 * So we can compute the context from the region (top nibble) by
+	 * subtracting 11, or 0xc - 1.
 	 */
-	context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
+	context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
+
 	return get_vsid(context, ea, ssize);
 }
 
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index e5da551edde3..a10e972221c4 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -59,7 +59,7 @@ static int alloc_context_id(int min_id, int max_id)
 
 int hash__alloc_context_id(void)
 {
-	return alloc_context_id(1, MAX_USER_CONTEXT);
+	return alloc_context_id(MIN_USER_CONTEXT, MAX_USER_CONTEXT);
 }
 EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a85e06ea6c20..ba1f8696c338 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -45,13 +45,6 @@ _GLOBAL(slb_allocate_realmode)
 	/* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
 	blt	cr7,0f			/* user or kernel? */
 
-	/* kernel address: proto-VSID = ESID */
-	/* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
-	 * this code will generate the protoVSID 0xfffffffff for the
-	 * top segment.  That's ok, the scramble below will translate
-	 * it to VSID 0, which is reserved as a bad VSID - one which
-	 * will never have any pages in it.  */
-
 	/* Check if hitting the linear mapping or some other kernel space
 	*/
 	bne	cr7,1f
@@ -63,12 +56,10 @@ _GLOBAL(slb_allocate_realmode)
 slb_miss_kernel_load_linear:
 	li	r11,0
 	/*
-	 * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+	 * context = (ea >> 60) - (0xc - 1)
 	 * r9 = region id.
 	 */
-	addis	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
-	addi	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
-
+	subi	r9,r9,KERNEL_REGION_CONTEXT_OFFSET
 
 BEGIN_FTR_SECTION
 	b	.Lslb_finish_load
@@ -77,9 +68,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 
 1:
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-	/* Check virtual memmap region. To be patches at kernel boot */
 	cmpldi	cr0,r9,0xf
 	bne	1f
+/* Check virtual memmap region. To be patched at kernel boot */
 .globl slb_miss_kernel_load_vmemmap
 slb_miss_kernel_load_vmemmap:
 	li	r11,0
@@ -102,11 +93,10 @@ slb_miss_kernel_load_io:
 	li	r11,0
 6:
 	/*
-	 * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+	 * context = (ea >> 60) - (0xc - 1)
 	 * r9 = region id.
 	 */
-	addis	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
-	addi	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
+	subi	r9,r9,KERNEL_REGION_CONTEXT_OFFSET
 
 BEGIN_FTR_SECTION
 	b	.Lslb_finish_load
-- 
2.7.4

* [PATCH v6 11/19] powerpc/mm/hash: Check for non-kernel address in get_kernel_vsid()
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (8 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel Michael Ellerman
@ 2017-03-30 12:03 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 12/19] powerpc/mm/hash: Support 68 bit VA Michael Ellerman
                   ` (8 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

get_kernel_vsid() has a very stern comment saying that it's only valid
for kernel addresses, but there's nothing in the code to enforce that.

Rather than hoping our callers are well behaved, add a check and return
a VSID of 0 (invalid).
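
A userspace model of the guard (the kernel-address test is reduced to a
top-nibble check here, which is equivalent for these regions):

#include <stdio.h>

static int is_kernel_ea(unsigned long long ea)
{
        /* kernel effective addresses have the top nibble >= 0xc */
        return (ea >> 60) >= 0xc;
}

static unsigned long long get_kernel_vsid_model(unsigned long long ea)
{
        if (!is_kernel_ea(ea))
                return 0;       /* VSID 0 == invalid */
        return 1;               /* stand-in for the real scrambled VSID */
}

int main(void)
{
        printf("user ea   -> vsid %llu\n",
               get_kernel_vsid_model(0x0000100000000000ULL));
        printf("kernel ea -> vsid %llu\n",
               get_kernel_vsid_model(0xc000000000000000ULL));
        return 0;
}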

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a5ab6f5b8a7f..10a34282829e 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -682,6 +682,9 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
 	unsigned long context;
 
+	if (!is_kernel_addr(ea))
+		return 0;
+
 	/*
 	 * For kernel space, we use context ids 1-4 to map the address space as
 	 * below:
-- 
2.7.4

* [PATCH v6 12/19] powerpc/mm/hash: Support 68 bit VA
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (9 preceding siblings ...)
  2017-03-30 12:03 ` [PATCH v6 11/19] powerpc/mm/hash: Check for non-kernel address in get_kernel_vsid() Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 13/19] powerpc/mm/hash: Convert mask to unsigned long Michael Ellerman
                   ` (7 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

In order to support a large effective address range (512TB), we want
to increase the virtual address bits to 68. But we do have platforms
like p4 and p5 that can only do 65-bit VA. We support those platforms
by limiting the context bits on them to 16.

The protovsid -> vsid conversion is verified to work with both 65-bit
and 68-bit VA values. I also documented the restrictions in a table
format as part of the code comments.
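
The same verification can be repeated in userspace. The sketch below
(not kernel code, but using the 24-bit multiplier and the 68-bit VA
proto-VSID widths from this patch: 40 bits for 256M segments, 28 bits
for 1T) checks that the overflow-free folding in vsid_scramble()
matches a real modulo:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define VSID_MULTIPLIER 12538073ULL     /* 24-bit prime, both segment sizes */

/* Optimised form: fold the high bits instead of doing a real modulo. */
static uint64_t scramble(uint64_t protovsid, unsigned int vsid_bits)
{
        uint64_t modulus = (1ULL << vsid_bits) - 1;
        uint64_t x = protovsid * VSID_MULTIPLIER;       /* fits in 64 bits */

        x = (x >> vsid_bits) + (x & modulus);
        return (x + ((x + 1) >> vsid_bits)) & modulus;
}

int main(void)
{
        /* 68-bit VA: 40 proto-VSID bits for 256M segments, 28 for 1T. */
        const unsigned int bits[] = { 40, 28 };
        unsigned int i, j;

        srand(1);
        for (i = 0; i < 2; i++) {
                uint64_t modulus = (1ULL << bits[i]) - 1;

                for (j = 0; j < 1000000; j++) {
                        uint64_t proto = (((uint64_t)rand() << 31) | rand())
                                                % (modulus + 1);
                        uint64_t ref = (proto * VSID_MULTIPLIER) % modulus;

                        assert(scramble(proto, bits[i]) == ref);
                }
        }
        printf("folding matches the modulo for 256M and 1T proto-VSIDs\n");
        return 0;
}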

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 127 ++++++++++++++++----------
 arch/powerpc/include/asm/mmu.h                |  19 ++--
 arch/powerpc/kvm/book3s_64_mmu_host.c         |   8 +-
 arch/powerpc/mm/mmu_context_book3s64.c        |   9 +-
 arch/powerpc/mm/slb_low.S                     |  54 +++++++++--
 5 files changed, 152 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 10a34282829e..c68102293a19 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -39,6 +39,7 @@
 
 /* Bits in the SLB VSID word */
 #define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_256M	SLB_VSID_SHIFT
 #define SLB_VSID_SHIFT_1T	24
 #define SLB_VSID_SSIZE_SHIFT	62
 #define SLB_VSID_B		ASM_CONST(0xc000000000000000)
@@ -521,9 +522,19 @@ extern void slb_set_size(u16 size);
  * because of the modulo operation in vsid scramble.
  */
 
+/*
+ * The max VA bits we support as of now is 68 bits. We want a 19-bit
+ * context ID.
+ * Restrictions:
+ * The GPU has a restriction that it can not access beyond 128TB
+ * (47-bit effective address). We also cannot do more than a 20-bit PID.
+ * For p4 and p5, which can only do 65-bit VA, we restrict our CONTEXT_BITS
+ * to 16 bits (i.e. we can only have 2^16 PIDs at the same time).
+ */
+#define VA_BITS			68
 #define CONTEXT_BITS		19
-#define ESID_BITS		18
-#define ESID_BITS_1T		6
+#define ESID_BITS		(VA_BITS - (SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T		(VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))
 
 #define ESID_BITS_MASK		((1 << ESID_BITS) - 1)
 #define ESID_BITS_1T_MASK	((1 << ESID_BITS_1T) - 1)
@@ -533,7 +544,7 @@ extern void slb_set_size(u16 size);
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
  * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
  * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB).
+ * context maps 2^49 bytes (512TB).
  *
  * We also need to avoid the last segment of the last context, because that
  * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
@@ -546,53 +557,45 @@ extern void slb_set_size(u16 size);
 #define KERNEL_REGION_CONTEXT_OFFSET	(0xc - 1)
 
 /*
+ * For platforms that support only a 65-bit VA we limit the context bits
+ */
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
+
+/*
  * This should be computed such that protovosid * vsid_mulitplier
- * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ * doesn't overflow 64 bits. The vsid_multiplier should also be
+ * co-prime to vsid_modulus. We also need to make sure that the number
+ * of bits in the multiplied result (dividend) is less than twice the
+ * number of protovsid bits for our modulus optimization to work.
+ *
+ * The below table shows the current values used.
+ * |-------+------------+----------------------+------------+-------------------|
+ * |       | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2* prot VSID_BITS |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 1T    |         24 |                   25 |         49 |                50 |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 256MB |         24 |                   37 |         61 |                74 |
+ * |-------+------------+----------------------+------------+-------------------|
+ *
+ * |-------+------------+----------------------+------------+--------------------|
+ * |       | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2* proto VSID_BITS |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 1T    |         24 |                   28 |         52 |                 56 |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 256MB |         24 |                   40 |         64 |                 80 |
+ * |-------+------------+----------------------+------------+--------------------|
+ *
  */
 #define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_256M		(CONTEXT_BITS + ESID_BITS)
-#define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
+#define VSID_BITS_256M		(VA_BITS - SID_SHIFT)
+#define VSID_BITS_65_256M	(65 - SID_SHIFT)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_1T		(CONTEXT_BITS + ESID_BITS_1T)
-#define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
-
+#define VSID_BITS_1T		(VA_BITS - SID_SHIFT_1T)
+#define VSID_BITS_65_1T		(65 - SID_SHIFT_1T)
 
 #define USER_VSID_RANGE	(1UL << (ESID_BITS + SID_SHIFT))
 
-/*
- * This macro generates asm code to compute the VSID scramble
- * function.  Used in slb_allocate() and do_stab_bolted.  The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- *	rt = register containing the proto-VSID and into which the
- *		VSID will be stored
- *	rx = scratch register (clobbered)
- *
- * 	- rt and rx must be different registers
- * 	- The answer will end up in the low VSID_BITS bits of rt.  The higher
- * 	  bits may contain other garbage, so you may need to mask the
- * 	  result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, size)					\
-	lis	rx,VSID_MULTIPLIER_##size@h;				\
-	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
-	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
-									\
-	srdi	rx,rt,VSID_BITS_##size;					\
-	clrldi	rt,rt,(64-VSID_BITS_##size);				\
-	add	rt,rt,rx;		/* add high and low bits */	\
-	/* NOTE: explanation based on VSID_BITS_##size = 36		\
-	 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and		\
-	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
-	 * 2^36-1, then r3+1 has the 2^36 bit set.  So, if r3+1 has	\
-	 * the bit clear, r3 already has the answer we want, if it	\
-	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
-	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
-	addi	rx,rt,1;						\
-	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
-	add	rt,rt,rx
-
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE  (H_PGTABLE_RANGE >> 41)
 
@@ -640,7 +643,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #define vsid_scramble(protovsid, size) \
 	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
 
-#else /* 1 */
+/* simplified form avoiding mod operation */
 #define vsid_scramble(protovsid, size) \
 	({								 \
 		unsigned long x;					 \
@@ -648,6 +651,21 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
 		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
 	})
+
+#else /* 1 */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+				  unsigned long vsid_multiplier, int vsid_bits)
+{
+	unsigned long vsid;
+	unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
+	/*
+	 * We have the same multiplier for both 256M and 1T segments now
+	 */
+	vsid = protovsid * vsid_multiplier;
+	vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
+	return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
+}
+
 #endif /* 1 */
 
 /* Returns the segment size indicator for a user address */
@@ -662,17 +680,30 @@ static inline int user_segment_size(unsigned long addr)
 static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 				     int ssize)
 {
+	unsigned long va_bits = VA_BITS;
+	unsigned long vsid_bits;
+	unsigned long protovsid;
+
 	/*
 	 * Bad address. We return VSID 0 for that
 	 */
 	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
 		return 0;
 
-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble((context << ESID_BITS)
-				     | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
-	return vsid_scramble((context << ESID_BITS_1T)
-			     | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		vsid_bits = va_bits - SID_SHIFT;
+		protovsid = (context << ESID_BITS) |
+			((ea >> SID_SHIFT) & ESID_BITS_MASK);
+		return vsid_scramble(protovsid, VSID_MULTIPLIER_256M, vsid_bits);
+	}
+	/* 1T segment */
+	vsid_bits = va_bits - SID_SHIFT_1T;
+	protovsid = (context << ESID_BITS_1T) |
+		((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+	return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
 }
 
 /*
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 065e762fae85..78260409dc9c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -29,6 +29,10 @@
  */
 
 /*
+ * Support for a 68-bit VA space. We added that in ISA 2.05
+ */
+#define MMU_FTR_68_BIT_VA		ASM_CONST(0x00002000)
+/*
  * Kernel read only support.
  * We added the ppp value 0b110 in ISA 2.04.
  */
@@ -109,10 +113,10 @@
 #define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
 #define MMU_FTRS_PPC970		MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
 #define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER8		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER9		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER6		MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER8		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER9		MMU_FTRS_POWER6
 #define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
 				MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -136,7 +140,7 @@ enum {
 		MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
 		MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
 		MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
-		MMU_FTR_KERNEL_RO |
+		MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA |
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
 #endif
@@ -290,7 +294,10 @@ static inline bool early_radix_enabled(void)
 #define MMU_PAGE_16G	14
 #define MMU_PAGE_64G	15
 
-/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
+/*
+ * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16
+ * Also we need to change the type of mm_context.low/high_slices_psize.
+ */
 #define MMU_PAGE_COUNT	16
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b35f44c98d1f..74b0153780e3 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -229,6 +229,7 @@ void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 
 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 {
+	unsigned long vsid_bits = VSID_BITS_65_256M;
 	struct kvmppc_sid_map *map;
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	u16 sid_map_mask;
@@ -257,7 +258,12 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 		kvmppc_mmu_pte_flush(vcpu, 0, 0);
 		kvmppc_mmu_flush_segments(vcpu);
 	}
-	map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
+
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		vsid_bits = VSID_BITS_256M;
+
+	map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++,
+				       VSID_MULTIPLIER_256M, vsid_bits);
 
 	map->guest_vsid = gvsid;
 	map->valid = true;
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a10e972221c4..e5fde156e11d 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -59,7 +59,14 @@ static int alloc_context_id(int min_id, int max_id)
 
 int hash__alloc_context_id(void)
 {
-	return alloc_context_id(MIN_USER_CONTEXT, MAX_USER_CONTEXT);
+	unsigned long max;
+
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		max = MAX_USER_CONTEXT;
+	else
+		max = MAX_USER_CONTEXT_65BIT_VA;
+
+	return alloc_context_id(MIN_USER_CONTEXT, max);
 }
 EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index ba1f8696c338..1c503d07e0fb 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -23,6 +23,48 @@
 #include <asm/pgtable.h>
 #include <asm/firmware.h>
 
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function.  Used in slb_allocate() and do_stab_bolted.  The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ *	rt = register containing the proto-VSID and into which the
+ *		VSID will be stored
+ *	rx = scratch register (clobbered)
+ *	rf = flags
+ *
+ *	- rt and rx must be different registers
+ *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
+ *	  bits may contain other garbage, so you may need to mask the
+ *	  result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)				\
+	lis	rx,VSID_MULTIPLIER_##size@h;				\
+	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
+	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
+/*									\
+ * powermac gets slb faults before feature fixup, so make the 65 bit part  \
+ * the default part of the feature fixup				\
+ */									\
+BEGIN_MMU_FTR_SECTION							\
+	srdi	rx,rt,VSID_BITS_65_##size;				\
+	clrldi	rt,rt,(64-VSID_BITS_65_##size);				\
+	add	rt,rt,rx;						\
+	addi	rx,rt,1;						\
+	srdi	rx,rx,VSID_BITS_65_##size;				\
+	add	rt,rt,rx;						\
+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
+MMU_FTR_SECTION_ELSE							\
+	srdi	rx,rt,VSID_BITS_##size;					\
+	clrldi	rt,rt,(64-VSID_BITS_##size);				\
+	add	rt,rt,rx;		/* add high and low bits */	\
+	addi	rx,rt,1;						\
+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
+	add	rt,rt,rx;						\
+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
+
+
 /* void slb_allocate_realmode(unsigned long ea);
  *
  * Create an SLB entry for the given EA (user or kernel).
@@ -179,13 +221,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
  */
 .Lslb_finish_load:
 	rldimi  r10,r9,ESID_BITS,0
-	ASM_VSID_SCRAMBLE(r10,r9,256M)
-	/*
-	 * bits above VSID_BITS_256M need to be ignored from r10
-	 * also combine VSID and flags
-	 */
-	rldimi	r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
-
+	ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
 	/* r3 = EA, r11 = VSID data */
 	/*
 	 * Find a slot, round robin. Previously we tried to find a
@@ -249,12 +285,12 @@ slb_compare_rr_to_size:
 .Lslb_finish_load_1T:
 	srdi	r10,r10,(SID_SHIFT_1T - SID_SHIFT)	/* get 1T ESID */
 	rldimi  r10,r9,ESID_BITS_1T,0
-	ASM_VSID_SCRAMBLE(r10,r9,1T)
+	ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
 	/*
 	 * bits above VSID_BITS_1T need to be ignored from r10
 	 * also combine VSID and flags
 	 */
-	rldimi	r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
+
 	li	r10,MMU_SEGSIZE_1T
 	rldimi	r11,r10,SLB_VSID_SSIZE_SHIFT,0	/* insert segment size */
 
-- 
2.7.4
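
As a quick illustration of why the new scramble works: for a modulus of the
form 2^n - 1, the shift-and-add sequence used by vsid_scramble() and
ASM_VSID_SCRAMBLE is equivalent to a real modulo, provided the product fits
in fewer than 2n bits (the constraint the tables in the patch document). A
user-space sketch of that reduction, assuming the 68-bit-VA 256M values
(VSID_BITS = 40 and the 24-bit prime multiplier) for its constants:

#include <stdio.h>

/* Constants assumed from the 68-bit VA, 256M segment case above. */
#define VSID_BITS	40
#define VSID_MULTIPLIER	12538073UL
#define VSID_MODULUS	((1UL << VSID_BITS) - 1)

/* Divide-free reduction modulo 2^VSID_BITS - 1, mirroring vsid_scramble(). */
static unsigned long scramble(unsigned long protovsid)
{
	unsigned long x = protovsid * VSID_MULTIPLIER;

	x = (x >> VSID_BITS) + (x & VSID_MODULUS);
	return (x + ((x + 1) >> VSID_BITS)) & VSID_MODULUS;
}

int main(void)
{
	/* proto-VSIDs kept small enough that the product stays within 64 bits */
	unsigned long samples[] = { 1, 0x12345, 0xdeadbeef, (1UL << 39) + 12345 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		unsigned long want = (samples[i] * VSID_MULTIPLIER) % VSID_MODULUS;
		unsigned long got = scramble(samples[i]);

		printf("protovsid 0x%lx -> 0x%lx (%s)\n", samples[i], got,
		       got == want ? "matches modulo" : "MISMATCH");
	}
	return 0;
}

A mismatch here would mean the multiplied result exceeded the 2n-bit bound,
which is exactly what the "Total Bits" columns above rule out.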

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 13/19] powerpc/mm/hash: Convert mask to unsigned long
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (10 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 12/19] powerpc/mm/hash: Support 68 bit VA Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 14/19] powerpc/mm/hash: Increase VA range to 128TB Michael Ellerman
                   ` (6 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This doesn't have any functional change, but it helps avoid mistakes
in case the shift value changes.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index c68102293a19..c93450bf306f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -409,7 +409,7 @@ static inline unsigned long hpt_vpn(unsigned long ea,
 static inline unsigned long hpt_hash(unsigned long vpn,
 				     unsigned int shift, int ssize)
 {
-	int mask;
+	unsigned long mask;
 	unsigned long hash, vsid;
 
 	/* VPN_SHIFT can be atmost 12 */
-- 
2.7.4
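
For illustration, hpt_hash() builds the mask as (1ul << shift) - 1; once the
shift reaches 31 or more, storing that in an int truncates it (and, on the
usual two's-complement conversion, it sign-extends back to all-ones when
widened again). A stand-alone sketch with a hypothetical 40-bit shift shows
the effect the type change guards against:

#include <stdio.h>

int main(void)
{
	unsigned int shift = 40;	/* hypothetical shift, wider than an int */

	/* Implementation-defined truncation: becomes -1 on the usual ABI,
	 * which then sign-extends to all-ones when widened again.
	 */
	int narrow = (1ul << shift) - 1;
	unsigned long wide = (1ul << shift) - 1;

	printf("int mask:           0x%016lx\n", (unsigned long)narrow);
	printf("unsigned long mask: 0x%016lx\n", wide);
	return 0;
}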

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 14/19] powerpc/mm/hash: Increase VA range to 128TB
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (11 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 13/19] powerpc/mm/hash: Convert mask to unsigned long Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 15/19] powerpc/mm: Add addr_limit to mm_context and use it to derive max slice index Michael Ellerman
                   ` (5 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We update the hash Linux page table layout such that we can support
512TB, but we limit TASK_SIZE to 128TB. We can switch to 128TB by
default unconditionally because that is the max virtual address
supported by other architectures. We will later add a mechanism to
increase the application's effective address range to 512TB on demand.

Having the page table layout changed to accommodate 512TB makes testing
large memory configurations easier, with fewer code changes to the kernel.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  2 +-
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
 arch/powerpc/include/asm/processor.h          | 22 ++++++++++++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 0c4e470571ca..b4b5e6b671ca 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -8,7 +8,7 @@
 #define H_PTE_INDEX_SIZE  9
 #define H_PMD_INDEX_SIZE  7
 #define H_PUD_INDEX_SIZE  9
-#define H_PGD_INDEX_SIZE  9
+#define H_PGD_INDEX_SIZE  12
 
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 7be54f9590a3..214219dff87c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -4,7 +4,7 @@
 #define H_PTE_INDEX_SIZE  8
 #define H_PMD_INDEX_SIZE  5
 #define H_PUD_INDEX_SIZE  5
-#define H_PGD_INDEX_SIZE  12
+#define H_PGD_INDEX_SIZE  15
 
 /*
  * 64k aligned address free up few of the lower bits of RPN for us
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e0fecbcea2a2..ea4b6c696f1f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -102,11 +102,25 @@ void release_thread(struct task_struct *);
 #endif
 
 #ifdef CONFIG_PPC64
-/* 64-bit user address space is 46-bits (64TB user VM) */
-#define TASK_SIZE_USER64 (0x0000400000000000UL)
+/*
+ * 64-bit user address space can have multiple limits
+ * For now supported values are:
+ */
+#define TASK_SIZE_64TB  (0x0000400000000000UL)
+#define TASK_SIZE_128TB (0x0000800000000000UL)
+#define TASK_SIZE_512TB (0x0002000000000000UL)
 
-/* 
- * 32-bit user address space is 4GB - 1 page 
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Max value currently used:
+ */
+#define TASK_SIZE_USER64 TASK_SIZE_128TB
+#else
+#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#endif
+
+/*
+ * 32-bit user address space is 4GB - 1 page
  * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
  */
 #define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
-- 
2.7.4
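
The new H_PGD_INDEX_SIZE values can be cross-checked with simple arithmetic:
the hash page table reach is PAGE_SHIFT plus the sum of the per-level index
sizes. A throwaway sketch (assuming PAGE_SHIFT of 16 for 64K pages and 12 for
4K pages) confirms both layouts now cover 2^49 bytes, i.e. 512TB:

#include <stdio.h>

static unsigned long range_bits(int page_shift, int pte, int pmd,
				int pud, int pgd)
{
	return page_shift + pte + pmd + pud + pgd;
}

int main(void)
{
	/* 64K pages: H_PTE/PMD/PUD/PGD index sizes 8/5/5/15 after this patch */
	unsigned long bits_64k = range_bits(16, 8, 5, 5, 15);
	/* 4K pages:  H_PTE/PMD/PUD/PGD index sizes 9/7/9/12 after this patch */
	unsigned long bits_4k  = range_bits(12, 9, 7, 9, 12);

	printf("64K layout: %lu bits -> %lu TB\n",
	       bits_64k, 1UL << (bits_64k - 40));
	printf("4K  layout: %lu bits -> %lu TB\n",
	       bits_4k, 1UL << (bits_4k - 40));
	return 0;
}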

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 15/19] powerpc/mm: Add addr_limit to mm_context and use it to derive max slice index
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (12 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 14/19] powerpc/mm/hash: Increase VA range to 128TB Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 16/19] powerpc/mm/hash: Store addr_limit in PACA Michael Ellerman
                   ` (4 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

In the followup patch, we will increase the slice array size to handle
the 512TB range, but will limit the max address to 128TB. Avoid doing
unnecessary computation and avoid doing slice-mask-related operations
above the address limit.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  3 ++-
 arch/powerpc/include/asm/book3s/64/mmu.h      |  1 +
 arch/powerpc/kernel/paca.c                    |  3 ++-
 arch/powerpc/kernel/setup-common.c            |  9 +++++++++
 arch/powerpc/mm/mmu_context_book3s64.c        |  7 +++++++
 arch/powerpc/mm/slice.c                       | 22 ++++++++++++----------
 6 files changed, 33 insertions(+), 12 deletions(-)

v6: Instead of using mm->task_size, add an addr_limit to the mm_context_t. This
has the advantage of making the series not depend on generic changes. Hopefully
it also makes it clearer that the value is not exactly equivalent to TASK_SIZE;
it's just the limit of currently used addresses, which may change in future.

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index c93450bf306f..5961b0d65a79 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -597,7 +597,8 @@ extern void slb_set_size(u16 size);
 #define USER_VSID_RANGE	(1UL << (ESID_BITS + SID_SHIFT))
 
 /* 4 bits per slice and we have one slice per 1TB */
-#define SLICE_ARRAY_SIZE  (H_PGTABLE_RANGE >> 41)
+#define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
+#define TASK_SLICE_ARRAY_SZ(x)	((x)->context.addr_limit >> 41)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index cce434e7de06..c4b865112d24 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -82,6 +82,7 @@ typedef struct {
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned long addr_limit;
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e2cf745a4b94..a2c7a6456ee6 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -253,9 +253,10 @@ void copy_mm_to_paca(struct mm_struct *mm)
 
 	get_paca()->mm_ctx_id = context->id;
 #ifdef CONFIG_PPC_MM_SLICES
+	VM_BUG_ON(!mm->context.addr_limit);
 	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
 	memcpy(&get_paca()->mm_ctx_high_slices_psize,
-	       &context->high_slices_psize, SLICE_ARRAY_SIZE);
+	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
 #else /* CONFIG_PPC_MM_SLICES */
 	get_paca()->mm_ctx_user_psize = context->user_psize;
 	get_paca()->mm_ctx_sllp = context->sllp;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4697da895133..a79db6b63466 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -920,6 +920,15 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_code = (unsigned long) _etext;
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = klimit;
+
+#ifdef CONFIG_PPC_MM_SLICES
+#ifdef CONFIG_PPC64
+	init_mm.context.addr_limit = TASK_SIZE_USER64;
+#else
+#error	"context.addr_limit not initialized."
+#endif
+#endif
+
 #ifdef CONFIG_PPC_64K_PAGES
 	init_mm.context.pte_frag = NULL;
 #endif
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index e5fde156e11d..fd0bc6db2dcd 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -79,6 +79,13 @@ static int hash__init_new_context(struct mm_struct *mm)
 		return index;
 
 	/*
+	 * We do switch_slb() early in fork, even before we set up
+	 * mm->context.addr_limit. Default to the max task size so that we copy
+	 * the default values to the paca, which helps us handle early slb misses.
+	 */
+	mm->context.addr_limit = TASK_SIZE_USER64;
+
+	/*
 	 * The old code would re-promote on fork, we don't do that when using
 	 * slices as it could cause problem promoting slices that have been
 	 * forced down to 4K.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 95e5a20b1b6a..a79a20d4cf99 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -136,7 +136,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
 	if (mm->task_size <= SLICE_LOW_TOP)
 		return;
 
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++)
 		if (!slice_high_has_vma(mm, i))
 			__set_bit(i, ret->high_slices);
 }
@@ -157,7 +157,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 			ret->low_slices |= 1u << i;
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -165,15 +165,17 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 	}
 }
 
-static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
+static int slice_check_fit(struct mm_struct *mm,
+			   struct slice_mask mask, struct slice_mask available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit);
 
 	bitmap_and(result, mask.high_slices,
-		   available.high_slices, SLICE_NUM_HIGH);
+		   available.high_slices, slice_count);
 
 	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		bitmap_equal(result, mask.high_slices, SLICE_NUM_HIGH);
+		bitmap_equal(result, mask.high_slices, slice_count);
 }
 
 static void slice_flush_segments(void *parm)
@@ -217,7 +219,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 	mm->context.low_slices_psize = lpsizes;
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (test_bit(i, mask.high_slices))
@@ -484,7 +486,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		/* Check if we fit in the good mask. If we do, we just return,
 		 * nothing else to do
 		 */
-		if (slice_check_fit(mask, good_mask)) {
+		if (slice_check_fit(mm, mask, good_mask)) {
 			slice_dbg(" fits good !\n");
 			return addr;
 		}
@@ -509,7 +511,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
-	if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
+	if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
 		slice_dbg(" fits potential !\n");
 		goto convert;
 	}
@@ -657,7 +659,7 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 	mm->context.low_slices_psize = lpsizes;
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
@@ -734,6 +736,6 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 	slice_print_mask(" mask", mask);
 	slice_print_mask(" available", available);
 #endif
-	return !slice_check_fit(mask, available);
+	return !slice_check_fit(mm, mask, available);
 }
 #endif
-- 
2.7.4
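
The >> 41 in TASK_SLICE_ARRAY_SZ() comes from one 1TB (2^40) slice per high
slice and 4 bits, i.e. half a byte, of psize encoding per slice. A small
sketch of that arithmetic for the limits used in this series:

#include <stdio.h>

/* Bytes of high_slices_psize needed for a given address limit:
 * one 1TB slice per 2^40 bytes, 4 bits (half a byte) per slice.
 */
static unsigned long slice_array_bytes(unsigned long addr_limit)
{
	return addr_limit >> 41;
}

int main(void)
{
	unsigned long limits[] = {
		0x0000400000000000UL,	/* TASK_SIZE_64TB  */
		0x0000800000000000UL,	/* TASK_SIZE_128TB */
		0x0002000000000000UL,	/* TASK_SIZE_512TB */
	};
	int i;

	for (i = 0; i < 3; i++)
		printf("addr_limit %3lu TB -> copy %3lu bytes of psize array\n",
		       limits[i] >> 40, slice_array_bytes(limits[i]));
	return 0;
}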

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 16/19] powerpc/mm/hash: Store addr_limit in PACA
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (13 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 15/19] powerpc/mm: Add addr_limit to mm_context and use it to derive max slice index Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 17/19] powerpc/pseries: Skip using reserved virtual address range Michael Ellerman
                   ` (3 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We optimize the slice page size array copy to the PACA by copying only the
range based on addr_limit. This requires that we not look at the page size
array beyond addr_limit in the PACA on an SLB fault. To enable that, copy
the address limit to the PACA, where it is used during SLB faults.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Rename from task_size to addr_limit, consolidate #ifdefs]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h   | 1 +
 arch/powerpc/kernel/asm-offsets.c | 1 +
 arch/powerpc/kernel/paca.c        | 1 +
 arch/powerpc/mm/slb_low.S         | 8 +++++++-
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index f48c250339fd..65d0ffb2aa33 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -139,6 +139,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 mm_ctx_low_slices_psize;
 	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned long addr_limit;
 #else
 	u16 mm_ctx_user_psize;
 	u16 mm_ctx_sllp;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..795505e27535 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,6 +185,7 @@ int main(void)
 #ifdef CONFIG_PPC_MM_SLICES
 	OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
 	OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+	DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
 	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
 #endif /* CONFIG_PPC_MM_SLICES */
 #endif
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index a2c7a6456ee6..8d63627e067f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -254,6 +254,7 @@ void copy_mm_to_paca(struct mm_struct *mm)
 	get_paca()->mm_ctx_id = context->id;
 #ifdef CONFIG_PPC_MM_SLICES
 	VM_BUG_ON(!mm->context.addr_limit);
+	get_paca()->addr_limit = mm->context.addr_limit;
 	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
 	memcpy(&get_paca()->mm_ctx_high_slices_psize,
 	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1c503d07e0fb..1519617aab36 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -149,7 +149,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	 * For userspace addresses, make sure this is region 0.
 	 */
 	cmpdi	r9, 0
-	bne	8f
+	bne-	8f
+	/*
+	 * For userspace, make sure we are within the allowed limit
+	 */
+	ld	r11,PACA_ADDR_LIMIT(r13)
+	cmpld	r3,r11
+	bge-	8f
 
 	/* when using slices, we extract the psize off the slice bitmaps
 	 * and then we need to get the sllp encoding off the mmu_psize_defs
-- 
2.7.4
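
In C terms the added slb_low.S instructions amount to the following guard on
the user SLB-miss path (a sketch only, with paca_addr_limit standing in for
the new paca->addr_limit field):

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the bounds check slb_allocate_realmode() now performs for
 * user addresses; paca_addr_limit is a stand-in for paca->addr_limit.
 */
static bool slb_user_addr_ok(unsigned long ea, unsigned long paca_addr_limit)
{
	if (ea >> 60)			/* not region 0: not a user address */
		return false;
	if (ea >= paca_addr_limit)	/* beyond the copied psize array */
		return false;
	return true;
}

int main(void)
{
	unsigned long limit_128tb = 0x0000800000000000UL;

	printf("%d\n", slb_user_addr_ok(0x00007fff00000000UL, limit_128tb)); /* 1 */
	printf("%d\n", slb_user_addr_ok(0x0001000000000000UL, limit_128tb)); /* 0 */
	return 0;
}

Addresses at or beyond the limit take the same exit as non-zero regions, so
the truncated psize array in the PACA is never indexed out of range.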

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 17/19] powerpc/pseries: Skip using reserved virtual address range
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (14 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 16/19] powerpc/mm/hash: Store addr_limit in PACA Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 18/19] powerpc/mm: Switch some TASK_SIZE checks to use mm_context addr_limit Michael Ellerman
                   ` (2 subsequent siblings)
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Now that we use all the available virtual address range, we need to make
sure we don't generate a VSID that overlaps with the reserved VSID range.
The reserved VSID range includes the virtual address range used by the
adjunct partition and also the VRMA virtual segment. We find the context
value that can result in generating such a VSID and reserve it early in
boot.

We don't look at the adjunct range, because for now we disable adjunct
usage in a Linux LPAR via the CAS interface.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Rewrite hash__reserve_context_id(), move the rest into pseries]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  7 +++
 arch/powerpc/include/asm/kvm_book3s_64.h      |  2 -
 arch/powerpc/include/asm/mmu_context.h        |  1 +
 arch/powerpc/mm/hash_utils_64.c               |  1 -
 arch/powerpc/mm/mmu_context_book3s64.c        | 16 +++++++
 arch/powerpc/platforms/pseries/lpar.c         | 61 +++++++++++++++++++++++++++
 6 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 5961b0d65a79..6d56974adf28 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -589,11 +589,18 @@ extern void slb_set_size(u16 size);
 #define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
 #define VSID_BITS_256M		(VA_BITS - SID_SHIFT)
 #define VSID_BITS_65_256M	(65 - SID_SHIFT)
+/*
+ * Modular multiplicative inverse of VSID_MULTIPLIER, modulo VSID_MODULUS
+ */
+#define VSID_MULINV_256M	ASM_CONST(665548017062)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
 #define VSID_BITS_1T		(VA_BITS - SID_SHIFT_1T)
 #define VSID_BITS_65_1T		(65 - SID_SHIFT_1T)
+#define VSID_MULINV_1T		ASM_CONST(209034062)
 
+/* 1TB VSID reserved for VRMA */
+#define VRMA_VSID	0x1ffffffUL
 #define USER_VSID_RANGE	(1UL << (ESID_BITS + SID_SHIFT))
 
 /* 4 bits per slice and we have one slice per 1TB */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index d9b48f5bb606..d55c7f881ce7 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -49,8 +49,6 @@ static inline bool kvm_is_radix(struct kvm *kvm)
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif
 
-#define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
-
 /*
  * We use a lock bit in HPTE dword 0 to synchronize updates and
  * accesses to each HPTE, and another bit to indicate non-present
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 7d721101ec78..78803a7ebdd9 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -52,6 +52,7 @@ static inline void switch_mmu_context(struct mm_struct *prev,
 }
 
 extern int hash__alloc_context_id(void);
+extern void hash__reserve_context_id(int id);
 extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
 #else
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 716255f88bbd..8848fec51ce9 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1868,5 +1868,4 @@ static int __init hash64_debugfs(void)
 	return 0;
 }
 machine_device_initcall(pseries, hash64_debugfs);
-
 #endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index fd0bc6db2dcd..7bc5b63034db 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -57,6 +57,22 @@ static int alloc_context_id(int min_id, int max_id)
 	return index;
 }
 
+void hash__reserve_context_id(int id)
+{
+	int rc, result = 0;
+
+	do {
+		if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
+			break;
+
+		spin_lock(&mmu_context_lock);
+		rc = ida_get_new_above(&mmu_context_ida, id, &result);
+		spin_unlock(&mmu_context_lock);
+	} while (rc == -EAGAIN);
+
+	WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
+}
+
 int hash__alloc_context_id(void)
 {
 	unsigned long max;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 8b1fe895daa3..6541d0b03e4c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -958,3 +958,64 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
 
 	return rc;
 }
+
+static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
+{
+	unsigned long protovsid;
+	unsigned long va_bits = VA_BITS;
+	unsigned long modinv, vsid_modulus;
+	unsigned long max_mod_inv, tmp_modinv;
+
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		modinv = VSID_MULINV_256M;
+		vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1);
+	} else {
+		modinv = VSID_MULINV_1T;
+		vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1);
+	}
+
+	/*
+	 * vsid outside our range.
+	 */
+	if (vsid >= vsid_modulus)
+		return 0;
+
+	/*
+	 * If modinv is the modular multiplicative inverse of (x % vsid_modulus)
+	 * and vsid = (protovsid * x) % vsid_modulus, then we say:
+	 *   protovsid = (vsid * modinv) % vsid_modulus
+	 */
+
+	/* Check if (vsid * modinv) overflows 63 bits */
+	max_mod_inv = 0x7fffffffffffffffull / vsid;
+	if (modinv < max_mod_inv)
+		return (vsid * modinv) % vsid_modulus;
+
+	tmp_modinv = modinv/max_mod_inv;
+	modinv %= max_mod_inv;
+
+	protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus;
+	protovsid = (protovsid + vsid * modinv) % vsid_modulus;
+
+	return protovsid;
+}
+
+static int __init reserve_vrma_context_id(void)
+{
+	unsigned long protovsid;
+
+	/*
+	 * Reserve context ids which map to reserved virtual addresses. For now
+	 * we only reserve the context id which maps to the VRMA VSID. We ignore
+	 * the addresses in "ibm,adjunct-virtual-addresses" because we don't
+	 * enable adjunct support via the "ibm,client-architecture-support"
+	 * interface.
+	 */
+	protovsid = vsid_unscramble(VRMA_VSID, MMU_SEGSIZE_1T);
+	hash__reserve_context_id(protovsid >> ESID_BITS_1T);
+	return 0;
+}
+machine_device_initcall(pseries, reserve_vrma_context_id);
-- 
2.7.4
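
The modular inverses make vsid_unscramble() the exact inverse of the scramble
for VSIDs below the modulus. A user-space round-trip sketch for the 1T,
68-bit-VA case (modulus 2^28 - 1, using the VSID_MULINV_1T value from the
patch; the max_mod_inv splitting in the kernel version is unnecessary here
because both operands are below 2^28, so the product cannot overflow):

#include <stdio.h>

#define VSID_BITS_1T	28UL
#define VSID_MULT_1T	12538073UL
#define VSID_MULINV_1T	209034062UL
#define VSID_MOD_1T	((1UL << VSID_BITS_1T) - 1)

static unsigned long scramble_1t(unsigned long protovsid)
{
	return (protovsid * VSID_MULT_1T) % VSID_MOD_1T;
}

static unsigned long unscramble_1t(unsigned long vsid)
{
	/* vsid and the inverse are both < 2^28, so no overflow handling */
	return (vsid * VSID_MULINV_1T) % VSID_MOD_1T;
}

int main(void)
{
	unsigned long vrma_vsid = 0x1ffffffUL;	/* VRMA_VSID */
	unsigned long protovsid = unscramble_1t(vrma_vsid);

	printf("protovsid for VRMA_VSID: 0x%lx\n", protovsid);
	printf("round trip ok: %d\n", scramble_1t(protovsid) == vrma_vsid);
	return 0;
}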

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 18/19] powerpc/mm: Switch some TASK_SIZE checks to use mm_context addr_limit
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (15 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 17/19] powerpc/pseries: Skip using reserved virtual address range Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-03-30 12:04 ` [PATCH v6 19/19] powerpc/mm: Enable mappings above 128TB Michael Ellerman
  2017-04-03 10:13 ` [v6, 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/hugetlbpage-radix.c |  4 ++--
 arch/powerpc/mm/mmap.c              | 12 ++++++------
 arch/powerpc/mm/slice.c             |  6 +++---
 arch/powerpc/mm/subpage-prot.c      |  3 ++-
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 35254a678456..95207c117b82 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -52,7 +52,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (len > TASK_SIZE)
+	if (len > mm->context.addr_limit)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
@@ -64,7 +64,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	if (addr) {
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
+		if (mm->context.addr_limit - len >= addr &&
 		    (!vma || addr + len <= vma->vm_start))
 			return addr;
 	}
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index a5d9ef59debe..6e9fead92969 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -97,7 +97,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
 
-	if (len > TASK_SIZE - mmap_min_addr)
+	if (len > mm->context.addr_limit - mmap_min_addr)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED)
@@ -106,7 +106,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		if (mm->context.addr_limit - len >= addr && addr >= mmap_min_addr &&
 		    (!vma || addr + len <= vma->vm_start))
 			return addr;
 	}
@@ -114,7 +114,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
-	info.high_limit = TASK_SIZE;
+	info.high_limit = mm->context.addr_limit;
 	info.align_mask = 0;
 	return vm_unmapped_area(&info);
 }
@@ -132,7 +132,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	struct vm_unmapped_area_info info;
 
 	/* requested length too big for entire address space */
-	if (len > TASK_SIZE - mmap_min_addr)
+	if (len > mm->context.addr_limit - mmap_min_addr)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED)
@@ -142,7 +142,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		if (mm->context.addr_limit - len >= addr && addr >= mmap_min_addr &&
 				(!vma || addr + len <= vma->vm_start))
 			return addr;
 	}
@@ -164,7 +164,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
-		info.high_limit = TASK_SIZE;
+		info.high_limit = mm->context.addr_limit;
 		addr = vm_unmapped_area(&info);
 	}
 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index a79a20d4cf99..bf41825054d0 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -277,7 +277,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 	info.align_offset = 0;
 
 	addr = TASK_UNMAPPED_BASE;
-	while (addr < TASK_SIZE) {
+	while (addr < mm->context.addr_limit) {
 		info.low_limit = addr;
 		if (!slice_scan_available(addr, available, 1, &addr))
 			continue;
@@ -289,8 +289,8 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 		 * Check if we need to reduce the range, or if we can
 		 * extend it to cover the next available slice.
 		 */
-		if (addr >= TASK_SIZE)
-			addr = TASK_SIZE;
+		if (addr >= mm->context.addr_limit)
+			addr = mm->context.addr_limit;
 		else if (slice_scan_available(addr, available, 1, &next_end)) {
 			addr = next_end;
 			goto next_slice;
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 94210940112f..a409f78d206b 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -197,7 +197,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
 
 	/* Check parameters */
 	if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
-	    addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE)
+	    addr >= mm->context.addr_limit || len >= mm->context.addr_limit ||
+	    addr + len > mm->context.addr_limit)
 		return -EINVAL;
 
 	if (is_hugepage_only_range(mm, addr, len))
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 19/19] powerpc/mm: Enable mappings above 128TB
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (16 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 18/19] powerpc/mm: Switch some TASK_SIZE checks to use mm_context addr_limit Michael Ellerman
@ 2017-03-30 12:04 ` Michael Ellerman
  2017-04-03 10:13 ` [v6, 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-03-30 12:04 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Not all user space applications are ready to handle wide addresses. It's
known that at least some JIT compilers use higher bits in pointers to
encode their information. That collides with valid pointers at 512TB
addresses and leads to crashes.

To mitigate this, we are not going to allocate virtual address space
above 128TB by default.

But userspace can ask for allocation from the full address space by
specifying a hint address (with or without MAP_FIXED) above 128TB.

If the hint address is set above 128TB but MAP_FIXED is not specified, we
try to look for an unmapped area at the specified address. If it's already
occupied, we look for an unmapped area in the *full* address space, rather
than from the 128TB window.

This approach makes it easy for an application's memory allocator to become
aware of the large address space without manually tracking allocated
virtual address space.

This is going to be a per-mmap decision, ie. we can have some mmaps with
larger addresses and others that do not.

A sample memory layout looks like below.

10000000-10010000 r-xp 00000000 fc:00 9057045                            /home/max_addr_512TB
10010000-10020000 r--p 00000000 fc:00 9057045                            /home/max_addr_512TB
10020000-10030000 rw-p 00010000 fc:00 9057045                            /home/max_addr_512TB
10029630000-10029660000 rw-p 00000000 00:00 0                            [heap]
7fff834a0000-7fff834b0000 rw-p 00000000 00:00 0
7fff834b0000-7fff83670000 r-xp 00000000 fc:00 9177190                    /lib/powerpc64le-linux-gnu/libc-2.23.so
7fff83670000-7fff83680000 r--p 001b0000 fc:00 9177190                    /lib/powerpc64le-linux-gnu/libc-2.23.so
7fff83680000-7fff83690000 rw-p 001c0000 fc:00 9177190                    /lib/powerpc64le-linux-gnu/libc-2.23.so
7fff83690000-7fff836a0000 rw-p 00000000 00:00 0
7fff836a0000-7fff836c0000 r-xp 00000000 00:00 0                          [vdso]
7fff836c0000-7fff83700000 r-xp 00000000 fc:00 9177193                    /lib/powerpc64le-linux-gnu/ld-2.23.so
7fff83700000-7fff83710000 r--p 00030000 fc:00 9177193                    /lib/powerpc64le-linux-gnu/ld-2.23.so
7fff83710000-7fff83720000 rw-p 00040000 fc:00 9177193                    /lib/powerpc64le-linux-gnu/ld-2.23.so
7fffdccf0000-7fffdcd20000 rw-p 00000000 00:00 0                          [stack]
1000000000000-1000000010000 rw-p 00000000 00:00 0
1ffff83710000-1ffff83720000 rw-p 00000000 00:00 0

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/processor.h | 23 +++++++++++----
 arch/powerpc/mm/hugetlbpage-radix.c  |  7 +++++
 arch/powerpc/mm/mmap.c               | 32 +++++++++++++--------
 arch/powerpc/mm/slice.c              | 54 ++++++++++++++++++++++++++++--------
 4 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ea4b6c696f1f..a4b1d8d6b793 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -114,9 +114,9 @@ void release_thread(struct task_struct *);
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64 TASK_SIZE_128TB
+#define TASK_SIZE_USER64	TASK_SIZE_512TB
 #else
-#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#define TASK_SIZE_USER64	TASK_SIZE_64TB
 #endif
 
 /*
@@ -128,26 +128,37 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
 		TASK_SIZE_USER32 : TASK_SIZE_USER64)
 #define TASK_SIZE	  TASK_SIZE_OF(current)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
 #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
 
 #define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
 #endif
 
+/*
+ * Initial task size value for user applications. For book3s 64 we start
+ * with 128TB and conditionally enable up to 512TB
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ? \
+				 TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#else
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
+#endif
+
 #ifdef __powerpc64__
 
-#define STACK_TOP_USER64 TASK_SIZE_USER64
+/* Limit stack to 128TB */
+#define STACK_TOP_USER64 TASK_SIZE_128TB
 #define STACK_TOP_USER32 TASK_SIZE_USER32
 
 #define STACK_TOP (is_32bit_task() ? \
 		   STACK_TOP_USER32 : STACK_TOP_USER64)
 
-#define STACK_TOP_MAX STACK_TOP_USER64
+#define STACK_TOP_MAX TASK_SIZE_USER64
 
 #else /* __powerpc64__ */
 
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 95207c117b82..0aa9cade422f 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -50,6 +50,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *h = hstate_file(file);
 	struct vm_unmapped_area_info info;
 
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+		mm->context.addr_limit = TASK_SIZE;
+
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
 	if (len > mm->context.addr_limit)
@@ -78,5 +81,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	info.high_limit = current->mm->mmap_base;
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
+
+	if (addr > DEFAULT_MAP_WINDOW)
+		info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
 	return vm_unmapped_area(&info);
 }
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 6e9fead92969..b2111baa0da6 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -79,7 +79,7 @@ static inline unsigned long mmap_base(unsigned long rnd)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+	return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
 }
 
 #ifdef CONFIG_PPC_RADIX_MMU
@@ -97,6 +97,9 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
 
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+		mm->context.addr_limit = TASK_SIZE;
+
 	if (len > mm->context.addr_limit - mmap_min_addr)
 		return -ENOMEM;
 
@@ -114,8 +117,13 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
-	info.high_limit = mm->context.addr_limit;
 	info.align_mask = 0;
+
+	if (unlikely(addr > DEFAULT_MAP_WINDOW))
+		info.high_limit = mm->context.addr_limit;
+	else
+		info.high_limit = DEFAULT_MAP_WINDOW;
+
 	return vm_unmapped_area(&info);
 }
 
@@ -131,6 +139,9 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
 
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+		mm->context.addr_limit = TASK_SIZE;
+
 	/* requested length too big for entire address space */
 	if (len > mm->context.addr_limit - mmap_min_addr)
 		return -ENOMEM;
@@ -152,7 +163,14 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
 	info.high_limit = mm->mmap_base;
 	info.align_mask = 0;
+
+	if (addr > DEFAULT_MAP_WINDOW)
+		info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
 	addr = vm_unmapped_area(&info);
+	if (!(addr & ~PAGE_MASK))
+		return addr;
+	VM_BUG_ON(addr != -ENOMEM);
 
 	/*
 	 * A failed mmap() very likely causes application failure,
@@ -160,15 +178,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	if (addr & ~PAGE_MASK) {
-		VM_BUG_ON(addr != -ENOMEM);
-		info.flags = 0;
-		info.low_limit = TASK_UNMAPPED_BASE;
-		info.high_limit = mm->context.addr_limit;
-		addr = vm_unmapped_area(&info);
-	}
-
-	return addr;
+	return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 }
 
 static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index bf41825054d0..ff82b651d15d 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -265,7 +265,7 @@ static bool slice_scan_available(unsigned long addr,
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      unsigned long len,
 					      struct slice_mask available,
-					      int psize)
+					      int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	unsigned long addr, found, next_end;
@@ -277,7 +277,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 	info.align_offset = 0;
 
 	addr = TASK_UNMAPPED_BASE;
-	while (addr < mm->context.addr_limit) {
+	/*
+	 * Check up to the allowed max value for this mmap request
+	 */
+	while (addr < high_limit) {
 		info.low_limit = addr;
 		if (!slice_scan_available(addr, available, 1, &addr))
 			continue;
@@ -308,7 +311,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     unsigned long len,
 					     struct slice_mask available,
-					     int psize)
+					     int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	unsigned long addr, found, prev;
@@ -320,6 +323,15 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 	info.align_offset = 0;
 
 	addr = mm->mmap_base;
+	/*
+	 * If we are trying to allocate above DEFAULT_MAP_WINDOW,
+	 * add the difference to mmap_base. We should only apply
+	 * this for requests whose high_limit is above
+	 * DEFAULT_MAP_WINDOW.
+	 */
+	if (high_limit  > DEFAULT_MAP_WINDOW)
+		addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
 	while (addr > PAGE_SIZE) {
 		info.high_limit = addr;
 		if (!slice_scan_available(addr - 1, available, 0, &addr))
@@ -351,18 +363,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	return slice_find_area_bottomup(mm, len, available, psize);
+	return slice_find_area_bottomup(mm, len, available, psize, high_limit);
 }
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 				     struct slice_mask mask, int psize,
-				     int topdown)
+				     int topdown, unsigned long high_limit)
 {
 	if (topdown)
-		return slice_find_area_topdown(mm, len, mask, psize);
+		return slice_find_area_topdown(mm, len, mask, psize, high_limit);
 	else
-		return slice_find_area_bottomup(mm, len, mask, psize);
+		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
 }
 
 static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
@@ -402,8 +414,23 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	struct mm_struct *mm = current->mm;
 	unsigned long newaddr;
+	unsigned long high_limit;
 
 	/*
+	 * Check if we need to expand the slice area.
+	 */
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE)) {
+		mm->context.addr_limit = TASK_SIZE;
+		on_each_cpu(slice_flush_segments, mm, 1);
+	}
+	/*
+	 * This mmap request can allocate up to 512TB
+	 */
+	if (addr > DEFAULT_MAP_WINDOW)
+		high_limit = mm->context.addr_limit;
+	else
+		high_limit = DEFAULT_MAP_WINDOW;
+	/*
 	 * init different masks
 	 */
 	mask.low_slices = 0;
@@ -494,7 +521,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
+		newaddr = slice_find_area(mm, len, good_mask,
+					  psize, topdown, high_limit);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
 			 * we thus return directly
@@ -526,7 +554,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask, psize, topdown);
+		addr = slice_find_area(mm, len, good_mask,
+				       psize, topdown, high_limit);
 		if (addr != -ENOMEM) {
 			slice_dbg(" found area at 0x%lx\n", addr);
 			return addr;
@@ -536,14 +565,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask, psize, topdown);
+	addr = slice_find_area(mm, len, potential_mask,
+			       psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
 		slice_or_mask(&potential_mask, &compat_mask);
-		addr = slice_find_area(mm, len, potential_mask, psize,
-				       topdown);
+		addr = slice_find_area(mm, len, potential_mask,
+				       psize, topdown, high_limit);
 	}
 #endif
 
-- 
2.7.4
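
From user space, opting in is just a matter of passing a hint address above
the 128TB window to mmap(). A minimal test program (the 1UL << 48 hint is an
arbitrary choice; any address above 128TB behaves the same) might look like:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1UL << 16;
	/* Any hint above the 128TB default window asks for the full range. */
	void *hint = (void *)(1UL << 48);
	void *p;

	/* Without a high hint, allocations stay below 128TB. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	printf("default window: %p\n", p);

	/* With a high hint (no MAP_FIXED), the kernel may place the
	 * mapping anywhere up to the 512TB limit.
	 */
	p = mmap(hint, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	printf("high hint:      %p\n", p);
	return 0;
}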

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [v6, 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap
  2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
                   ` (17 preceding siblings ...)
  2017-03-30 12:04 ` [PATCH v6 19/19] powerpc/mm: Enable mappings above 128TB Michael Ellerman
@ 2017-04-03 10:13 ` Michael Ellerman
  18 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-04-03 10:13 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: aneesh.kumar

On Thu, 2017-03-30 at 12:03:49 UTC, Michael Ellerman wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> In followup patch we want to increase the va range which will result
> in us requiring high_slices to have more than 64 bits. To enable this
> convert high_slices to bitmap. We keep the number bits same in this patch
> and later change that to higher value
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> [mpe: Fold in fix to use bitmap_empty()]
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Series applied to powerpc next.

https://git.kernel.org/powerpc/c/f3207c124e7aa8d4d9cf32cc45b10c

cheers

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel
  2017-03-30 12:03 ` [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel Michael Ellerman
@ 2017-04-28 10:57   ` Michal Suchánek
  2017-04-28 12:05     ` Michael Ellerman
  0 siblings, 1 reply; 22+ messages in thread
From: Michal Suchánek @ 2017-04-28 10:57 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, aneesh.kumar

Hello,

just a nit:

On Thu, 30 Mar 2017 23:03:58 +1100
Michael Ellerman <mpe@ellerman.id.au> wrote:

> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Currently we use the top 4 context ids (0x7fffc-0x7ffff) for the
> kernel. Kernel VSIDs are built using these top context values and the
> effective segment ID. In subsequent patches we want to increase the
> max effective address to 512TB. We will achieve that by increasing the
> effective segment IDs, thereby increasing the virtual address range.
> 
> We will be switching to a 68-bit virtual address in the following
> patch. But platforms like Power4 and Power5 only support a 65-bit
> virtual address. We will handle that by limiting the context bits to
> 16 instead of 19 on those platforms. That means the max context id
> will have a different value on different platforms.
> 
> So that we don't have to deal with the kernel context ids changing
> between different platforms, move the kernel context ids down to use
> context ids 1-4.
> 
> We can't use segment 0 of context-id 0, because that maps to VSID 0,
> which we want to keep as invalid, so we avoid context-id 0 entirely.
> Similarly we can't use the last segment of the maximum context, so we
> avoid it too.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> [mpe: Switch from 0-3 to 1-4 so VSID=0 remains invalid]
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 60 ++++++++++++++++-----------
>  arch/powerpc/mm/mmu_context_book3s64.c        |  2 +-
>  arch/powerpc/mm/slb_low.S                     | 20 +++------
>  3 files changed, 41 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index 52d8d1e4b772..a5ab6f5b8a7f 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -491,13 +491,14 @@ extern void slb_set_size(u16 size);
>   * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
>   * from mmu context id and effective segment id of the address.
>   *
> - * For user processes max context id is limited to ((1ul << 19) - 5)
> - * for kernel space, we use the top 4 context ids to map address as below
> + * For user processes max context id is limited to MAX_USER_CONTEXT.
> +
> + * For kernel space, we use context ids 1-5 to map address as below:
This appears wrong.

>   * NOTE: each context only support 64TB now.
> - * 0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
> - * 0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
> - * 0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
> - * 0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
> + * 0x00001 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
> + * 0x00002 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
> + * 0x00003 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
> + * 0x00004 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
>   *
>   * The proto-VSIDs are then scrambled into real VSIDs with the
>   * multiplicative hash:
> @@ -511,15 +512,13 @@ extern void slb_set_size(u16 size);
>   * robust scattering in the hash table (at least based on some initial
>   * results).
>   *
> - * We also consider VSID 0 special. We use VSID 0 for slb entries mapping
> - * bad address. This enables us to consolidate bad address handling in
> - * hash_page.
> + * We use VSID 0 to indicate an invalid VSID. The means we can't use context id
> + * 0, because a context id of 0 and an EA of 0 gives a proto-VSID of 0, which
> + * will produce a VSID of 0.
>   *
>   * We also need to avoid the last segment of the last context, because that
>   * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
> - * because of the modulo operation in vsid scramble. But the vmemmap
> - * (which is what uses region 0xf) will never be close to 64TB in size
> - * (it's 56 bytes per page of system memory).
> + * because of the modulo operation in vsid scramble.
>   */
>  
>  #define CONTEXT_BITS		19
> @@ -532,12 +531,19 @@ extern void slb_set_size(u16 size);
>  /*
>   * 256MB segment
>   * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
> - * available for user + kernel mapping. The top 4 contexts are used for
> - * kernel mapping. Each segment contains 2^28 bytes. Each
> - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
> - * (19 == 37 + 28 - 46).
> + * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
> + * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
> + * context maps 2^46 bytes (64TB).
> + *
> + * We also need to avoid the last segment of the last context, because that
> + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
> + * because of the modulo operation in vsid scramble.
>   */
> -#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 5)
> +#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 2)
> +#define MIN_USER_CONTEXT	(5)
> +
> +/* Would be nice to use KERNEL_REGION_ID here */
> +#define KERNEL_REGION_CONTEXT_OFFSET	(0xc - 1)
>  
>  /*
>   * This should be computed such that protovosid * vsid_mulitplier
> @@ -671,21 +677,25 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
>  /*
>   * This is only valid for addresses >= PAGE_OFFSET
> - *
> - * For kernel space, we use the top 4 context ids to map address as
> below
> - * 0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
> - * 0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
> - * 0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
> - * 0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
>   */
>  static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
>  {
>  	unsigned long context;
>  
>  	/*
> -	 * kernel take the top 4 context from the available range
> +	 * For kernel space, we use context ids 1-4 to map the address space as
and this appears right - or at least consistent with the commit
message ... and the rest of the comment.

Thanks

Michal

^ permalink raw reply	[flat|nested] 22+ messages in thread
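
As a side note for anyone puzzling over the 1-4 scheme being discussed: below is a rough user-space sketch of the mapping from kernel region id to context id, and of why context 0 has to stay unused. ESID_BITS is assumed here to be 18 (256MB segments within a 64TB context) purely for illustration; the authoritative definitions live in arch/powerpc/include/asm/book3s/64/mmu-hash.h.

/* context-id sketch - illustrative only, not kernel code */
#include <stdio.h>

#define ESID_BITS                       18              /* assumed: 2^46 / 2^28 segments per context */
#define KERNEL_REGION_CONTEXT_OFFSET    (0xc - 1)       /* region 0xc -> context id 1, as in the patch */

/* Kernel context id for region ids 0xc..0xf comes out as 1..4 */
static unsigned long kernel_context(unsigned long region_id)
{
        return region_id - KERNEL_REGION_CONTEXT_OFFSET;
}

/* Proto-VSID is the context id concatenated with the effective segment id */
static unsigned long proto_vsid(unsigned long context, unsigned long esid)
{
        return (context << ESID_BITS) | esid;
}

int main(void)
{
        unsigned long region;

        for (region = 0xc; region <= 0xf; region++)
                printf("region 0x%lx -> kernel context id %lu\n",
                       region, kernel_context(region));

        /* Context 0 with ESID 0 yields proto-VSID 0, which scrambles to the
         * invalid VSID 0, hence user contexts start at MIN_USER_CONTEXT (5). */
        printf("context 0, esid 0 -> proto-VSID %lu (reserved as invalid)\n",
               proto_vsid(0, 0));
        return 0;
}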

* Re: [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel
  2017-04-28 10:57   ` Michal Suchánek
@ 2017-04-28 12:05     ` Michael Ellerman
  0 siblings, 0 replies; 22+ messages in thread
From: Michael Ellerman @ 2017-04-28 12:05 UTC (permalink / raw)
  To: Michal Suchánek; +Cc: linuxppc-dev, aneesh.kumar

Michal Suchánek <msuchanek@suse.de> writes:
> Hello,
>
> just a nit:
>
> On Thu, 30 Mar 2017 23:03:58 +1100
> Michael Ellerman <mpe@ellerman.id.au> wrote:
>> + * For kernel space, we use context ids 1-5 to map address as below:
>
> This appears wrong.

>>  	/*
>> -	 * kernel take the top 4 context from the available range
>> +	 * For kernel space, we use context ids 1-4 to map the
>
> and this appears right - or at least consistent with the commit
> message ... and the rest of the comment.

Yep thanks.

Patch on its way.

cheers

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2017-04-28 12:05 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-30 12:03 [PATCH v6 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 02/19] powerpc/mm/slice: Update the function prototype Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 03/19] powerpc/mm: Move copy_mm_to_paca to paca.c Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 04/19] powerpc/mm: Remove checks that TASK_SIZE_USER64 is too small Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 05/19] powerpc/mm/slice: Move slice_mask struct definition to slice.c Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 06/19] powerpc/mm/slice: Update slice mask printing to use bitmap printing Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 07/19] powerpc/mm/hash: Abstract context id allocation for KVM Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 08/19] powerpc/mm/hash: Pull hash constants into hash__alloc_context_id() Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 09/19] powerpc/mm: Split radix vs hash mm context initialisation Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 10/19] powerpc/mm/hash: Use context ids 1-4 for the kernel Michael Ellerman
2017-04-28 10:57   ` Michal Suchánek
2017-04-28 12:05     ` Michael Ellerman
2017-03-30 12:03 ` [PATCH v6 11/19] powerpc/mm/hash: Check for non-kernel address in get_kernel_vsid() Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 12/19] powerpc/mm/hash: Support 68 bit VA Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 13/19] powerpc/mm/hash: Convert mask to unsigned long Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 14/19] powerpc/mm/hash: Increase VA range to 128TB Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 15/19] powerpc/mm: Add addr_limit to mm_context and use it to derive max slice index Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 16/19] powerpc/mm/hash: Store addr_limit in PACA Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 17/19] powerpc/pseries: Skip using reserved virtual address range Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 18/19] powerpc/mm: Switch some TASK_SIZE checks to use mm_context addr_limit Michael Ellerman
2017-03-30 12:04 ` [PATCH v6 19/19] powerpc/mm: Enable mappings above 128TB Michael Ellerman
2017-04-03 10:13 ` [v6, 01/19] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Michael Ellerman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).