* [RFC PATCH 0/5] Add support for larger address space for ppc64
@ 2017-02-07  3:48 Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 1/5] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Aneesh Kumar K.V
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  3:48 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This patch series updates ppc64 to use a 68-bit virtual address. The goal here
is to increase the effective address range to 512TB. I still haven't come up
with a mechanism to let applications selectively use addresses above 64TB
(the current limit). The last patch in this series is just to check
whether the 68-bit VA limit is working as expected.

On P4 and P5, which can only do a 65-bit VA, we use an MMU feature to fix up the
max VA bits. We do that by limiting the max context on these platforms. The context
bits for these platforms are reduced from 19 bits to 16 bits by these patches.
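
As a rough aid, a small user-space sketch (illustrative only) of the arithmetic
behind that, using the SID_SHIFT/ESID_BITS values introduced in patch 4:

/*
 * Sketch only: context-width arithmetic for a 68-bit vs 65-bit VA.
 * SID_SHIFT is 28 (256MB segments); ESID_BITS follows patch 4:
 * ESID_BITS = VA_BITS - (SID_SHIFT + CONTEXT_BITS).
 */
#include <stdio.h>

#define SID_SHIFT	28
#define CONTEXT_BITS	19
#define ESID_BITS	(68 - (SID_SHIFT + CONTEXT_BITS))	/* 21 */

int main(void)
{
	/* each context maps 2^(ESID_BITS + SID_SHIFT) bytes = 512TB */
	printf("per-context address space: %lu TB\n",
	       (1UL << (ESID_BITS + SID_SHIFT)) >> 40);
	printf("context bits with a 68-bit VA: %d\n",
	       68 - (SID_SHIFT + ESID_BITS));	/* 19 */
	printf("context bits with a 65-bit VA: %d\n",
	       65 - (SID_SHIFT + ESID_BITS));	/* 16 */
	return 0;
}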

W.r.t. limiting the max effective address for applications, there is a discussion
happening on lkml about the different approaches that can be used. Once we reach
consensus on that, the same approach can be used on ppc64 too.
https://lkml.kernel.org/r/20161227015413.187403-30-kirill.shutemov@linux.intel.com

TODO:
* PR KVM needs to be looked at to study the impact
* Limit the max address to 64TB by default and selectively enable the range above it.
* Fix up context switch to copy only the required slice array.


Aneesh Kumar K.V (5):
  powerpc/mm/slice: Convert slice_mask high slice to a bitmap
  powerpc/mm/slice: Update the function prototype
  powerpc/mm/hash: Move kernel context to the starting of context range
  powerpc/mm/hash: Support 68 bit VA
  powerpc/mm/hash: Increase VA range to 256TB

 arch/powerpc/include/asm/book3s/64/hash-4k.h  |   2 +-
 arch/powerpc/include/asm/book3s/64/hash-64k.h |   2 +-
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 162 +++++++++++++++-----------
 arch/powerpc/include/asm/mmu.h                |  19 ++-
 arch/powerpc/include/asm/mmu_context.h        |   2 -
 arch/powerpc/include/asm/page_64.h            |  15 +--
 arch/powerpc/include/asm/processor.h          |  12 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c         |   2 +-
 arch/powerpc/mm/hash_utils_64.c               |   5 -
 arch/powerpc/mm/mmu_context_book3s64.c        |  87 +++++++++-----
 arch/powerpc/mm/slb_low.S                     |  71 +++++++----
 arch/powerpc/mm/slice.c                       | 147 ++++++++++++++---------
 12 files changed, 318 insertions(+), 208 deletions(-)

-- 
2.7.4

* [RFC PATCH 1/5] powerpc/mm/slice: Convert slice_mask high slice to a bitmap
  2017-02-07  3:48 [RFC PATCH 0/5] Add support for larger address space for ppc64 Aneesh Kumar K.V
@ 2017-02-07  3:48 ` Aneesh Kumar K.V
  2017-02-08 10:30   ` Balbir Singh
  2017-02-07  3:48 ` [RFC PATCH 2/5] powerpc/mm/slice: Update the function prototype Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  3:48 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

In a followup patch we want to increase the VA range, which will require
high_slices to have more than 64 bits. To enable this, convert high_slices
to a bitmap. We keep the number of bits the same in this patch and later
change it to a larger value.
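
For reference, the before/after idiom for testing one high-slice bit looks
roughly like this (sketch only, relying on the usual <linux/bitmap.h> and
<linux/bitops.h> helpers):

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/types.h>

/* old: a plain u64 limits us to 64 high slices */
static bool high_slice_set_old(u64 high_slices, unsigned long i)
{
	return !!(high_slices & (1ul << i));
}

/*
 * new: declared as DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH), free to
 * grow past 64 bits in a later patch
 */
static bool high_slice_set_new(const unsigned long *high_slices,
			       unsigned long i)
{
	return test_bit(i, high_slices);
}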

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/page_64.h |  15 +++---
 arch/powerpc/mm/slice.c            | 106 ++++++++++++++++++++++++-------------
 2 files changed, 76 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index dd5f0712afa2..7f72659b7999 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -98,19 +98,16 @@ extern u64 ppc64_pft_size;
 #define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
 
+#ifndef __ASSEMBLY__
 /*
- * 1 bit per slice and we have one slice per 1TB
- * Right now we support only 64TB.
- * IF we change this we will have to change the type
- * of high_slices
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to the 4G range. That gets us 16 low slices. For the rest we track
+ * slices in 1TB units.
+ * 64 below is actually SLICE_NUM_HIGH, spelled out to avoid compile errors
  */
-#define SLICE_MASK_SIZE 8
-
-#ifndef __ASSEMBLY__
-
 struct slice_mask {
 	u16 low_slices;
-	u64 high_slices;
+	DECLARE_BITMAP(high_slices, 64);
 };
 
 struct mm_struct;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 2b27458902ee..84dfb2b58870 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -36,11 +36,6 @@
 #include <asm/copro.h>
 #include <asm/hugetlb.h>
 
-/* some sanity checks */
-#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
-#error H_PGTABLE_RANGE exceeds slice_mask high_slices size
-#endif
-
 static DEFINE_SPINLOCK(slice_convert_lock);
 
 
@@ -49,7 +44,7 @@ int _slice_debug = 1;
 
 static void slice_print_mask(const char *label, struct slice_mask mask)
 {
-	char	*p, buf[16 + 3 + 64 + 1];
+	char	*p, buf[SLICE_NUM_LOW + 3 + SLICE_NUM_HIGH + 1];
 	int	i;
 
 	if (!_slice_debug)
@@ -60,8 +55,12 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
 	*(p++) = ' ';
 	*(p++) = '-';
 	*(p++) = ' ';
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		*(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		if (test_bit(i, mask.high_slices))
+			*(p++) = '1';
+		else
+			*(p++) = '0';
+	}
 	*(p++) = 0;
 
 	printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -80,7 +79,10 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 					     unsigned long len)
 {
 	unsigned long end = start + len - 1;
-	struct slice_mask ret = { 0, 0 };
+	struct slice_mask ret;
+
+	ret.low_slices = 0;
+	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
 
 	if (start < SLICE_LOW_TOP) {
 		unsigned long mend = min(end, SLICE_LOW_TOP);
@@ -91,9 +93,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 	}
 
 	if ((start + len) > SLICE_LOW_TOP)
-		ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
-			- (1ul << GET_HIGH_SLICE_INDEX(start));
-
+		bitmap_set(ret.high_slices, GET_HIGH_SLICE_INDEX(start),
+			   GET_HIGH_SLICE_INDEX(len));
 	return ret;
 }
 
@@ -130,9 +131,12 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 
 static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
 {
-	struct slice_mask ret = { 0, 0 };
+	struct slice_mask ret;
 	unsigned long i;
 
+	ret.low_slices = 0;
+	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (!slice_low_has_vma(mm, i))
 			ret.low_slices |= 1u << i;
@@ -142,7 +146,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
 
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
 		if (!slice_high_has_vma(mm, i))
-			ret.high_slices |= 1ul << i;
+			__set_bit(i, ret.high_slices);
 
 	return ret;
 }
@@ -151,10 +155,13 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
-	struct slice_mask ret = { 0, 0 };
+	struct slice_mask ret;
 	unsigned long i;
 	u64 lpsizes;
 
+	ret.low_slices = 0;
+	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
@@ -165,7 +172,7 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-			ret.high_slices |= 1ul << i;
+			__set_bit(i, ret.high_slices);
 	}
 
 	return ret;
@@ -173,8 +180,13 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 
 static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
 {
+	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+
+	bitmap_and(result, mask.high_slices,
+		   available.high_slices, SLICE_NUM_HIGH);
+
 	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		(mask.high_slices & available.high_slices) == mask.high_slices;
+		bitmap_equal(result, mask.high_slices, SLICE_NUM_HIGH);
 }
 
 static void slice_flush_segments(void *parm)
@@ -221,7 +233,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 	for (i = 0; i < SLICE_NUM_HIGH; i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
-		if (mask.high_slices & (1ul << i))
+		if (test_bit(i, mask.high_slices))
 			hpsizes[index] = (hpsizes[index] &
 					  ~(0xf << (mask_index * 4))) |
 				(((unsigned long)psize) << (mask_index * 4));
@@ -257,7 +269,7 @@ static bool slice_scan_available(unsigned long addr,
 		slice = GET_HIGH_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) ?
 			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-		return !!(available.high_slices & (1ul << slice));
+		return !!test_bit(slice, available.high_slices);
 	}
 }
 
@@ -364,15 +376,24 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 		return slice_find_area_bottomup(mm, len, mask, psize);
 }
 
-#define or_mask(dst, src)	do {			\
-	(dst).low_slices |= (src).low_slices;		\
-	(dst).high_slices |= (src).high_slices;		\
-} while (0)
+static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+{
+	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
 
-#define andnot_mask(dst, src)	do {			\
-	(dst).low_slices &= ~(src).low_slices;		\
-	(dst).high_slices &= ~(src).high_slices;	\
-} while (0)
+	dst->low_slices |= src->low_slices;
+	bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+	bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+}
+
+static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+{
+	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+
+	dst->low_slices &= ~src->low_slices;
+
+	bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+	bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+}
 
 #ifdef CONFIG_PPC_64K_PAGES
 #define MMU_PAGE_BASE	MMU_PAGE_64K
@@ -384,15 +405,28 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 				      unsigned long flags, unsigned int psize,
 				      int topdown)
 {
-	struct slice_mask mask = {0, 0};
+	struct slice_mask mask;
 	struct slice_mask good_mask;
-	struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
-	struct slice_mask compat_mask = {0, 0};
+	struct slice_mask potential_mask;
+	struct slice_mask compat_mask;
 	int fixed = (flags & MAP_FIXED);
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	struct mm_struct *mm = current->mm;
 	unsigned long newaddr;
 
+	/*
+	 * init different masks
+	 */
+	mask.low_slices = 0;
+	bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
+
+	/* silence stupid warning */;
+	potential_mask.low_slices = 0;
+	bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
+
+	compat_mask.low_slices = 0;
+	bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
+
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
 	VM_BUG_ON(radix_enabled());
@@ -450,7 +484,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (psize == MMU_PAGE_64K) {
 		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
 		if (fixed)
-			or_mask(good_mask, compat_mask);
+			slice_or_mask(&good_mask, &compat_mask);
 	}
 #endif
 
@@ -485,7 +519,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * empty and thus can be converted
 	 */
 	potential_mask = slice_mask_for_free(mm);
-	or_mask(potential_mask, good_mask);
+	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
 	if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
@@ -518,7 +552,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
-		or_mask(potential_mask, compat_mask);
+		slice_or_mask(&potential_mask, &compat_mask);
 		addr = slice_find_area(mm, len, potential_mask, psize,
 				       topdown);
 	}
@@ -532,8 +566,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	slice_print_mask(" mask", mask);
 
  convert:
-	andnot_mask(mask, good_mask);
-	andnot_mask(mask, compat_mask);
+	slice_andnot_mask(&mask, &good_mask);
+	slice_andnot_mask(&mask, &compat_mask);
 	if (mask.low_slices || mask.high_slices) {
 		slice_convert(mm, mask, psize);
 		if (psize > MMU_PAGE_BASE)
@@ -701,7 +735,7 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
 		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
-		or_mask(available, compat_mask);
+		slice_or_mask(&available, &compat_mask);
 	}
 #endif
 
-- 
2.7.4

* [RFC PATCH 2/5] powerpc/mm/slice: Update the function prototype
  2017-02-07  3:48 [RFC PATCH 0/5] Add support for larger address space for ppc64 Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 1/5] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Aneesh Kumar K.V
@ 2017-02-07  3:48 ` Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 3/5] powerpc/mm/hash: Move kernel context to the starting of context range Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  3:48 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This avoids copying struct slice_mask as a function return value.
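
A small user-space sketch (illustrative only) of why this matters once
high_slices becomes a wide bitmap, as in the later patches:

#include <stdio.h>

/* stand-in for struct slice_mask with a 256-bit high_slices bitmap */
struct slice_mask_demo {
	unsigned short low_slices;
	unsigned long high_slices[256 / (8 * sizeof(unsigned long))];
};

int main(void)
{
	/* every by-value return copies this whole struct */
	printf("sizeof(struct slice_mask_demo) = %zu bytes\n",
	       sizeof(struct slice_mask_demo));
	return 0;
}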

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/slice.c | 63 +++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 84dfb2b58870..0ec750adbe7f 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -75,27 +75,26 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
 
 #endif
 
-static struct slice_mask slice_range_to_mask(unsigned long start,
-					     unsigned long len)
+static void slice_range_to_mask(unsigned long start, unsigned long len,
+				struct slice_mask *ret)
 {
 	unsigned long end = start + len - 1;
-	struct slice_mask ret;
 
-	ret.low_slices = 0;
-	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+	ret->low_slices = 0;
+	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	if (start < SLICE_LOW_TOP) {
 		unsigned long mend = min(end, SLICE_LOW_TOP);
 		unsigned long mstart = min(start, SLICE_LOW_TOP);
 
-		ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+		ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
 			- (1u << GET_LOW_SLICE_INDEX(mstart));
 	}
 
 	if ((start + len) > SLICE_LOW_TOP)
-		bitmap_set(ret.high_slices, GET_HIGH_SLICE_INDEX(start),
+		bitmap_set(ret->high_slices, GET_HIGH_SLICE_INDEX(start),
 			   GET_HIGH_SLICE_INDEX(len));
-	return ret;
+	return;
 }
 
 static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
@@ -129,53 +128,47 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 	return !slice_area_is_free(mm, start, end - start);
 }
 
-static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
 {
-	struct slice_mask ret;
 	unsigned long i;
 
-	ret.low_slices = 0;
-	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+	ret->low_slices = 0;
+	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (!slice_low_has_vma(mm, i))
-			ret.low_slices |= 1u << i;
+			ret->low_slices |= 1u << i;
 
 	if (mm->task_size <= SLICE_LOW_TOP)
-		return ret;
+		return;
 
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
 		if (!slice_high_has_vma(mm, i))
-			__set_bit(i, ret.high_slices);
-
-	return ret;
+			__set_bit(i, ret->high_slices);
 }
 
-static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
-	struct slice_mask ret;
 	unsigned long i;
 	u64 lpsizes;
 
-	ret.low_slices = 0;
-	bitmap_zero(ret.high_slices, SLICE_NUM_HIGH);
+	ret->low_slices = 0;
+	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
-			ret.low_slices |= 1u << i;
+			ret->low_slices |= 1u << i;
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < SLICE_NUM_HIGH; i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-			__set_bit(i, ret.high_slices);
+			__set_bit(i, ret->high_slices);
 	}
-
-	return ret;
 }
 
 static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
@@ -457,7 +450,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	good_mask = slice_mask_for_size(mm, psize);
+	slice_mask_for_size(mm, psize, &good_mask);
 	slice_print_mask(" good_mask", good_mask);
 
 	/*
@@ -482,7 +475,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If we support combo pages, we can allow 64k pages in 4k slices */
 	if (psize == MMU_PAGE_64K) {
-		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
 		if (fixed)
 			slice_or_mask(&good_mask, &compat_mask);
 	}
@@ -491,7 +484,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First check hint if it's valid or if we have MAP_FIXED */
 	if (addr != 0 || fixed) {
 		/* Build a mask for the requested range */
-		mask = slice_range_to_mask(addr, len);
+		slice_range_to_mask(addr, len, &mask);
 		slice_print_mask(" mask", mask);
 
 		/* Check if we fit in the good mask. If we do, we just return,
@@ -518,7 +511,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* We don't fit in the good mask, check what other slices are
 	 * empty and thus can be converted
 	 */
-	potential_mask = slice_mask_for_free(mm);
+	slice_mask_for_free(mm, &potential_mask);
 	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
@@ -561,7 +554,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (addr == -ENOMEM)
 		return -ENOMEM;
 
-	mask = slice_range_to_mask(addr, len);
+	slice_range_to_mask(addr, len, &mask);
 	slice_dbg(" found potential area at 0x%lx\n", addr);
 	slice_print_mask(" mask", mask);
 
@@ -693,9 +686,11 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 			   unsigned long len, unsigned int psize)
 {
-	struct slice_mask mask = slice_range_to_mask(start, len);
+	struct slice_mask mask;
 
 	VM_BUG_ON(radix_enabled());
+
+	slice_range_to_mask(start, len, &mask);
 	slice_convert(mm, mask, psize);
 }
 
@@ -728,13 +723,13 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 	if (radix_enabled())
 		return 0;
 
-	mask = slice_range_to_mask(addr, len);
-	available = slice_mask_for_size(mm, psize);
+	slice_range_to_mask(addr, len, &mask);
+	slice_mask_for_size(mm, psize, &available);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
-		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
 		slice_or_mask(&available, &compat_mask);
 	}
 #endif
-- 
2.7.4

* [RFC PATCH 3/5] powerpc/mm/hash: Move kernel context to the starting of context range
  2017-02-07  3:48 [RFC PATCH 0/5] Add support for larger address space for ppc64 Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 1/5] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 2/5] powerpc/mm/slice: Update the function prototype Aneesh Kumar K.V
@ 2017-02-07  3:48 ` Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 4/5] powerpc/mm/hash: Support 68 bit VA Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB Aneesh Kumar K.V
  4 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  3:48 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This enables us to limit the max context based on the platform.
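
A minimal sketch (illustrative only) of the resulting mapping from kernel
region to context id, matching the change below:

/*
 * 0xc000... -> context 0 (linear mapping)
 * 0xd000... -> context 1
 * 0xe000... -> context 2
 * 0xf000... -> context 3
 */
static inline unsigned long kernel_region_context(unsigned long ea)
{
	return (ea >> 60) - 0xc;
}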

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 39 ++++++-------
 arch/powerpc/include/asm/mmu_context.h        |  2 -
 arch/powerpc/kvm/book3s_64_mmu_host.c         |  2 +-
 arch/powerpc/mm/hash_utils_64.c               |  5 --
 arch/powerpc/mm/mmu_context_book3s64.c        | 79 +++++++++++++++++----------
 arch/powerpc/mm/slb_low.S                     | 20 ++-----
 6 files changed, 73 insertions(+), 74 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 0735d5a8049f..014a9bb197cd 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -493,10 +493,10 @@ extern void slb_set_size(u16 size);
  * For user processes max context id is limited to ((1ul << 19) - 5)
  * for kernel space, we use the top 4 context ids to map address as below
  * NOTE: each context only support 64TB now.
- * 0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * 0x00000 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
+ * 0x00001 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
+ * 0x00002 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
+ * 0x00003 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
  *
  * The proto-VSIDs are then scrambled into real VSIDs with the
  * multiplicative hash:
@@ -510,15 +510,9 @@ extern void slb_set_size(u16 size);
  * robust scattering in the hash table (at least based on some initial
  * results).
  *
- * We also consider VSID 0 special. We use VSID 0 for slb entries mapping
- * bad address. This enables us to consolidate bad address handling in
- * hash_page.
- *
  * We also need to avoid the last segment of the last context, because that
  * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
- * because of the modulo operation in vsid scramble. But the vmemmap
- * (which is what uses region 0xf) will never be close to 64TB in size
- * (it's 56 bytes per page of system memory).
+ * because of the modulo operation in vsid scramble.
  */
 
 #define CONTEXT_BITS		19
@@ -530,12 +524,15 @@ extern void slb_set_size(u16 size);
 /*
  * 256MB segment
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
- * available for user + kernel mapping. The top 4 contexts are used for
+ * available for user + kernel mapping. The bottom 4 contexts are used for
  * kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
- * (19 == 37 + 28 - 46).
+ * context maps 2^46 bytes (64TB).
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
  */
-#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 5)
+#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 2)
 
 /*
  * This should be computed such that protovosid * vsid_mulitplier
@@ -671,19 +668,19 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
  * This is only valid for addresses >= PAGE_OFFSET
  *
  * For kernel space, we use the top 4 context ids to map address as below
- * 0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * 0x00000 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
+ * 0x00001 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
+ * 0x00002 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
+ * 0x00003 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
  */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
 	unsigned long context;
 
 	/*
-	 * kernel take the top 4 context from the available range
+	 * the kernel takes the first 4 contexts from the available range
 	 */
-	context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
+	context = (ea >> 60) - 0xc;
 	return get_vsid(context, ea, ssize);
 }
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0aca261..e6f6d2956227 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -51,13 +51,11 @@ static inline void switch_mmu_context(struct mm_struct *prev,
 	return switch_slb(tsk, next);
 }
 
-extern int __init_new_context(void);
 extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
 #else
 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 			       struct task_struct *tsk);
-extern unsigned long __init_new_context(void);
 extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd26..fddb0e7dc7b8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -390,7 +390,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	int err;
 
-	err = __init_new_context();
+	err = hash__init_new_context();
 	if (err < 0)
 		return -1;
 	vcpu3s->context_id[0] = err;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67e19a0821be..978314b6b8d7 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -255,11 +255,6 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 		unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
 
-		/*
-		 * If we hit a bad address return error.
-		 */
-		if (!vsid)
-			return -1;
 		/* Make kernel text executable */
 		if (overlaps_kernel_text(vaddr, vaddr + step))
 			tprot &= ~HPTE_R_N;
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 73bf6e14c3aa..fa3237b8e00f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -30,17 +30,19 @@
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
-int __init_new_context(void)
+static int hash__init_new_context(struct mm_struct *mm)
 {
-	int index;
-	int err;
+	int index, err;
 
 again:
 	if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
 		return -ENOMEM;
 
 	spin_lock(&mmu_context_lock);
-	err = ida_get_new_above(&mmu_context_ida, 1, &index);
+	/*
+	 * Contexts 0 - 3 are taken by the kernel.
+	 */
+	err = ida_get_new_above(&mmu_context_ida, 4, &index);
 	spin_unlock(&mmu_context_lock);
 
 	if (err == -EAGAIN)
@@ -54,51 +56,68 @@ int __init_new_context(void)
 		spin_unlock(&mmu_context_lock);
 		return -ENOMEM;
 	}
-
+	/* The old code would re-promote on fork, we don't do that
+	 * when using slices as it could cause problem promoting slices
+	 * that have been forced down to 4K
+	 *
+	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+	 * explicitly against context.id == 0. This ensures that we
+	 * properly initialize context slice details for newly allocated
+	 * mm's (which will have id == 0) and don't alter context slice
+	 * inherited via fork (which will have id != 0).
+	 *
+	 * We should not be calling init_new_context() on init_mm. Hence a
+	 * check against 0 is ok.
+	 */
+	if (mm->context.id == 0)
+		slice_set_user_psize(mm, mmu_virtual_psize);
+	subpage_prot_init_new_context(mm);
 	return index;
 }
-EXPORT_SYMBOL_GPL(__init_new_context);
-static int radix__init_new_context(struct mm_struct *mm, int index)
+
+static int radix__init_new_context(struct mm_struct *mm)
 {
+	int index, err;
 	unsigned long rts_field;
 
+again:
+	if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&mmu_context_lock);
+	err = ida_get_new_above(&mmu_context_ida, 1, &index);
+	spin_unlock(&mmu_context_lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > ((1UL << PRTB_SIZE_SHIFT) - 1)) {
+		spin_lock(&mmu_context_lock);
+		ida_remove(&mmu_context_ida, index);
+		spin_unlock(&mmu_context_lock);
+		return -ENOMEM;
+	}
 	/*
 	 * set the process table entry,
 	 */
 	rts_field = radix__get_tree_size();
 	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
-	return 0;
+	return index;
 }
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	int index;
 
-	index = __init_new_context();
+	if (radix_enabled())
+		index = radix__init_new_context(mm);
+	else
+		index = hash__init_new_context(mm);
 	if (index < 0)
 		return index;
 
-	if (radix_enabled()) {
-		radix__init_new_context(mm, index);
-	} else {
-
-		/* The old code would re-promote on fork, we don't do that
-		 * when using slices as it could cause problem promoting slices
-		 * that have been forced down to 4K
-		 *
-		 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
-		 * explicitly against context.id == 0. This ensures that we
-		 * properly initialize context slice details for newly allocated
-		 * mm's (which will have id == 0) and don't alter context slice
-		 * inherited via fork (which will have id != 0).
-		 *
-		 * We should not be calling init_new_context() on init_mm. Hence a
-		 * check against 0 is ok.
-		 */
-		if (mm->context.id == 0)
-			slice_set_user_psize(mm, mmu_virtual_psize);
-		subpage_prot_init_new_context(mm);
-	}
 	mm->context.id = index;
 #ifdef CONFIG_PPC_ICSWX
 	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index e2974fcd20f1..4ce050ea4200 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -45,13 +45,6 @@ _GLOBAL(slb_allocate_realmode)
 	/* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
 	blt	cr7,0f			/* user or kernel? */
 
-	/* kernel address: proto-VSID = ESID */
-	/* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
-	 * this code will generate the protoVSID 0xfffffffff for the
-	 * top segment.  That's ok, the scramble below will translate
-	 * it to VSID 0, which is reserved as a bad VSID - one which
-	 * will never have any pages in it.  */
-
 	/* Check if hitting the linear mapping or some other kernel space
 	*/
 	bne	cr7,1f
@@ -63,12 +56,10 @@ _GLOBAL(slb_allocate_realmode)
 slb_miss_kernel_load_linear:
 	li	r11,0
 	/*
-	 * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+	 * context = (ea >> 60) - 0xc
 	 * r9 = region id.
 	 */
-	addis	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
-	addi	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
-
+	subi	r9,r9,0xc
 
 BEGIN_FTR_SECTION
 	b	slb_finish_load
@@ -77,9 +68,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 
 1:
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-	/* Check virtual memmap region. To be patches at kernel boot */
 	cmpldi	cr0,r9,0xf
 	bne	1f
+/* Check virtual memmap region. To be patched at kernel boot */
 .globl slb_miss_kernel_load_vmemmap
 slb_miss_kernel_load_vmemmap:
 	li	r11,0
@@ -102,11 +93,10 @@ slb_miss_kernel_load_io:
 	li	r11,0
 6:
 	/*
-	 * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+	 * context = (ea >> 60) - 0xc
 	 * r9 = region id.
 	 */
-	addis	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
-	addi	r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
+	subi	r9,r9,0xc
 
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-- 
2.7.4

* [RFC PATCH 4/5] powerpc/mm/hash: Support 68 bit VA
  2017-02-07  3:48 [RFC PATCH 0/5] Add support for larger address space for ppc64 Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2017-02-07  3:48 ` [RFC PATCH 3/5] powerpc/mm/hash: Move kernel context to the starting of context range Aneesh Kumar K.V
@ 2017-02-07  3:48 ` Aneesh Kumar K.V
  2017-02-07  3:48 ` [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB Aneesh Kumar K.V
  4 siblings, 0 replies; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  3:48 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We still use a 19-bit context ID. For P4 and P5 we do a 65-bit VA.
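
For reference, a small user-space check (sketch only; assumes a 64-bit
unsigned long) of the fold-based scramble used here, compared against a
plain modulo:

/*
 * vsid = (protovsid * multiplier) mod (2^bits - 1), computed without a
 * division. The multiplier below is the 24-bit prime from this series;
 * bits = 25 corresponds to a 1T segment with a 65-bit VA.
 */
#include <stdio.h>

static unsigned long scramble(unsigned long protovsid,
			      unsigned long multiplier, int bits)
{
	unsigned long modulus = (1UL << bits) - 1;
	unsigned long x = protovsid * multiplier;

	x = (x >> bits) + (x & modulus);	  /* fold high bits into low */
	return (x + ((x + 1) >> bits)) & modulus; /* final conditional reduce */
}

int main(void)
{
	unsigned long multiplier = 12538073UL;
	int bits = 25;
	unsigned long p;

	for (p = 0; p < (1UL << 20); p++) {
		unsigned long want = (p * multiplier) % ((1UL << bits) - 1);

		if (scramble(p, multiplier, bits) != want) {
			printf("mismatch at protovsid %lu\n", p);
			return 1;
		}
	}
	printf("fold matches plain modulo over the tested range\n");
	return 0;
}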

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 125 ++++++++++++++++----------
 arch/powerpc/include/asm/mmu.h                |  19 ++--
 arch/powerpc/mm/mmu_context_book3s64.c        |   8 +-
 arch/powerpc/mm/slb_low.S                     |  51 +++++++++--
 4 files changed, 142 insertions(+), 61 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 014a9bb197cd..bcbb5ccc4293 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -39,6 +39,7 @@
 
 /* Bits in the SLB VSID word */
 #define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_256M	12
 #define SLB_VSID_SHIFT_1T	24
 #define SLB_VSID_SSIZE_SHIFT	62
 #define SLB_VSID_B		ASM_CONST(0xc000000000000000)
@@ -515,9 +516,19 @@ extern void slb_set_size(u16 size);
  * because of the modulo operation in vsid scramble.
  */
 
+/*
+ * The max VA bits we support as of now is 68. We want a 19-bit
+ * context ID.
+ * Restrictions:
+ * The GPU cannot access beyond 128TB
+ * (47-bit effective address). We also cannot do more than a 20-bit PID.
+ * For p4 and p5, which can only do a 65-bit VA, we restrict CONTEXT_BITS
+ * to 16 bits (i.e., we can only have 2^16 PIDs at the same time).
+ */
+#define VA_BITS			68
 #define CONTEXT_BITS		19
-#define ESID_BITS		18
-#define ESID_BITS_1T		6
+#define ESID_BITS		(VA_BITS -(SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T		(VA_BITS -(SID_SHIFT_1T + CONTEXT_BITS))
 
 #define ESID_BITS_MASK		((1 << ESID_BITS) - 1)
 #define ESID_BITS_1T_MASK	((1 << ESID_BITS_1T) - 1)
@@ -526,62 +537,56 @@ extern void slb_set_size(u16 size);
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
  * available for user + kernel mapping. The bottom 4 contexts are used for
  * kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB).
+ * context maps 2^49 bytes (512TB).
  *
  * We also need to avoid the last segment of the last context, because that
  * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
  * because of the modulo operation in vsid scramble.
  */
 #define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 2)
+/*
+ * For platforms that support only a 65-bit VA we limit the context bits
+ */
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
 
 /*
  * This should be computed such that protovosid * vsid_mulitplier
  * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ * We also need to make sure that the number of bits in the divisor is less
+ * than twice the number of protovsid bits for our modulus optimization to work.
+ * The below table shows the current values used.
+ *
+ * 64TB Config:
+ * |-------+------------+----------------+------------+--------------|
+ * |       | Prime Bits | VSID_BITS_65VA | Total Bits | 2* VSID_BITS |
+ * |-------+------------+----------------+------------+--------------|
+ * | 1T    |         24 |             25 |         49 |           50 |
+ * |-------+------------+----------------+------------+--------------|
+ * | 256MB |         24 |             37 |         61 |           74 |
+ * |-------+------------+----------------+------------+--------------|
+ *
+ * 512TB Config:
+ * |-------+------------+----------------+------------+--------------|
+ * |       | Prime Bits | VSID_BITS_68VA | Total Bits | 2* VSID_BITS |
+ * |-------+------------+----------------+------------+--------------|
+ * | 1T    |         24 |             28 |         52 |           56 |
+ * |-------+------------+----------------+------------+--------------|
+ * | 256MB |         24 |             40 |         64 |           80 |
+ * |-------+------------+----------------+------------+--------------|
+ *
  */
 #define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_256M		(CONTEXT_BITS + ESID_BITS)
+#define VSID_BITS_256M		(VA_BITS - SID_SHIFT)
 #define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
+#define VSID_BITS_65_256M	(65 - SID_SHIFT)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_1T		(CONTEXT_BITS + ESID_BITS_1T)
+#define VSID_BITS_1T		(VA_BITS - SID_SHIFT_1T)
 #define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
-
+#define VSID_BITS_65_1T		(65 - SID_SHIFT_1T)
 
 #define USER_VSID_RANGE	(1UL << (ESID_BITS + SID_SHIFT))
 
-/*
- * This macro generates asm code to compute the VSID scramble
- * function.  Used in slb_allocate() and do_stab_bolted.  The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- *	rt = register containing the proto-VSID and into which the
- *		VSID will be stored
- *	rx = scratch register (clobbered)
- *
- * 	- rt and rx must be different registers
- * 	- The answer will end up in the low VSID_BITS bits of rt.  The higher
- * 	  bits may contain other garbage, so you may need to mask the
- * 	  result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, size)					\
-	lis	rx,VSID_MULTIPLIER_##size@h;				\
-	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
-	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
-									\
-	srdi	rx,rt,VSID_BITS_##size;					\
-	clrldi	rt,rt,(64-VSID_BITS_##size);				\
-	add	rt,rt,rx;		/* add high and low bits */	\
-	/* NOTE: explanation based on VSID_BITS_##size = 36		\
-	 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and		\
-	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
-	 * 2^36-1, then r3+1 has the 2^36 bit set.  So, if r3+1 has	\
-	 * the bit clear, r3 already has the answer we want, if it	\
-	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
-	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
-	addi	rx,rt,1;						\
-	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
-	add	rt,rt,rx
-
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE  (H_PGTABLE_RANGE >> 41)
 
@@ -629,7 +634,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #define vsid_scramble(protovsid, size) \
 	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
 
-#else /* 1 */
+/* simplified form avoiding mod operation */
 #define vsid_scramble(protovsid, size) \
 	({								 \
 		unsigned long x;					 \
@@ -637,6 +642,21 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
 		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
 	})
+
+#else /* 1 */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+				  unsigned long vsid_multiplier, int vsid_bits)
+{
+	unsigned long vsid;
+	unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
+	/*
+	 * We have the same multiplier for both 256M and 1T segments now
+	 */
+	vsid = protovsid * vsid_multiplier;
+	vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
+	return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
+}
+
 #endif /* 1 */
 
 /* Returns the segment size indicator for a user address */
@@ -651,17 +671,32 @@ static inline int user_segment_size(unsigned long addr)
 static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 				     int ssize)
 {
+	unsigned long va_bits = 65;
+	unsigned long vsid_bits;
+	unsigned long protovsid;
+
 	/*
 	 * Bad address. We return VSID 0 for that
 	 */
 	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
 		return 0;
 
-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble((context << ESID_BITS)
-				     | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
-	return vsid_scramble((context << ESID_BITS_1T)
-			     | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 68;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		vsid_bits = va_bits - SID_SHIFT;
+		protovsid = (context << ESID_BITS) |
+			((ea >> SID_SHIFT) & ESID_BITS_MASK);
+		return vsid_scramble(protovsid,
+				     VSID_MULTIPLIER_256M, vsid_bits);
+	}
+	/* 1T segment */
+	vsid_bits = va_bits - SID_SHIFT_1T;
+	protovsid = (context << ESID_BITS_1T) |
+		((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+	return vsid_scramble(protovsid,
+			     VSID_MULTIPLIER_1T, vsid_bits);
 }
 
 /*
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e5616bf83623..be40591ce3bd 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -29,6 +29,10 @@
  */
 
 /*
+ * Support for 68 bit VA space. We added that from ISA 2.05
+ */
+#define MMU_FTR_68_BIT_VA		ASM_CONST(0x00002000)
+/*
  * Kernel read only support.
  * We added the ppp value 0b110 in ISA 2.04.
  */
@@ -109,10 +113,10 @@
 #define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
 #define MMU_FTRS_PPC970		MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
 #define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER8		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER9		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER6		MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER8		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER9		MMU_FTRS_POWER6
 #define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
 				MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -136,7 +140,7 @@ enum {
 		MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
 		MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
 		MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
-		MMU_FTR_KERNEL_RO |
+		MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA |
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
 #endif
@@ -288,7 +292,10 @@ static inline bool early_radix_enabled(void)
 #define MMU_PAGE_16G	14
 #define MMU_PAGE_64G	15
 
-/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
+/*
+ * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16
+ * Also we need to change the type of mm_context.low/high_slices_psize.
+ */
 #define MMU_PAGE_COUNT	16
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index fa3237b8e00f..626cc75824ed 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -33,6 +33,12 @@ static DEFINE_IDA(mmu_context_ida);
 static int hash__init_new_context(struct mm_struct *mm)
 {
 	int index, err;
+	unsigned long max_user_context;
+
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		max_user_context = MAX_USER_CONTEXT;
+	else
+		max_user_context = MAX_USER_CONTEXT_65BIT_VA;
 
 again:
 	if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
@@ -50,7 +56,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 	else if (err)
 		return err;
 
-	if (index > MAX_USER_CONTEXT) {
+	if (index > max_user_context) {
 		spin_lock(&mmu_context_lock);
 		ida_remove(&mmu_context_ida, index);
 		spin_unlock(&mmu_context_lock);
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 4ce050ea4200..10fac99cd036 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -23,6 +23,45 @@
 #include <asm/pgtable.h>
 #include <asm/firmware.h>
 
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function.  Used in slb_allocate() and do_stab_bolted.  The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ *	rt = register containing the proto-VSID and into which the
+ *		VSID will be stored
+ *	rx = scratch register (clobbered)
+ *	rf = flags
+ *
+ *	- rt and rx must be different registers
+ *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
+ *	  bits may contain other garbage, so you may need to mask the
+ *	  result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)				\
+	lis	rx,VSID_MULTIPLIER_##size@h;				\
+	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
+	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
+									\
+BEGIN_MMU_FTR_SECTION							\
+	srdi	rx,rt,VSID_BITS_##size;					\
+	clrldi	rt,rt,(64-VSID_BITS_##size);				\
+	add	rt,rt,rx;		/* add high and low bits */	\
+	addi	rx,rt,1;						\
+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
+	add	rt,rt,rx;						\
+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
+MMU_FTR_SECTION_ELSE							\
+	srdi	rx,rt,VSID_BITS_65_##size;				\
+	clrldi	rt,rt,(64-VSID_BITS_65_##size);				\
+	add	rt,rt,rx;						\
+	addi	rx,rt,1;						\
+	srdi	rx,rx,VSID_BITS_65_##size;				\
+	add	rt,rt,rx;						\
+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_68_BIT_VA)
+
+
 /* void slb_allocate_realmode(unsigned long ea);
  *
  * Create an SLB entry for the given EA (user or kernel).
@@ -179,13 +218,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
  */
 slb_finish_load:
 	rldimi  r10,r9,ESID_BITS,0
-	ASM_VSID_SCRAMBLE(r10,r9,256M)
-	/*
-	 * bits above VSID_BITS_256M need to be ignored from r10
-	 * also combine VSID and flags
-	 */
-	rldimi	r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
-
+	ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
 	/* r3 = EA, r11 = VSID data */
 	/*
 	 * Find a slot, round robin. Previously we tried to find a
@@ -249,12 +282,12 @@ slb_compare_rr_to_size:
 slb_finish_load_1T:
 	srdi	r10,r10,(SID_SHIFT_1T - SID_SHIFT)	/* get 1T ESID */
 	rldimi  r10,r9,ESID_BITS_1T,0
-	ASM_VSID_SCRAMBLE(r10,r9,1T)
+	ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
 	/*
 	 * bits above VSID_BITS_1T need to be ignored from r10
 	 * also combine VSID and flags
 	 */
-	rldimi	r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
+
 	li	r10,MMU_SEGSIZE_1T
 	rldimi	r11,r10,SLB_VSID_SSIZE_SHIFT,0	/* insert segment size */
 
-- 
2.7.4

* [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB
  2017-02-07  3:48 [RFC PATCH 0/5] Add support for larger address space for ppc64 Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2017-02-07  3:48 ` [RFC PATCH 4/5] powerpc/mm/hash: Support 68 bit VA Aneesh Kumar K.V
@ 2017-02-07  3:48 ` Aneesh Kumar K.V
  2017-02-07  4:06   ` Benjamin Herrenschmidt
  4 siblings, 1 reply; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  3:48 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Not-Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  2 +-
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
 arch/powerpc/include/asm/page_64.h            |  2 +-
 arch/powerpc/include/asm/processor.h          | 12 +++++++-----
 arch/powerpc/mm/slice.c                       |  2 ++
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 0c4e470571ca..5b5410ca2bd0 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -6,7 +6,7 @@
  * each entry by assuming that each entry is page aligned.
  */
 #define H_PTE_INDEX_SIZE  9
-#define H_PMD_INDEX_SIZE  7
+#define H_PMD_INDEX_SIZE  9
 #define H_PUD_INDEX_SIZE  9
 #define H_PGD_INDEX_SIZE  9
 
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f3dd21efa2ea..37810b0792a5 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -4,7 +4,7 @@
 #define H_PTE_INDEX_SIZE  8
 #define H_PMD_INDEX_SIZE  5
 #define H_PUD_INDEX_SIZE  5
-#define H_PGD_INDEX_SIZE  12
+#define H_PGD_INDEX_SIZE  14
 
 #define H_PAGE_COMBO	0x00001000 /* this is a combo 4k page */
 #define H_PAGE_4K_PFN	0x00002000 /* PFN is for a single 4k page */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 7f72659b7999..d85ac61e2394 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -107,7 +107,7 @@ extern u64 ppc64_pft_size;
  */
 struct slice_mask {
 	u16 low_slices;
-	DECLARE_BITMAP(high_slices, 64);
+	DECLARE_BITMAP(high_slices, 256);
 };
 
 struct mm_struct;
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 1ba814436c73..37e0f3124dee 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -102,11 +102,13 @@ void release_thread(struct task_struct *);
 #endif
 
 #ifdef CONFIG_PPC64
-/* 64-bit user address space is 46-bits (64TB user VM) */
-#define TASK_SIZE_USER64 (0x0000400000000000UL)
-
-/* 
- * 32-bit user address space is 4GB - 1 page 
+/*
+ * 64-bit user address space is 48-bits (256TB user VM).
+ * Derived out of PGTABLE_RANGE
+ */
+#define TASK_SIZE_USER64 (0x0001000000000000UL)
+/*
+ * 32-bit user address space is 4GB - 1 page
  * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
  */
 #define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0ec750adbe7f..f194afbaf423 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -407,6 +407,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	struct mm_struct *mm = current->mm;
 	unsigned long newaddr;
 
+	/* Make sure high_slices bitmap size is same as we expected */
+	BUILD_BUG_ON(256 != SLICE_NUM_HIGH);
 	/*
 	 * init different masks
 	 */
-- 
2.7.4

* Re: [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB
  2017-02-07  3:48 ` [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB Aneesh Kumar K.V
@ 2017-02-07  4:06   ` Benjamin Herrenschmidt
  2017-02-07  4:17     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 10+ messages in thread
From: Benjamin Herrenschmidt @ 2017-02-07  4:06 UTC (permalink / raw)
  To: Aneesh Kumar K.V, paulus, mpe; +Cc: linuxppc-dev

That's 48 bits. I would keep the limit at 47 without some explicit
opt-in by applications. That's what users get on x86 and we know
some GPUs have limits there.

Cheers,
Ben.

* Re: [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB
  2017-02-07  4:06   ` Benjamin Herrenschmidt
@ 2017-02-07  4:17     ` Aneesh Kumar K.V
  2017-02-07  4:46       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 10+ messages in thread
From: Aneesh Kumar K.V @ 2017-02-07  4:17 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, paulus, mpe; +Cc: linuxppc-dev

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> That's 48 bits. I would keep the limit at 47 without some explicit
> opt-in by applications. That's what users get on x86 and we know
> some GPUs have limits there.

The idea is to have Linux personality values that will limit the
effective address to different memory ranges. By default the max effective
address will be limited to 64TB. Applications can then extend this to
128TB or 512TB. So we will have two personality values, PER_LINUX_128TB
and PER_LINUX_512TB?

-aneesh

* Re: [RFC PATCH 5/5] powerpc/mm/hash: Increase VA range to 256TB
  2017-02-07  4:17     ` Aneesh Kumar K.V
@ 2017-02-07  4:46       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 10+ messages in thread
From: Benjamin Herrenschmidt @ 2017-02-07  4:46 UTC (permalink / raw)
  To: Aneesh Kumar K.V, paulus, mpe; +Cc: linuxppc-dev

On Tue, 2017-02-07 at 09:47 +0530, Aneesh Kumar K.V wrote:
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> 
> > That's 48 bits. I would keep the limit at 47 without some explicit
> > opt-in by applications. That's what users get on x86 and we know
> > some GPUs have limits there.
> 
> The idea is to have Linux personality values that will limit the
> effective address to different memory ranges. By default the max effective
> address will be limited to 64TB. Applications can then extend this to
> 128TB or 512TB. So we will have two personality values, PER_LINUX_128TB
> and PER_LINUX_512TB?

No. I think we can go up to 47-bits without any personality
requirement. For above, a single personality indicating that the
application has no limit.

Check what Intel is doing for 5-level trees
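
For illustration, a hint-based opt-in along those lines could look like this
from user space (hypothetical sketch; neither the hint convention nor the
limits here are an API from this series):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* a hint above the default window would ask for the extended range */
	void *hint = (void *)(1UL << 47);
	void *p = mmap(hint, 1UL << 20, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		perror("mmap");
	else
		printf("mapped at %p\n", p);
	return 0;
}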

Cheers,
Ben.

* Re: [RFC PATCH 1/5] powerpc/mm/slice: Convert slice_mask high slice to a bitmap
  2017-02-07  3:48 ` [RFC PATCH 1/5] powerpc/mm/slice: Convert slice_mask high slice to a bitmap Aneesh Kumar K.V
@ 2017-02-08 10:30   ` Balbir Singh
  0 siblings, 0 replies; 10+ messages in thread
From: Balbir Singh @ 2017-02-08 10:30 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, linuxppc-dev

On Tue, Feb 07, 2017 at 09:18:49AM +0530, Aneesh Kumar K.V wrote:
> In a followup patch we want to increase the VA range, which will require
> high_slices to have more than 64 bits. To enable this, convert high_slices
> to a bitmap. We keep the number of bits the same in this patch and later
> change it to a larger value.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/page_64.h |  15 +++---
>  arch/powerpc/mm/slice.c            | 106 ++++++++++++++++++++++++-------------
>  2 files changed, 76 insertions(+), 45 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
> index dd5f0712afa2..7f72659b7999 100644
> --- a/arch/powerpc/include/asm/page_64.h
> +++ b/arch/powerpc/include/asm/page_64.h
> @@ -98,19 +98,16 @@ extern u64 ppc64_pft_size;
>  #define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
>  #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
>  
> +#ifndef __ASSEMBLY__
>  struct slice_mask {
>  	u16 low_slices;

Can we move low_slices as well? Although we don't need it,
it'll just make the code consistent.

> -	u64 high_slices;
> +	DECLARE_BITMAP(high_slices, 64);
>  };
>  
>  
>  static void slice_print_mask(const char *label, struct slice_mask mask)
>  {
> -	char	*p, buf[16 + 3 + 64 + 1];
> +	char	*p, buf[SLICE_NUM_LOW + 3 + SLICE_NUM_HIGH + 1];
>  	int	i;
>  
>  	if (!_slice_debug)
> @@ -60,8 +55,12 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
>  	*(p++) = ' ';
>  	*(p++) = '-';
>  	*(p++) = ' ';
> -	for (i = 0; i < SLICE_NUM_HIGH; i++)
> -		*(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
> +	for (i = 0; i < SLICE_NUM_HIGH; i++) {
> +		if (test_bit(i, mask.high_slices))
> +			*(p++) = '1';
> +		else
> +			*(p++) = '0';
> +	}

Can we move to using %*pbl or bitmap_print_to_pagebuf()?

>  	*(p++) = 0;
>  
>

Balbir Singh. 
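
A hypothetical version of slice_print_mask() along those lines (untested
sketch; %*pbl takes the bit count as field width and a pointer to the bitmap):

static void slice_print_mask(const char *label, struct slice_mask mask)
{
	if (!_slice_debug)
		return;
	/* print the high slices as a range list, e.g. "0-15,42" */
	printk(KERN_DEBUG "%s: low 0x%x - high %*pbl\n", label,
	       mask.low_slices, SLICE_NUM_HIGH, mask.high_slices);
}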
