* [PATCH v3 0/6] Reduce memory usage for mm_context_t
@ 2019-04-17 13:03 Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 1/6] powerpc/mm/radix: Do slb preload only with hash translation mode Aneesh Kumar K.V
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

This series moves the hash translation related context variables into a
separate structure and allocates that structure at runtime, based on the
translation mode.
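
Roughly, the end result looks like this (a simplified sketch only; the
actual layout and the runtime allocation are in patches 5 and 6):

struct hash_mm_context {
        u16 user_psize;         /* page size index */
        /* SLB page size encodings */
        unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
        unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
        unsigned long slb_addr_limit;
        /* slice masks and the subpage protection table follow */
};

typedef struct {
        mm_context_id_t id;
        /* NULL with radix translation, allocated at context init for hash */
        struct hash_mm_context *hash_context;
        unsigned long vdso_base;
        /* other fields unchanged */
} mm_context_t;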

Aneesh Kumar K.V (6):
  powerpc/mm/radix: Do slb preload only with hash translation mode
  powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64
  powerpc/mm: Add helpers for accessing hash translation related
    variables
  powerpc/mm: Move slb_addr_linit to early_init_mmu
  powerpc/mm: Reduce memory usage for mm_context_t for radix
  powerc/mm/hash: Reduce hash_mm_context size

 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 37 +++++++-
 arch/powerpc/include/asm/book3s/64/mmu.h      | 92 +++++++++++++------
 arch/powerpc/include/asm/book3s/64/slice.h    | 13 ---
 arch/powerpc/include/asm/nohash/32/mmu-8xx.h  | 50 ++++++++++
 arch/powerpc/kernel/paca.c                    | 12 +--
 arch/powerpc/kernel/process.c                 |  3 +-
 arch/powerpc/kernel/setup-common.c            |  7 +-
 arch/powerpc/mm/hash_utils_64.c               | 17 +++-
 arch/powerpc/mm/mmu_context_book3s64.c        | 29 +++++-
 arch/powerpc/mm/slb.c                         |  2 +-
 arch/powerpc/mm/slice.c                       | 49 +++++-----
 arch/powerpc/mm/subpage-prot.c                | 34 +++++--
 arch/powerpc/mm/tlb_nohash.c                  |  6 ++
 13 files changed, 248 insertions(+), 103 deletions(-)

-- 
2.20.1



* [PATCH v3 1/6] powerpc/mm/radix: Do slb preload only with hash translation mode
  2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
@ 2019-04-17 13:03 ` Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 2/6] powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64 Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

Add a radix_enabled() check to avoid SLB preload when using radix translation.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/kernel/process.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd9e0d5386ee..f7b2e3b3db28 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1729,7 +1729,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	unsigned long load_addr = regs->gpr[2];	/* saved by ELF_PLAT_INIT */
 
 #ifdef CONFIG_PPC_BOOK3S_64
-	preload_new_slb_context(start, sp);
+	if (!radix_enabled())
+		preload_new_slb_context(start, sp);
 #endif
 #endif
 
-- 
2.20.1



* [PATCH v3 2/6] powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64
  2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 1/6] powerpc/mm/radix: Do slb preload only with hash translation mode Aneesh Kumar K.V
@ 2019-04-17 13:03 ` Aneesh Kumar K.V
  2019-04-21 14:19   ` [v3,2/6] " Michael Ellerman
  2019-04-17 13:03 ` [PATCH v3 3/6] powerpc/mm: Add helpers for accessing hash translation related variables Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

Book3s64 always has PPC_MM_SLICES enabled, so remove the unnecessary #ifdef.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h   |  4 ----
 arch/powerpc/include/asm/book3s/64/slice.h | 13 -------------
 2 files changed, 17 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..484a8ff9b338 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -123,7 +123,6 @@ typedef struct {
 	/* NPU NMMU context */
 	struct npu_context *npu_context;
 
-#ifdef CONFIG_PPC_MM_SLICES
 	 /* SLB page size encodings*/
 	unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
@@ -136,9 +135,6 @@ typedef struct {
 	struct slice_mask mask_16m;
 	struct slice_mask mask_16g;
 # endif
-#else
-	u16 sllp;		/* SLB page size encoding */
-#endif
 	unsigned long vdso_base;
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index db0dedab65ee..062e11136e9c 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
 #define _ASM_POWERPC_BOOK3S_64_SLICE_H
 
-#ifdef CONFIG_PPC_MM_SLICES
-
 #define SLICE_LOW_SHIFT		28
 #define SLICE_LOW_TOP		(0x100000000ul)
 #define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,15 +11,4 @@
 #define SLICE_NUM_HIGH		(H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
 
-#else /* CONFIG_PPC_MM_SLICES */
-
-#define get_slice_psize(mm, addr)	((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize)		\
-do {						\
-	(mm)->context.user_psize = (psize);	\
-	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-
-#endif /* CONFIG_PPC_MM_SLICES */
-
 #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
-- 
2.20.1



* [PATCH v3 3/6] powerpc/mm: Add helpers for accessing hash translation related variables
  2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 1/6] powerpc/mm/radix: Do slb preload only with hash translation mode Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 2/6] powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64 Aneesh Kumar K.V
@ 2019-04-17 13:03 ` Aneesh Kumar K.V
  2019-04-24 10:16   ` Christophe Leroy
  2019-04-17 13:03 ` [PATCH v3 4/6] powerpc/mm: Move slb_addr_linit to early_init_mmu Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

We want to switch to allocating these variables at runtime, and only when
hash translation is enabled. Add helpers so that both book3s and nohash can
be adapted to the upcoming change easily.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 +-
 arch/powerpc/include/asm/book3s/64/mmu.h      | 63 ++++++++++++++++++-
 arch/powerpc/include/asm/nohash/32/mmu-8xx.h  | 50 +++++++++++++++
 arch/powerpc/kernel/paca.c                    | 12 ++--
 arch/powerpc/mm/hash_utils_64.c               | 10 +--
 arch/powerpc/mm/slb.c                         |  2 +-
 arch/powerpc/mm/slice.c                       | 49 +++++++--------
 arch/powerpc/mm/subpage-prot.c                |  8 +--
 8 files changed, 154 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a28a28079edb..eb36fbfe4ef5 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -657,8 +657,8 @@ extern void slb_set_size(u16 size);
 
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x)	((x)->context.slb_addr_limit >> 41)
-
+#define LOW_SLICE_ARRAY_SZ	(BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x)	((x)->slb_addr_limit >> 41)
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_SUBPAGE_PROT
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 484a8ff9b338..28213a36fef7 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -124,7 +124,7 @@ typedef struct {
 	struct npu_context *npu_context;
 
 	 /* SLB page size encodings*/
-	unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
+	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long slb_addr_limit;
 # ifdef CONFIG_PPC_64K_PAGES
@@ -159,6 +159,67 @@ typedef struct {
 #endif
 } mm_context_t;
 
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+	return ctx->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+	ctx->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+	return ctx->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+	return ctx->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+	return ctx->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+	ctx->slb_addr_limit = limit;
+}
+
+#ifdef CONFIG_PPC_64K_PAGES
+static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx)
+{
+	return &ctx->mask_64k;
+}
+#endif
+
+static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx)
+{
+	return &ctx->mask_4k;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx)
+{
+	return &ctx->mask_16m;
+}
+
+static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx)
+{
+	return &ctx->mask_16g;
+}
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+	return &ctx->spt;
+}
+#endif
+
 /*
  * The current system page and segment sizes
  */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 0a1a3fc54e54..0f4b0b50e5ad 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -167,6 +167,7 @@
 #ifdef CONFIG_PPC_MM_SLICES
 #include <asm/nohash/32/slice.h>
 #define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
+#define LOW_SLICE_ARRAY_SZ	SLICE_ARRAY_SIZE
 #endif
 
 #ifndef __ASSEMBLY__
@@ -193,6 +194,55 @@ typedef struct {
 	void *pte_frag;
 } mm_context_t;
 
+#ifdef CONFIG_PPC_MM_SLICES
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+	return ctx->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+	ctx->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+	return ctx->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+	return ctx->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+	return ctx->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+	ctx->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *mm_ctx_slice_mask_base(mm_context_t *ctx)
+{
+	return &ctx->mask_base_psize;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline struct slice_mask *mm_ctx_slice_mask_512k(mm_context_t *ctx)
+{
+	return &ctx->mask_512k;
+}
+
+static inline struct slice_mask *mm_ctx_slice_mask_8m(mm_context_t *ctx)
+{
+	return &ctx->mask_8m;
+}
+#endif
+#endif /* CONFIG_PPC_MM_SLICE */
+
 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
 #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
 
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e7382abee868..9cc91d03ab62 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm)
 
 	get_paca()->mm_ctx_id = context->id;
 #ifdef CONFIG_PPC_MM_SLICES
-	VM_BUG_ON(!mm->context.slb_addr_limit);
-	get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
-	memcpy(&get_paca()->mm_ctx_low_slices_psize,
-	       &context->low_slices_psize, sizeof(context->low_slices_psize));
-	memcpy(&get_paca()->mm_ctx_high_slices_psize,
-	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+	VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+	get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
+	memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+	       LOW_SLICE_ARRAY_SZ);
+	memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+	       TASK_SLICE_ARRAY_SZ(context));
 #else /* CONFIG_PPC_MM_SLICES */
 	get_paca()->mm_ctx_user_psize = context->user_psize;
 	get_paca()->mm_ctx_sllp = context->sllp;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0a4f939a8161..5a2bd132f92e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1147,7 +1147,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
  */
 static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
-	struct subpage_prot_table *spt = &mm->context.spt;
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
 	u32 spp = 0;
 	u32 **sbpm, *sbpp;
 
@@ -1470,7 +1470,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 	int psize = get_slice_psize(mm, ea);
 
 	/* We only prefault standard pages for now */
-	if (unlikely(psize != mm->context.user_psize))
+	if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
 		return false;
 
 	/*
@@ -1549,7 +1549,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 
 	/* Hash it in */
 #ifdef CONFIG_PPC_64K_PAGES
-	if (mm->context.user_psize == MMU_PAGE_64K)
+	if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
 		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
 				     update_flags, ssize);
 	else
@@ -1562,8 +1562,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	 */
 	if (rc == -1)
 		hash_failure_debug(ea, access, vsid, trap, ssize,
-				   mm->context.user_psize,
-				   mm->context.user_psize,
+				   mm_ctx_user_psize(&mm->context),
+				   mm_ctx_user_psize(&mm->context),
 				   pte_val(*ptep));
 out_exit:
 	local_irq_restore(flags);
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5986df48359b..78c0c0a0e355 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -739,7 +739,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
 	 * consider this as bad access if we take a SLB miss
 	 * on an address above addr limit.
 	 */
-	if (ea >= mm->context.slb_addr_limit)
+	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
 		return -EFAULT;
 
 	context = get_user_context(&mm->context, ea);
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index aec91dbcdc0b..35b278082391 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
 {
 	struct vm_area_struct *vma;
 
-	if ((mm->context.slb_addr_limit - len) < addr)
+	if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
 	return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -155,15 +155,15 @@ static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
 {
 #ifdef CONFIG_PPC_64K_PAGES
 	if (psize == MMU_PAGE_64K)
-		return &mm->context.mask_64k;
+		return mm_ctx_slice_mask_64k(&mm->context);
 #endif
 	if (psize == MMU_PAGE_4K)
-		return &mm->context.mask_4k;
+		return mm_ctx_slice_mask_4k(&mm->context);
 #ifdef CONFIG_HUGETLB_PAGE
 	if (psize == MMU_PAGE_16M)
-		return &mm->context.mask_16m;
+		return mm_ctx_slice_mask_16m(&mm->context);
 	if (psize == MMU_PAGE_16G)
-		return &mm->context.mask_16g;
+		return mm_ctx_slice_mask_16g(&mm->context);
 #endif
 	BUG();
 }
@@ -253,7 +253,7 @@ static void slice_convert(struct mm_struct *mm,
 	 */
 	spin_lock_irqsave(&slice_convert_lock, flags);
 
-	lpsizes = mm->context.low_slices_psize;
+	lpsizes = mm_ctx_low_slices(&mm->context);
 	for (i = 0; i < SLICE_NUM_LOW; i++) {
 		if (!(mask->low_slices & (1u << i)))
 			continue;
@@ -272,8 +272,8 @@ static void slice_convert(struct mm_struct *mm,
 				(((unsigned long)psize) << (mask_index * 4));
 	}
 
-	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+	hpsizes = mm_ctx_high_slices(&mm->context);
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
 		if (!test_bit(i, mask->high_slices))
 			continue;
 
@@ -292,8 +292,8 @@ static void slice_convert(struct mm_struct *mm,
 	}
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
-		  (unsigned long)mm->context.low_slices_psize,
-		  (unsigned long)mm->context.high_slices_psize);
+		  (unsigned long)mm_ctx_low_slices(&mm->context),
+		  (unsigned long)mm_ctx_high_slices(&mm->context));
 
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 
@@ -393,7 +393,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 	 * DEFAULT_MAP_WINDOW we should apply this.
 	 */
 	if (high_limit > DEFAULT_MAP_WINDOW)
-		addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
+		addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
 
 	while (addr > min_addr) {
 		info.high_limit = addr;
@@ -505,20 +505,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 			return -ENOMEM;
 	}
 
-	if (high_limit > mm->context.slb_addr_limit) {
+	if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
 		/*
 		 * Increasing the slb_addr_limit does not require
 		 * slice mask cache to be recalculated because it should
 		 * be already initialised beyond the old address limit.
 		 */
-		mm->context.slb_addr_limit = high_limit;
+		mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
 
 		on_each_cpu(slice_flush_segments, mm, 1);
 	}
 
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
-	BUG_ON(mm->context.slb_addr_limit == 0);
+	BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
 	VM_BUG_ON(radix_enabled());
 
 	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
@@ -696,7 +696,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
 				     unsigned long flags)
 {
 	return slice_get_unmapped_area(addr, len, flags,
-				       current->mm->context.user_psize, 0);
+				       mm_ctx_user_psize(&current->mm->context), 0);
 }
 
 unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -706,7 +706,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 					     const unsigned long flags)
 {
 	return slice_get_unmapped_area(addr0, len, flags,
-				       current->mm->context.user_psize, 1);
+				       mm_ctx_user_psize(&current->mm->context), 1);
 }
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -717,10 +717,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 	VM_BUG_ON(radix_enabled());
 
 	if (slice_addr_is_low(addr)) {
-		psizes = mm->context.low_slices_psize;
+		psizes = mm_ctx_low_slices(&mm->context);
 		index = GET_LOW_SLICE_INDEX(addr);
 	} else {
-		psizes = mm->context.high_slices_psize;
+		psizes = mm_ctx_high_slices(&mm->context);
 		index = GET_HIGH_SLICE_INDEX(addr);
 	}
 	mask_index = index & 0x1;
@@ -742,20 +742,19 @@ void slice_init_new_context_exec(struct mm_struct *mm)
 	 * duplicated.
 	 */
 #ifdef CONFIG_PPC64
-	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+	mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW_USER64);
 #else
 	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
 #endif
-
-	mm->context.user_psize = psize;
+	mm_ctx_set_user_psize(&mm->context, psize);
 
 	/*
 	 * Set all slice psizes to the default.
 	 */
-	lpsizes = mm->context.low_slices_psize;
+	lpsizes = mm_ctx_low_slices(&mm->context);
 	memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
 
-	hpsizes = mm->context.high_slices_psize;
+	hpsizes = mm_ctx_high_slices(&mm->context);
 	memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
 
 	/*
@@ -777,7 +776,7 @@ void slice_setup_new_exec(void)
 	if (!is_32bit_task())
 		return;
 
-	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+	mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
 }
 #endif
 
@@ -816,7 +815,7 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len)
 {
 	const struct slice_mask *maskp;
-	unsigned int psize = mm->context.user_psize;
+	unsigned int psize = mm_ctx_user_psize(&mm->context);
 
 	VM_BUG_ON(radix_enabled());
 
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 5e4178790dee..c72252542210 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -25,7 +25,7 @@
  */
 void subpage_prot_free(struct mm_struct *mm)
 {
-	struct subpage_prot_table *spt = &mm->context.spt;
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
 	unsigned long i, j, addr;
 	u32 **p;
 
@@ -52,7 +52,7 @@ void subpage_prot_free(struct mm_struct *mm)
 
 void subpage_prot_init_new_context(struct mm_struct *mm)
 {
-	struct subpage_prot_table *spt = &mm->context.spt;
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
 
 	memset(spt, 0, sizeof(*spt));
 }
@@ -93,7 +93,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
 static void subpage_prot_clear(unsigned long addr, unsigned long len)
 {
 	struct mm_struct *mm = current->mm;
-	struct subpage_prot_table *spt = &mm->context.spt;
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
 	u32 **spm, *spp;
 	unsigned long i;
 	size_t nw;
@@ -189,7 +189,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
 		unsigned long, len, u32 __user *, map)
 {
 	struct mm_struct *mm = current->mm;
-	struct subpage_prot_table *spt = &mm->context.spt;
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
 	u32 **spm, *spp;
 	unsigned long i;
 	size_t nw;
-- 
2.20.1



* [PATCH v3 4/6] powerpc/mm: Move slb_addr_linit to early_init_mmu
  2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2019-04-17 13:03 ` [PATCH v3 3/6] powerpc/mm: Add helpers for accessing hash translation related variables Aneesh Kumar K.V
@ 2019-04-17 13:03 ` Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix Aneesh Kumar K.V
  2019-04-17 13:03 ` [PATCH v3 6/6] powerc/mm/hash: Reduce hash_mm_context size Aneesh Kumar K.V
  5 siblings, 0 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

Avoid #ifdefs in generic code. This also enables us to make the
initialization specific to the MMU translation mode on book3s64.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/kernel/setup-common.c | 11 -----------
 arch/powerpc/mm/hash_utils_64.c    |  2 ++
 arch/powerpc/mm/tlb_nohash.c       |  6 ++++++
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e5dfb6e0823..a07de8608484 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -947,17 +947,6 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = klimit;
 
-#ifdef CONFIG_PPC_MM_SLICES
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#elif defined(CONFIG_PPC_8xx)
-	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#else
-#error	"context.addr_limit not initialized."
-#endif
-#endif
-
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(&init_mm);
 #endif
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5a2bd132f92e..2cb3a456f5b5 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1041,6 +1041,8 @@ void __init hash__early_init_mmu(void)
 	 */
 	htab_initialize();
 
+	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+
 	pr_info("Initializing hash mmu with SLB\n");
 	/* Initialize SLB management */
 	slb_initialize();
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index ac23dc1c6535..088e0a6b5ade 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -800,5 +800,11 @@ void __init early_init_mmu(void)
 #ifdef CONFIG_PPC_47x
 	early_init_mmu_47x();
 #endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+#if defined(CONFIG_PPC_8xx)
+	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
+#endif
 }
 #endif /* CONFIG_PPC64 */
-- 
2.20.1



* [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix
  2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2019-04-17 13:03 ` [PATCH v3 4/6] powerpc/mm: Move slb_addr_linit to early_init_mmu Aneesh Kumar K.V
@ 2019-04-17 13:03 ` Aneesh Kumar K.V
  2019-04-24 12:54   ` Christophe Leroy
  2019-04-17 13:03 ` [PATCH v3 6/6] powerc/mm/hash: Reduce hash_mm_context size Aneesh Kumar K.V
  5 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

Currently, our mm_context_t on book3s64 includes all the hash-specific
context details such as the slice masks and subpage protection details. We
can skip allocating these with radix translation, which saves about 8K per
mm_context when using radix.

With the patch applied we have

sizeof(mm_context_t)  = 136
sizeof(struct hash_mm_context)  = 8288

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 33 ++++++++++++-
 arch/powerpc/include/asm/book3s/64/mmu.h      | 49 +++++--------------
 arch/powerpc/kernel/setup-common.c            |  6 +++
 arch/powerpc/mm/hash_utils_64.c               |  4 +-
 arch/powerpc/mm/mmu_context_book3s64.c        | 16 +++++-
 5 files changed, 68 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index eb36fbfe4ef5..4481bedbb5be 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -658,7 +658,7 @@ extern void slb_set_size(u16 size);
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
 #define LOW_SLICE_ARRAY_SZ	(BITS_PER_LONG / BITS_PER_BYTE)
-#define TASK_SLICE_ARRAY_SZ(x)	((x)->slb_addr_limit >> 41)
+#define TASK_SLICE_ARRAY_SZ(x)	((x)->hash_context->slb_addr_limit >> 41)
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -693,6 +693,37 @@ static inline void subpage_prot_free(struct mm_struct *mm) {}
 static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * upto 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+	u16 user_psize; /* page size index */
+
+	/* SLB page size encodings*/
+	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+	struct slice_mask mask_64k;
+#endif
+	struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_16m;
+	struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+	struct subpage_prot_table spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
 #if 0
 /*
  * The code below is equivalent to this function for arguments
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 28213a36fef7..3bc94baf5ed5 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -89,16 +89,6 @@ struct spinlock;
 /* Maximum possible number of NPUs in a system. */
 #define NV_MAX_NPUS 8
 
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
-	u64 low_slices;
-	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
-
 typedef struct {
 	union {
 		/*
@@ -112,7 +102,6 @@ typedef struct {
 		mm_context_id_t id;
 		mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
 	};
-	u16 user_psize;		/* page size index */
 
 	/* Number of bits in the mm_cpumask */
 	atomic_t active_cpus;
@@ -122,23 +111,9 @@ typedef struct {
 
 	/* NPU NMMU context */
 	struct npu_context *npu_context;
+	struct hash_mm_context *hash_context;
 
-	 /* SLB page size encodings*/
-	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
-	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-	unsigned long slb_addr_limit;
-# ifdef CONFIG_PPC_64K_PAGES
-	struct slice_mask mask_64k;
-# endif
-	struct slice_mask mask_4k;
-# ifdef CONFIG_HUGETLB_PAGE
-	struct slice_mask mask_16m;
-	struct slice_mask mask_16g;
-# endif
 	unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-	struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
 	/*
 	 * pagetable fragment support
 	 */
@@ -161,62 +136,62 @@ typedef struct {
 
 static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
 {
-	return ctx->user_psize;
+	return ctx->hash_context->user_psize;
 }
 
 static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
 {
-	ctx->user_psize = user_psize;
+	ctx->hash_context->user_psize = user_psize;
 }
 
 static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
 {
-	return ctx->low_slices_psize;
+	return ctx->hash_context->low_slices_psize;
 }
 
 static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
 {
-	return ctx->high_slices_psize;
+	return ctx->hash_context->high_slices_psize;
 }
 
 static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
 {
-	return ctx->slb_addr_limit;
+	return ctx->hash_context->slb_addr_limit;
 }
 
 static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
 {
-	ctx->slb_addr_limit = limit;
+	ctx->hash_context->slb_addr_limit = limit;
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
 static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx)
 {
-	return &ctx->mask_64k;
+	return &ctx->hash_context->mask_64k;
 }
 #endif
 
 static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx)
 {
-	return &ctx->mask_4k;
+	return &ctx->hash_context->mask_4k;
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
 static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx)
 {
-	return &ctx->mask_16m;
+	return &ctx->hash_context->mask_16m;
 }
 
 static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx)
 {
-	return &ctx->mask_16g;
+	return &ctx->hash_context->mask_16g;
 }
 #endif
 
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
 {
-	return &ctx->spt;
+	return &ctx->hash_context->spt;
 }
 #endif
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index a07de8608484..21b1ce200b22 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -947,6 +947,12 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = klimit;
 
+#ifdef CONFIG_PPC_MM_SLICES
+#if defined(CONFIG_PPC_8xx)
+	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
+#endif
+
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(&init_mm);
 #endif
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2cb3a456f5b5..04ac7c36d380 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -968,6 +968,7 @@ void __init hash__early_init_devtree(void)
 	htab_scan_page_sizes();
 }
 
+struct hash_mm_context init_hash_mm_context;
 void __init hash__early_init_mmu(void)
 {
 #ifndef CONFIG_PPC_64K_PAGES
@@ -1041,7 +1042,8 @@ void __init hash__early_init_mmu(void)
 	 */
 	htab_initialize();
 
-	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+	init_mm.context.hash_context = &init_hash_mm_context;
+	init_mm.context.hash_context->slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
 
 	pr_info("Initializing hash mmu with SLB\n");
 	/* Initialize SLB management */
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f720c5cc0b5e..6eef5a36b2e9 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -63,6 +63,12 @@ static int hash__init_new_context(struct mm_struct *mm)
 	if (index < 0)
 		return index;
 
+	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL);
+	if (!mm->context.hash_context) {
+		ida_free(&mmu_context_ida, index);
+		return -ENOMEM;
+	}
+
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
 	 * slices as it could cause problem promoting slices that have been
@@ -77,8 +83,14 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * We should not be calling init_new_context() on init_mm. Hence a
 	 * check against 0 is OK.
 	 */
-	if (mm->context.id == 0)
+	if (mm->context.id == 0) {
+		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
 		slice_init_new_context_exec(mm);
+	} else {
+		/* This is fork. Copy hash_context details from current->mm */
+		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+
+	}
 
 	subpage_prot_init_new_context(mm);
 
@@ -118,6 +130,7 @@ static int radix__init_new_context(struct mm_struct *mm)
 	asm volatile("ptesync;isync" : : : "memory");
 
 	mm->context.npu_context = NULL;
+	mm->context.hash_context = NULL;
 
 	return index;
 }
@@ -162,6 +175,7 @@ static void destroy_contexts(mm_context_t *ctx)
 		if (context_id)
 			ida_free(&mmu_context_ida, context_id);
 	}
+	kfree(ctx->hash_context);
 }
 
 static void pmd_frag_destroy(void *pmd_frag)
-- 
2.20.1



* [PATCH v3 6/6] powerc/mm/hash: Reduce hash_mm_context size
  2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2019-04-17 13:03 ` [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix Aneesh Kumar K.V
@ 2019-04-17 13:03 ` Aneesh Kumar K.V
  5 siblings, 0 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-17 13:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe, Christophe Leroy; +Cc: Aneesh Kumar K.V, linuxppc-dev

Allocate the subpage protection related variables only if the feature is
actually used. This reduces the hash-related mm context struct by around 4K.

Before the patch
sizeof(struct hash_mm_context)  = 8288

After the patch
sizeof(struct hash_mm_context) = 4160

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 +--
 arch/powerpc/include/asm/book3s/64/mmu.h      |  2 +-
 arch/powerpc/mm/hash_utils_64.c               |  3 ++
 arch/powerpc/mm/mmu_context_book3s64.c        | 17 +++++++++--
 arch/powerpc/mm/subpage-prot.c                | 28 ++++++++++++++-----
 5 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 4481bedbb5be..eeb40091b46b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -687,10 +687,8 @@ struct subpage_prot_table {
 #define SBP_L3_SHIFT		(SBP_L2_SHIFT + SBP_L2_BITS)
 
 extern void subpage_prot_free(struct mm_struct *mm);
-extern void subpage_prot_init_new_context(struct mm_struct *mm);
 #else
 static inline void subpage_prot_free(struct mm_struct *mm) {}
-static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
 /*
@@ -720,7 +718,7 @@ struct hash_mm_context {
 #endif
 
 #ifdef CONFIG_PPC_SUBPAGE_PROT
-	struct subpage_prot_table spt;
+	struct subpage_prot_table *spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 };
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 3bc94baf5ed5..62ae73df93d2 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -191,7 +191,7 @@ static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx)
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
 {
-	return &ctx->hash_context->spt;
+	return ctx->hash_context->spt;
 }
 #endif
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 04ac7c36d380..33bd4a1b4d75 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1155,6 +1155,9 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 	u32 spp = 0;
 	u32 **sbpm, *sbpp;
 
+	if (!spt)
+		return 0;
+
 	if (ea >= spt->maxaddr)
 		return 0;
 	if (ea < 0x100000000UL) {
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 6eef5a36b2e9..cb2b08635508 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -63,7 +63,8 @@ static int hash__init_new_context(struct mm_struct *mm)
 	if (index < 0)
 		return index;
 
-	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL);
+	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
+					   GFP_KERNEL);
 	if (!mm->context.hash_context) {
 		ida_free(&mmu_context_ida, index);
 		return -ENOMEM;
@@ -89,11 +90,21 @@ static int hash__init_new_context(struct mm_struct *mm)
 	} else {
 		/* This is fork. Copy hash_context details from current->mm */
 		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+		/* inherit subpage prot detalis if we have one. */
+		if (current->mm->context.hash_context->spt) {
+			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
+								GFP_KERNEL);
+			if (!mm->context.hash_context->spt) {
+				ida_free(&mmu_context_ida, index);
+				kfree(mm->context.hash_context);
+				return -ENOMEM;
+			}
+		}
+#endif
 
 	}
 
-	subpage_prot_init_new_context(mm);
-
 	pkey_mm_init(mm);
 	return index;
 }
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index c72252542210..c9dff4e1f295 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -29,6 +29,9 @@ void subpage_prot_free(struct mm_struct *mm)
 	unsigned long i, j, addr;
 	u32 **p;
 
+	if (!spt)
+		return;
+
 	for (i = 0; i < 4; ++i) {
 		if (spt->low_prot[i]) {
 			free_page((unsigned long)spt->low_prot[i]);
@@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm)
 		free_page((unsigned long)p);
 	}
 	spt->maxaddr = 0;
-}
-
-void subpage_prot_init_new_context(struct mm_struct *mm)
-{
-	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
-
-	memset(spt, 0, sizeof(*spt));
+	kfree(spt);
 }
 
 static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
@@ -99,6 +96,9 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
 	size_t nw;
 	unsigned long next, limit;
 
+	if (!spt)
+		return ;
+
 	down_write(&mm->mmap_sem);
 	limit = addr + len;
 	if (limit > spt->maxaddr)
@@ -218,6 +218,20 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
 		return -EFAULT;
 
 	down_write(&mm->mmap_sem);
+
+	if (!spt) {
+		/*
+		 * Allocate subpage prot table if not already done.
+		 * Do this with mmap_sem held
+		 */
+		spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
+		if (!spt) {
+			err = -ENOMEM;
+			goto out;
+		}
+		mm->context.hash_context->spt = spt;
+	}
+
 	subpage_mark_vma_nohuge(mm, addr, len);
 	for (limit = addr + len; addr < limit; addr = next) {
 		next = pmd_addr_end(addr, limit);
-- 
2.20.1



* Re: [v3,2/6] powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64
  2019-04-17 13:03 ` [PATCH v3 2/6] powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64 Aneesh Kumar K.V
@ 2019-04-21 14:19   ` Michael Ellerman
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Ellerman @ 2019-04-21 14:19 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, paulus, Christophe Leroy
  Cc: Aneesh Kumar K.V, linuxppc-dev

On Wed, 2019-04-17 at 13:03:47 UTC, "Aneesh Kumar K.V" wrote:
> Book3s64 always have PPC_MM_SLICES enabled. So remove the unncessary #ifdef
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Patches 2-6 applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/4f40b15f339d896f5726714842947c93

cheers


* Re: [PATCH v3 3/6] powerpc/mm: Add helpers for accessing hash translation related variables
  2019-04-17 13:03 ` [PATCH v3 3/6] powerpc/mm: Add helpers for accessing hash translation related variables Aneesh Kumar K.V
@ 2019-04-24 10:16   ` Christophe Leroy
  0 siblings, 0 replies; 11+ messages in thread
From: Christophe Leroy @ 2019-04-24 10:16 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, paulus, mpe; +Cc: linuxppc-dev



On 17/04/2019 at 15:03, Aneesh Kumar K.V wrote:
> We want to switch to allocating them runtime only when hash translation is
> enabled. Add helpers so that both book3s and nohash can be adapted to
> upcoming change easily.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 +-
>   arch/powerpc/include/asm/book3s/64/mmu.h      | 63 ++++++++++++++++++-
>   arch/powerpc/include/asm/nohash/32/mmu-8xx.h  | 50 +++++++++++++++
>   arch/powerpc/kernel/paca.c                    | 12 ++--
>   arch/powerpc/mm/hash_utils_64.c               | 10 +--
>   arch/powerpc/mm/slb.c                         |  2 +-
>   arch/powerpc/mm/slice.c                       | 49 +++++++--------
>   arch/powerpc/mm/subpage-prot.c                |  8 +--
>   8 files changed, 154 insertions(+), 44 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index a28a28079edb..eb36fbfe4ef5 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -657,8 +657,8 @@ extern void slb_set_size(u16 size);
>   
>   /* 4 bits per slice and we have one slice per 1TB */
>   #define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
> -#define TASK_SLICE_ARRAY_SZ(x)	((x)->context.slb_addr_limit >> 41)
> -
> +#define LOW_SLICE_ARRAY_SZ	(BITS_PER_LONG / BITS_PER_BYTE)
> +#define TASK_SLICE_ARRAY_SZ(x)	((x)->slb_addr_limit >> 41)
>   #ifndef __ASSEMBLY__
>   
>   #ifdef CONFIG_PPC_SUBPAGE_PROT
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 484a8ff9b338..28213a36fef7 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -124,7 +124,7 @@ typedef struct {
>   	struct npu_context *npu_context;
>   
>   	 /* SLB page size encodings*/
> -	unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
> +	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
>   	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
>   	unsigned long slb_addr_limit;
>   # ifdef CONFIG_PPC_64K_PAGES
> @@ -159,6 +159,67 @@ typedef struct {
>   #endif
>   } mm_context_t;
>   
> +static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
> +{
> +	return ctx->user_psize;
> +}
> +
> +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
> +{
> +	ctx->user_psize = user_psize;
> +}
> +
> +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
> +{
> +	return ctx->low_slices_psize;
> +}
> +
> +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
> +{
> +	return ctx->high_slices_psize;
> +}
> +
> +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
> +{
> +	return ctx->slb_addr_limit;
> +}
> +
> +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
> +{
> +	ctx->slb_addr_limit = limit;
> +}
> +
> +#ifdef CONFIG_PPC_64K_PAGES
> +static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx)
> +{
> +	return &ctx->mask_64k;
> +}
> +#endif
> +
> +static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx)
> +{
> +	return &ctx->mask_4k;
> +}
> +
> +#ifdef CONFIG_HUGETLB_PAGE
> +static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx)
> +{
> +	return &ctx->mask_16m;
> +}
> +
> +static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx)
> +{
> +	return &ctx->mask_16g;
> +}
> +#endif

I think it would be better to move slice_mask_for_size() into mmu.h 
instead of defining those helpers.
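
For illustration, something like this in book3s/64/mmu.h (an untested
sketch, essentially just relocating the existing function from slice.c as
it stands after this patch):

static inline struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
#ifdef CONFIG_PPC_64K_PAGES
        if (psize == MMU_PAGE_64K)
                return &mm->context.mask_64k;
#endif
        if (psize == MMU_PAGE_4K)
                return &mm->context.mask_4k;
#ifdef CONFIG_HUGETLB_PAGE
        if (psize == MMU_PAGE_16M)
                return &mm->context.mask_16m;
        if (psize == MMU_PAGE_16G)
                return &mm->context.mask_16g;
#endif
        BUG();
}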

> +
> +#ifdef CONFIG_PPC_SUBPAGE_PROT
> +static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
> +{
> +	return &ctx->spt;
> +}
> +#endif
> +
>   /*
>    * The current system page and segment sizes
>    */
> diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
> index 0a1a3fc54e54..0f4b0b50e5ad 100644
> --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
> +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
> @@ -167,6 +167,7 @@
>   #ifdef CONFIG_PPC_MM_SLICES
>   #include <asm/nohash/32/slice.h>
>   #define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
> +#define LOW_SLICE_ARRAY_SZ	SLICE_ARRAY_SIZE
>   #endif
>   
>   #ifndef __ASSEMBLY__
> @@ -193,6 +194,55 @@ typedef struct {
>   	void *pte_frag;
>   } mm_context_t;
>   
> +#ifdef CONFIG_PPC_MM_SLICES
> +static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
> +{
> +	return ctx->user_psize;
> +}
> +
> +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
> +{
> +	ctx->user_psize = user_psize;
> +}
> +
> +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
> +{
> +	return ctx->low_slices_psize;
> +}
> +
> +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
> +{
> +	return ctx->high_slices_psize;
> +}
> +
> +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
> +{
> +	return ctx->slb_addr_limit;
> +}
> +
> +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
> +{
> +	ctx->slb_addr_limit = limit;
> +}
> +
> +static inline struct slice_mask *mm_ctx_slice_mask_base(mm_context_t *ctx)
> +{
> +	return &ctx->mask_base_psize;
> +}
> +
> +#ifdef CONFIG_HUGETLB_PAGE
> +static inline struct slice_mask *mm_ctx_slice_mask_512k(mm_context_t *ctx)
> +{
> +	return &ctx->mask_512k;
> +}
> +
> +static inline struct slice_mask *mm_ctx_slice_mask_8m(mm_context_t *ctx)
> +{
> +	return &ctx->mask_8m;
> +}
> +#endif

The 3 helpers above are never used; I don't think we will ever need them.

What would be good is to move slice_mask_for_size() here into mmu-8xx.h,
as sketched below.
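
Along the same lines as the book3s64 case, a rough (untested) sketch for
mmu-8xx.h could be:

static inline struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
        if (psize == mmu_virtual_psize)
                return &mm->context.mask_base_psize;
#ifdef CONFIG_HUGETLB_PAGE
        if (psize == MMU_PAGE_512K)
                return &mm->context.mask_512k;
        if (psize == MMU_PAGE_8M)
                return &mm->context.mask_8m;
#endif
        BUG();
}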

I'll rebase my series on top of yours since Michael has already merged it.

Christophe

> +#endif /* CONFIG_PPC_MM_SLICE */
> +
>   #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
>   #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
>   
> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
> index e7382abee868..9cc91d03ab62 100644
> --- a/arch/powerpc/kernel/paca.c
> +++ b/arch/powerpc/kernel/paca.c
> @@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm)
>   
>   	get_paca()->mm_ctx_id = context->id;
>   #ifdef CONFIG_PPC_MM_SLICES
> -	VM_BUG_ON(!mm->context.slb_addr_limit);
> -	get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
> -	memcpy(&get_paca()->mm_ctx_low_slices_psize,
> -	       &context->low_slices_psize, sizeof(context->low_slices_psize));
> -	memcpy(&get_paca()->mm_ctx_high_slices_psize,
> -	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
> +	VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
> +	get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
> +	memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
> +	       LOW_SLICE_ARRAY_SZ);
> +	memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
> +	       TASK_SLICE_ARRAY_SZ(context));
>   #else /* CONFIG_PPC_MM_SLICES */
>   	get_paca()->mm_ctx_user_psize = context->user_psize;
>   	get_paca()->mm_ctx_sllp = context->sllp;
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 0a4f939a8161..5a2bd132f92e 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -1147,7 +1147,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
>    */
>   static int subpage_protection(struct mm_struct *mm, unsigned long ea)
>   {
> -	struct subpage_prot_table *spt = &mm->context.spt;
> +	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
>   	u32 spp = 0;
>   	u32 **sbpm, *sbpp;
>   
> @@ -1470,7 +1470,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
>   	int psize = get_slice_psize(mm, ea);
>   
>   	/* We only prefault standard pages for now */
> -	if (unlikely(psize != mm->context.user_psize))
> +	if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
>   		return false;
>   
>   	/*
> @@ -1549,7 +1549,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
>   
>   	/* Hash it in */
>   #ifdef CONFIG_PPC_64K_PAGES
> -	if (mm->context.user_psize == MMU_PAGE_64K)
> +	if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
>   		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
>   				     update_flags, ssize);
>   	else
> @@ -1562,8 +1562,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
>   	 */
>   	if (rc == -1)
>   		hash_failure_debug(ea, access, vsid, trap, ssize,
> -				   mm->context.user_psize,
> -				   mm->context.user_psize,
> +				   mm_ctx_user_psize(&mm->context),
> +				   mm_ctx_user_psize(&mm->context),
>   				   pte_val(*ptep));
>   out_exit:
>   	local_irq_restore(flags);
> diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
> index 5986df48359b..78c0c0a0e355 100644
> --- a/arch/powerpc/mm/slb.c
> +++ b/arch/powerpc/mm/slb.c
> @@ -739,7 +739,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
>   	 * consider this as bad access if we take a SLB miss
>   	 * on an address above addr limit.
>   	 */
> -	if (ea >= mm->context.slb_addr_limit)
> +	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
>   		return -EFAULT;
>   
>   	context = get_user_context(&mm->context, ea);
> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> index aec91dbcdc0b..35b278082391 100644
> --- a/arch/powerpc/mm/slice.c
> +++ b/arch/powerpc/mm/slice.c
> @@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
>   {
>   	struct vm_area_struct *vma;
>   
> -	if ((mm->context.slb_addr_limit - len) < addr)
> +	if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
>   		return 0;
>   	vma = find_vma(mm, addr);
>   	return (!vma || (addr + len) <= vm_start_gap(vma));
> @@ -155,15 +155,15 @@ static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
>   {
>   #ifdef CONFIG_PPC_64K_PAGES
>   	if (psize == MMU_PAGE_64K)
> -		return &mm->context.mask_64k;
> +		return mm_ctx_slice_mask_64k(&mm->context);
>   #endif
>   	if (psize == MMU_PAGE_4K)
> -		return &mm->context.mask_4k;
> +		return mm_ctx_slice_mask_4k(&mm->context);
>   #ifdef CONFIG_HUGETLB_PAGE
>   	if (psize == MMU_PAGE_16M)
> -		return &mm->context.mask_16m;
> +		return mm_ctx_slice_mask_16m(&mm->context);
>   	if (psize == MMU_PAGE_16G)
> -		return &mm->context.mask_16g;
> +		return mm_ctx_slice_mask_16g(&mm->context);
>   #endif
>   	BUG();
>   }
> @@ -253,7 +253,7 @@ static void slice_convert(struct mm_struct *mm,
>   	 */
>   	spin_lock_irqsave(&slice_convert_lock, flags);
>   
> -	lpsizes = mm->context.low_slices_psize;
> +	lpsizes = mm_ctx_low_slices(&mm->context);
>   	for (i = 0; i < SLICE_NUM_LOW; i++) {
>   		if (!(mask->low_slices & (1u << i)))
>   			continue;
> @@ -272,8 +272,8 @@ static void slice_convert(struct mm_struct *mm,
>   				(((unsigned long)psize) << (mask_index * 4));
>   	}
>   
> -	hpsizes = mm->context.high_slices_psize;
> -	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
> +	hpsizes = mm_ctx_high_slices(&mm->context);
> +	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
>   		if (!test_bit(i, mask->high_slices))
>   			continue;
>   
> @@ -292,8 +292,8 @@ static void slice_convert(struct mm_struct *mm,
>   	}
>   
>   	slice_dbg(" lsps=%lx, hsps=%lx\n",
> -		  (unsigned long)mm->context.low_slices_psize,
> -		  (unsigned long)mm->context.high_slices_psize);
> +		  (unsigned long)mm_ctx_low_slices(&mm->context),
> +		  (unsigned long)mm_ctx_high_slices(&mm->context));
>   
>   	spin_unlock_irqrestore(&slice_convert_lock, flags);
>   
> @@ -393,7 +393,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
>   	 * DEFAULT_MAP_WINDOW we should apply this.
>   	 */
>   	if (high_limit > DEFAULT_MAP_WINDOW)
> -		addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
> +		addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
>   
>   	while (addr > min_addr) {
>   		info.high_limit = addr;
> @@ -505,20 +505,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
>   			return -ENOMEM;
>   	}
>   
> -	if (high_limit > mm->context.slb_addr_limit) {
> +	if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
>   		/*
>   		 * Increasing the slb_addr_limit does not require
>   		 * slice mask cache to be recalculated because it should
>   		 * be already initialised beyond the old address limit.
>   		 */
> -		mm->context.slb_addr_limit = high_limit;
> +		mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
>   
>   		on_each_cpu(slice_flush_segments, mm, 1);
>   	}
>   
>   	/* Sanity checks */
>   	BUG_ON(mm->task_size == 0);
> -	BUG_ON(mm->context.slb_addr_limit == 0);
> +	BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
>   	VM_BUG_ON(radix_enabled());
>   
>   	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
> @@ -696,7 +696,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
>   				     unsigned long flags)
>   {
>   	return slice_get_unmapped_area(addr, len, flags,
> -				       current->mm->context.user_psize, 0);
> +				       mm_ctx_user_psize(&current->mm->context), 0);
>   }
>   
>   unsigned long arch_get_unmapped_area_topdown(struct file *filp,
> @@ -706,7 +706,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
>   					     const unsigned long flags)
>   {
>   	return slice_get_unmapped_area(addr0, len, flags,
> -				       current->mm->context.user_psize, 1);
> +				       mm_ctx_user_psize(&current->mm->context), 1);
>   }
>   
>   unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
> @@ -717,10 +717,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
>   	VM_BUG_ON(radix_enabled());
>   
>   	if (slice_addr_is_low(addr)) {
> -		psizes = mm->context.low_slices_psize;
> +		psizes = mm_ctx_low_slices(&mm->context);
>   		index = GET_LOW_SLICE_INDEX(addr);
>   	} else {
> -		psizes = mm->context.high_slices_psize;
> +		psizes = mm_ctx_high_slices(&mm->context);
>   		index = GET_HIGH_SLICE_INDEX(addr);
>   	}
>   	mask_index = index & 0x1;
> @@ -742,20 +742,19 @@ void slice_init_new_context_exec(struct mm_struct *mm)
>   	 * duplicated.
>   	 */
>   #ifdef CONFIG_PPC64
> -	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
> +	mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW_USER64);
>   #else
>   	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
>   #endif
> -
> -	mm->context.user_psize = psize;
> +	mm_ctx_set_user_psize(&mm->context, psize);
>   
>   	/*
>   	 * Set all slice psizes to the default.
>   	 */
> -	lpsizes = mm->context.low_slices_psize;
> +	lpsizes = mm_ctx_low_slices(&mm->context);
>   	memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
>   
> -	hpsizes = mm->context.high_slices_psize;
> +	hpsizes = mm_ctx_high_slices(&mm->context);
>   	memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
>   
>   	/*
> @@ -777,7 +776,7 @@ void slice_setup_new_exec(void)
>   	if (!is_32bit_task())
>   		return;
>   
> -	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
> +	mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
>   }
>   #endif
>   
> @@ -816,7 +815,7 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
>   			   unsigned long len)
>   {
>   	const struct slice_mask *maskp;
> -	unsigned int psize = mm->context.user_psize;
> +	unsigned int psize = mm_ctx_user_psize(&mm->context);
>   
>   	VM_BUG_ON(radix_enabled());
>   
> diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
> index 5e4178790dee..c72252542210 100644
> --- a/arch/powerpc/mm/subpage-prot.c
> +++ b/arch/powerpc/mm/subpage-prot.c
> @@ -25,7 +25,7 @@
>    */
>   void subpage_prot_free(struct mm_struct *mm)
>   {
> -	struct subpage_prot_table *spt = &mm->context.spt;
> +	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
>   	unsigned long i, j, addr;
>   	u32 **p;
>   
> @@ -52,7 +52,7 @@ void subpage_prot_free(struct mm_struct *mm)
>   
>   void subpage_prot_init_new_context(struct mm_struct *mm)
>   {
> -	struct subpage_prot_table *spt = &mm->context.spt;
> +	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
>   
>   	memset(spt, 0, sizeof(*spt));
>   }
> @@ -93,7 +93,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
>   static void subpage_prot_clear(unsigned long addr, unsigned long len)
>   {
>   	struct mm_struct *mm = current->mm;
> -	struct subpage_prot_table *spt = &mm->context.spt;
> +	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
>   	u32 **spm, *spp;
>   	unsigned long i;
>   	size_t nw;
> @@ -189,7 +189,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
>   		unsigned long, len, u32 __user *, map)
>   {
>   	struct mm_struct *mm = current->mm;
> -	struct subpage_prot_table *spt = &mm->context.spt;
> +	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
>   	u32 **spm, *spp;
>   	unsigned long i;
>   	size_t nw;
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread
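
For readers tracking the conversions in the hunks above: the mm_ctx_*() accessors come from patch 3/6 of this series. A minimal sketch of the idea, assuming the hash-specific fields have been moved into a separately allocated struct hash_mm_context that mm_context_t points to (field and helper names are inferred from the hunks here, not copied from the final headers):

/*
 * Sketch only: hash-specific state split out of mm_context_t and reached
 * through a pointer, so radix mms can leave it unallocated entirely.
 */
struct hash_mm_context {
	u16 user_psize;			/* default page size index */
	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
	unsigned long slb_addr_limit;
	struct subpage_prot_table spt;
};

static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
{
	return ctx->hash_context->slb_addr_limit;
}

static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx,
					     unsigned long limit)
{
	ctx->hash_context->slb_addr_limit = limit;
}

static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
{
	return ctx->hash_context->low_slices_psize;
}

static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
{
	return ctx->hash_context->high_slices_psize;
}

static inline unsigned int mm_ctx_user_psize(mm_context_t *ctx)
{
	return ctx->hash_context->user_psize;
}

static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
{
	return &ctx->hash_context->spt;
}

With accessors like these, the callers in slice.c and subpage-prot.c stay identical whether the state lives inline or behind a pointer, which is what lets patch 5/6 move the allocation out of mm_context_t without touching them again.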

* Re: [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix
  2019-04-17 13:03 ` [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix Aneesh Kumar K.V
@ 2019-04-24 12:54   ` Christophe Leroy
  2019-04-26  4:50     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 11+ messages in thread
From: Christophe Leroy @ 2019-04-24 12:54 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, paulus, mpe; +Cc: linuxppc-dev



On 17/04/2019 at 15:03, Aneesh Kumar K.V wrote:
> Currently, our mm_context_t on book3s64 includes all hash-specific
> context details, such as the slice mask and subpage protection details.
> We can skip allocating these with radix translation, which saves
> 8K per mm_context.
> 
> With the patch applied we have
> 
> sizeof(mm_context_t)  = 136
> sizeof(struct hash_mm_context)  = 8288
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/book3s/64/mmu-hash.h | 33 ++++++++++++-
>   arch/powerpc/include/asm/book3s/64/mmu.h      | 49 +++++--------------
>   arch/powerpc/kernel/setup-common.c            |  6 +++
>   arch/powerpc/mm/hash_utils_64.c               |  4 +-
>   arch/powerpc/mm/mmu_context_book3s64.c        | 16 +++++-
>   5 files changed, 68 insertions(+), 40 deletions(-)
> 

[...]

> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index a07de8608484..21b1ce200b22 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -947,6 +947,12 @@ void __init setup_arch(char **cmdline_p)
>   	init_mm.end_data = (unsigned long) _edata;
>   	init_mm.brk = klimit;
>   
> +#ifdef CONFIG_PPC_MM_SLICES
> +#if defined(CONFIG_PPC_8xx)
> +	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
> +#endif
> +#endif
> +

In the previous patch, you moved the above into early_init_mmu(). Why
bring it back here?

Christophe

>   #ifdef CONFIG_SPAPR_TCE_IOMMU
>   	mm_iommu_init(&init_mm);
>   #endif
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 2cb3a456f5b5..04ac7c36d380 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -968,6 +968,7 @@ void __init hash__early_init_devtree(void)
>   	htab_scan_page_sizes();
>   }
>   
> +struct hash_mm_context init_hash_mm_context;
>   void __init hash__early_init_mmu(void)
>   {
>   #ifndef CONFIG_PPC_64K_PAGES
> @@ -1041,7 +1042,8 @@ void __init hash__early_init_mmu(void)
>   	 */
>   	htab_initialize();
>   
> -	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
> +	init_mm.context.hash_context = &init_hash_mm_context;
> +	init_mm.context.hash_context->slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
>   
>   	pr_info("Initializing hash mmu with SLB\n");
>   	/* Initialize SLB management */
> diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
> index f720c5cc0b5e..6eef5a36b2e9 100644
> --- a/arch/powerpc/mm/mmu_context_book3s64.c
> +++ b/arch/powerpc/mm/mmu_context_book3s64.c
> @@ -63,6 +63,12 @@ static int hash__init_new_context(struct mm_struct *mm)
>   	if (index < 0)
>   		return index;
>   
> +	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL);
> +	if (!mm->context.hash_context) {
> +		ida_free(&mmu_context_ida, index);
> +		return -ENOMEM;
> +	}
> +
>   	/*
>   	 * The old code would re-promote on fork, we don't do that when using
>   	 * slices as it could cause problem promoting slices that have been
> @@ -77,8 +83,14 @@ static int hash__init_new_context(struct mm_struct *mm)
>   	 * We should not be calling init_new_context() on init_mm. Hence a
>   	 * check against 0 is OK.
>   	 */
> -	if (mm->context.id == 0)
> +	if (mm->context.id == 0) {
> +		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
>   		slice_init_new_context_exec(mm);
> +	} else {
> +		/* This is fork. Copy hash_context details from current->mm */
> +		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
> +
> +	}
>   
>   	subpage_prot_init_new_context(mm);
>   
> @@ -118,6 +130,7 @@ static int radix__init_new_context(struct mm_struct *mm)
>   	asm volatile("ptesync;isync" : : : "memory");
>   
>   	mm->context.npu_context = NULL;
> +	mm->context.hash_context = NULL;
>   
>   	return index;
>   }
> @@ -162,6 +175,7 @@ static void destroy_contexts(mm_context_t *ctx)
>   		if (context_id)
>   			ida_free(&mmu_context_ida, context_id);
>   	}
> +	kfree(ctx->hash_context);
>   }
>   
>   static void pmd_frag_destroy(void *pmd_frag)
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread
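
For context on where the saving in the patch description comes from: only hash mms ever reach the kmalloc() shown in hash__init_new_context() above; radix mms leave mm->context.hash_context NULL. A condensed sketch of the dispatch in mmu_context_book3s64.c (assumed shape, trimmed of unrelated setup, not a verbatim copy of the file):

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	int index;

	if (radix_enabled())
		index = radix__init_new_context(mm);	/* hash_context stays NULL */
	else
		index = hash__init_new_context(mm);	/* allocates hash_mm_context */

	if (index < 0)
		return index;

	mm->context.id = index;
	return 0;
}

destroy_contexts() then only has to kfree() the pointer, which is harmless for radix since kfree(NULL) is a no-op.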

* Re: [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix
  2019-04-24 12:54   ` Christophe Leroy
@ 2019-04-26  4:50     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2019-04-26  4:50 UTC (permalink / raw)
  To: Christophe Leroy, npiggin, paulus, mpe; +Cc: linuxppc-dev

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> On 17/04/2019 at 15:03, Aneesh Kumar K.V wrote:
>> Currently, our mm_context_t on book3s64 includes all hash-specific
>> context details, such as the slice mask and subpage protection details.
>> We can skip allocating these with radix translation, which saves
>> 8K per mm_context.
>> 
>> With the patch applied we have
>> 
>> sizeof(mm_context_t)  = 136
>> sizeof(struct hash_mm_context)  = 8288
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/book3s/64/mmu-hash.h | 33 ++++++++++++-
>>   arch/powerpc/include/asm/book3s/64/mmu.h      | 49 +++++--------------
>>   arch/powerpc/kernel/setup-common.c            |  6 +++
>>   arch/powerpc/mm/hash_utils_64.c               |  4 +-
>>   arch/powerpc/mm/mmu_context_book3s64.c        | 16 +++++-
>>   5 files changed, 68 insertions(+), 40 deletions(-)
>> 
>
> [...]
>
>> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
>> index a07de8608484..21b1ce200b22 100644
>> --- a/arch/powerpc/kernel/setup-common.c
>> +++ b/arch/powerpc/kernel/setup-common.c
>> @@ -947,6 +947,12 @@ void __init setup_arch(char **cmdline_p)
>>   	init_mm.end_data = (unsigned long) _edata;
>>   	init_mm.brk = klimit;
>>   
>> +#ifdef CONFIG_PPC_MM_SLICES
>> +#if defined(CONFIG_PPC_8xx)
>> +	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
>> +#endif
>> +#endif
>> +
>
> In the previous patch, you moved the above into early_init_mmu(). Why
> bring it back here?
>

That was a rebase error. I see that you have sent a series to handle this.

Thanks
-aneesh


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-04-26  4:52 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-17 13:03 [PATCH v3 0/6] Reduce memory usage for mm_context_t Aneesh Kumar K.V
2019-04-17 13:03 ` [PATCH v3 1/6] powerpc/mm/radix: Do slb preload only with hash translation mode Aneesh Kumar K.V
2019-04-17 13:03 ` [PATCH v3 2/6] powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64 Aneesh Kumar K.V
2019-04-21 14:19   ` [v3,2/6] " Michael Ellerman
2019-04-17 13:03 ` [PATCH v3 3/6] powerpc/mm: Add helpers for accessing hash translation related variables Aneesh Kumar K.V
2019-04-24 10:16   ` Christophe Leroy
2019-04-17 13:03 ` [PATCH v3 4/6] powerpc/mm: Move slb_addr_linit to early_init_mmu Aneesh Kumar K.V
2019-04-17 13:03 ` [PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix Aneesh Kumar K.V
2019-04-24 12:54   ` Christophe Leroy
2019-04-26  4:50     ` Aneesh Kumar K.V
2019-04-17 13:03 ` [PATCH v3 6/6] powerc/mm/hash: Reduce hash_mm_context size Aneesh Kumar K.V
