* [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature
@ 2016-07-23  9:12 Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature Aneesh Kumar K.V
                   ` (10 more replies)
  0 siblings, 11 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Changes from V1:
* Update "powerpc/mm: Convert early cpu/mmu feature check to use the new helpers"
  based on the resent code changes in this area.

We now do the feature fixups early, and hence we can reduce the usage
of __cpu/__mmu_has_feature.

Aneesh Kumar K.V (5):
  powerpc/mm: Add __cpu/__mmu_has_feature
  powerpc/mm: Convert early cpu/mmu feature check to use the new helpers
  powerpc/mm/radix: Add radix_set_pte to use in early init
  powerpc: Call jump_label_init early
  powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label
    init

Kevin Hao (5):
  jump_label: make it possible for the archs to invoke jump_label_init()
    much earlier
  powerpc: kill mfvtb()
  powerpc: move the cpu_has_feature to a separate file
  powerpc: use the jump label for cpu_has_feature
  powerpc: use jump label for mmu_has_feature

 arch/powerpc/Kconfig.debug                    | 11 +++++
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  5 ++-
 arch/powerpc/include/asm/book3s/64/mmu.h      | 19 ++++++--
 arch/powerpc/include/asm/cacheflush.h         |  1 +
 arch/powerpc/include/asm/cpufeatures.h        | 49 +++++++++++++++++++++
 arch/powerpc/include/asm/cputable.h           | 16 +++----
 arch/powerpc/include/asm/cputime.h            |  1 +
 arch/powerpc/include/asm/dbell.h              |  1 +
 arch/powerpc/include/asm/dcr-native.h         |  1 +
 arch/powerpc/include/asm/mman.h               |  1 +
 arch/powerpc/include/asm/mmu.h                | 62 ++++++++++++++++++++++++++-
 arch/powerpc/include/asm/reg.h                |  9 ----
 arch/powerpc/include/asm/time.h               |  3 +-
 arch/powerpc/include/asm/xor.h                |  1 +
 arch/powerpc/kernel/align.c                   |  1 +
 arch/powerpc/kernel/cputable.c                | 37 ++++++++++++++++
 arch/powerpc/kernel/irq.c                     |  1 +
 arch/powerpc/kernel/paca.c                    |  2 +-
 arch/powerpc/kernel/process.c                 |  3 +-
 arch/powerpc/kernel/setup-common.c            |  1 +
 arch/powerpc/kernel/setup_32.c                |  1 +
 arch/powerpc/kernel/setup_64.c                |  4 +-
 arch/powerpc/kernel/smp.c                     |  1 +
 arch/powerpc/lib/feature-fixups.c             |  8 ++++
 arch/powerpc/mm/hash_native_64.c              |  2 +-
 arch/powerpc/mm/hash_utils_64.c               | 10 ++---
 arch/powerpc/mm/pgtable-radix.c               | 23 +++++++++-
 arch/powerpc/platforms/cell/pervasive.c       |  1 +
 arch/powerpc/xmon/ppc-dis.c                   |  2 +
 kernel/jump_label.c                           |  3 ++
 30 files changed, 243 insertions(+), 37 deletions(-)
 create mode 100644 arch/powerpc/include/asm/cpufeatures.h

-- 
2.7.4

* [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-25  5:26   ` Nicholas Piggin
  2016-07-23  9:12 ` [PATCH for-4.8 V2 02/10] powerpc/mm: Convert early cpu/mmu feature check to use the new helpers Aneesh Kumar K.V
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

In later patches, we will switch the cpu and mmu feature checks to
use static keys. That requires a variant of the feature checks that
can be used in early boot, before jump labels are initialized. This
patch adds such variants, along with a matching variant of the
radix_enabled() check.

We also update the return type to bool.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 19 +++++++++++++++----
 arch/powerpc/include/asm/cputable.h      | 15 ++++++++++-----
 arch/powerpc/include/asm/mmu.h           | 13 +++++++++++--
 arch/powerpc/xmon/ppc-dis.c              |  1 +
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 6d8306d9aa7a..1bb0e536c76b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -24,9 +24,20 @@ struct mmu_psize_def {
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
 #ifdef CONFIG_PPC_RADIX_MMU
-#define radix_enabled() mmu_has_feature(MMU_FTR_TYPE_RADIX)
+static inline bool radix_enabled(void)
+{
+	return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+#define radix_enabled radix_enabled
+
+static inline bool __radix_enabled(void)
+{
+	return __mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+#define __radix_enabled __radix_enabled
 #else
 #define radix_enabled() (0)
+#define __radix_enabled() (0)
 #endif
 
 #endif /* __ASSEMBLY__ */
@@ -111,7 +122,7 @@ extern void hash__early_init_mmu(void);
 extern void radix__early_init_mmu(void);
 static inline void early_init_mmu(void)
 {
-	if (radix_enabled())
+	if (__radix_enabled())
 		return radix__early_init_mmu();
 	return hash__early_init_mmu();
 }
@@ -119,7 +130,7 @@ extern void hash__early_init_mmu_secondary(void);
 extern void radix__early_init_mmu_secondary(void);
 static inline void early_init_mmu_secondary(void)
 {
-	if (radix_enabled())
+	if (__radix_enabled())
 		return radix__early_init_mmu_secondary();
 	return hash__early_init_mmu_secondary();
 }
@@ -131,7 +142,7 @@ extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 					      phys_addr_t first_memblock_size)
 {
-	if (radix_enabled())
+	if (__radix_enabled())
 		return radix__setup_initial_memory_limit(first_memblock_base,
 						   first_memblock_size);
 	return hash__setup_initial_memory_limit(first_memblock_base,
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index df4fb5faba43..dfdf36bc2664 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -576,12 +576,17 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-static inline int cpu_has_feature(unsigned long feature)
+static inline bool __cpu_has_feature(unsigned long feature)
 {
-	return (CPU_FTRS_ALWAYS & feature) ||
-	       (CPU_FTRS_POSSIBLE
-		& cur_cpu_spec->cpu_features
-		& feature);
+	if (CPU_FTRS_ALWAYS & feature)
+		return true;
+
+	return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
+}
+
+static inline bool cpu_has_feature(unsigned long feature)
+{
+	return __cpu_has_feature(feature);
 }
 
 #define HBP_NUM 1
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 0e7c1a262075..828b92faec91 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -134,9 +134,14 @@ enum {
 		0,
 };
 
-static inline int mmu_has_feature(unsigned long feature)
+static inline bool __mmu_has_feature(unsigned long feature)
 {
-	return (MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+	return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+}
+
+static inline bool mmu_has_feature(unsigned long feature)
+{
+	return __mmu_has_feature(feature);
 }
 
 static inline void mmu_clear_feature(unsigned long feature)
@@ -232,5 +237,9 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 #define radix_enabled() (0)
 #endif
 
+#ifndef __radix_enabled
+#define __radix_enabled() (0)
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 89098f320ad5..acad77b4f7b6 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -19,6 +19,7 @@ You should have received a copy of the GNU General Public License
 along with this file; see the file COPYING.  If not, write to the Free
 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
 
+#include <linux/types.h>
 #include <asm/cputable.h>
 #include "nonstdio.h"
 #include "ansidecl.h"
-- 
2.7.4

* [PATCH for-4.8 V2 02/10] powerpc/mm: Convert early cpu/mmu feature check to use the new helpers
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init Aneesh Kumar K.V
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This switches the early feature checks to use the non-static-key
variants of the functions. In later patches we will switch
cpu_has_feature() and mmu_has_feature() to use static keys, which can
be used only after the static key/jump label infrastructure is
initialized. Any feature check before jump label init should be done
using these new helpers.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 ++--
 arch/powerpc/kernel/paca.c                    |  2 +-
 arch/powerpc/kernel/setup_64.c                |  4 ++--
 arch/powerpc/mm/hash_native_64.c              |  2 +-
 arch/powerpc/mm/hash_utils_64.c               | 10 +++++-----
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e4e1e64e2c8d..ceba5472fe58 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -278,7 +278,7 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
 	 */
 	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+	if (!__cpu_has_feature(CPU_FTR_ARCH_300))
 		v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
@@ -306,7 +306,7 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
 					  int actual_psize, int ssize)
 {
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (__cpu_has_feature(CPU_FTR_ARCH_300))
 		pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
 
 	/* A 4K page needs no special encoding */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 93dae296b6be..1b0b89e80824 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -184,7 +184,7 @@ void setup_paca(struct paca_struct *new_paca)
 	 * if we do a GET_PACA() before the feature fixups have been
 	 * applied
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE))
+	if (__cpu_has_feature(CPU_FTR_HVMODE))
 		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
 	mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d8216aed22b7..042d20a740ab 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -227,8 +227,8 @@ static void __init configure_exceptions(void)
 			opal_configure_cores();
 
 		/* Enable AIL if supported, and we are in hypervisor mode */
-		if (cpu_has_feature(CPU_FTR_HVMODE) &&
-		    cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		if (__cpu_has_feature(CPU_FTR_HVMODE) &&
+		    __cpu_has_feature(CPU_FTR_ARCH_207S)) {
 			unsigned long lpcr = mfspr(SPRN_LPCR);
 			mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
 		}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index d2d8efd79cbf..b6565c50cabf 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -746,6 +746,6 @@ void __init hpte_init_native(void)
 	mmu_hash_ops.flush_hash_range = native_flush_hash_range;
 	mmu_hash_ops.hugepage_invalidate   = native_hugepage_invalidate;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (__cpu_has_feature(CPU_FTR_ARCH_300))
 		ppc_md.register_process_table = native_register_proc_table;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 341632471b9d..a688f6c2b403 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -530,7 +530,7 @@ static bool might_have_hea(void)
 	 * we will never see an HEA ethernet device.
 	 */
 #ifdef CONFIG_IBMEBUS
-	return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	return !__cpu_has_feature(CPU_FTR_ARCH_207S) &&
 		!firmware_has_feature(FW_FEATURE_SPLPAR);
 #else
 	return false;
@@ -561,7 +561,7 @@ static void __init htab_init_page_sizes(void)
 	 * Not in the device-tree, let's fallback on known size
 	 * list for 16M capable GP & GR
 	 */
-	if (mmu_has_feature(MMU_FTR_16M_PAGE))
+	if (__mmu_has_feature(MMU_FTR_16M_PAGE))
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
 found:
@@ -591,7 +591,7 @@ found:
 		mmu_vmalloc_psize = MMU_PAGE_64K;
 		if (mmu_linear_psize == MMU_PAGE_4K)
 			mmu_linear_psize = MMU_PAGE_64K;
-		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
+		if (__mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
 			/*
 			 * When running on pSeries using 64k pages for ioremap
 			 * would stop us accessing the HEA ethernet. So if we
@@ -765,7 +765,7 @@ static void __init htab_initialize(void)
 	/* Initialize page sizes */
 	htab_init_page_sizes();
 
-	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
+	if (__mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		mmu_kernel_ssize = MMU_SEGSIZE_1T;
 		mmu_highuser_ssize = MMU_SEGSIZE_1T;
 		printk(KERN_INFO "Using 1TB segments\n");
@@ -824,7 +824,7 @@ static void __init htab_initialize(void)
 		/* Initialize the HPT with no entries */
 		memset((void *)table, 0, htab_size_bytes);
 
-		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		if (!__cpu_has_feature(CPU_FTR_ARCH_300))
 			/* Set SDR1 */
 			mtspr(SPRN_SDR1, _SDR1);
 		else
-- 
2.7.4

* [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 02/10] powerpc/mm: Convert early cpu/mmu feature check to use the new helpers Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-25  6:23   ` Nicholas Piggin
  2016-07-25  8:36   ` Michael Ellerman
  2016-07-23  9:12 ` [PATCH for-4.8 V2 04/10] jump_label: make it possible for the archs to invoke jump_label_init() much earlier Aneesh Kumar K.V
                   ` (7 subsequent siblings)
  10 siblings, 2 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

We want to use the static-key-based feature check in set_pte_at().
Since we call radix__map_kernel_page() early in boot, before jump
labels are initialized, we can't call set_pte_at() there. Add
radix__set_pte() for that purpose.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/pgtable-radix.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 003ff48a11b6..6d2eb76b508e 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -39,6 +39,27 @@ static __ref void *early_alloc_pgtable(unsigned long size)
 
 	return pt;
 }
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+static void radix__set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+			   pte_t pte)
+{
+	/*
+	 * When handling numa faults, we already have the pte marked
+	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
+	 * Hence we can use set_pte_at for them.
+	 */
+	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+
+	/*
+	 * Add the _PAGE_PTE bit when trying to set a pte
+	 */
+	pte = __pte(pte_val(pte) | _PAGE_PTE);
+
+	/* Perform the setting of the PTE */
+	radix__set_pte_at(mm, addr, ptep, pte, 0);
+}
 
 int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
@@ -102,7 +123,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 	}
 
 set_the_pte:
-	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	radix__set_pte(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
 	smp_wmb();
 	return 0;
 }
-- 
2.7.4

* [PATCH for-4.8 V2 04/10] jump_label: make it possible for the archs to invoke jump_label_init() much earlier
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 05/10] powerpc: Call jump_label_init early Aneesh Kumar K.V
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Kevin Hao, Aneesh Kumar K . V

From: Kevin Hao <haokexin@gmail.com>

Some archs (such as powerpc) want to invoke jump_label_init() at a
much earlier stage. So check static_key_initialized in order to make
sure this function runs only once.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 kernel/jump_label.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254eeb4b4e..14d81315fd7e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -205,6 +205,9 @@ void __init jump_label_init(void)
 	struct static_key *key = NULL;
 	struct jump_entry *iter;
 
+	if (static_key_initialized)
+		return;
+
 	jump_label_lock();
 	jump_label_sort_entries(iter_start, iter_stop);
 
-- 
2.7.4

* [PATCH for-4.8 V2 05/10] powerpc: Call jump_label_init early
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 04/10] jump_label: make it possible for the archs to invoke jump_label_init() much earlier Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 06/10] powerpc: kill mfvtb() Aneesh Kumar K.V
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

Call jump_label_init() early so that we can use static keys for the
cpu and mmu feature checks. All the cpu/mmu features should be
finalized by the time we call setup_system(), and the feature fixups
for ASM-based code have been applied as well.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/lib/feature-fixups.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index defb2998b818..8b0b0b51e8aa 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -177,6 +177,12 @@ void apply_feature_fixups(void)
 			  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
 #endif
 	do_final_fixups();
+	/*
+	 * init jump label so that cpu and mmu feature check can be optimized
+	 * using jump label. We should have all the cpu/mmu features finalized
+	 * by now.
+	 */
+	jump_label_init();
 }
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
-- 
2.7.4

* [PATCH for-4.8 V2 06/10] powerpc: kill mfvtb()
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 05/10] powerpc: Call jump_label_init early Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 07/10] powerpc: move the cpu_has_feature to a separate file Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Kevin Hao, Aneesh Kumar K . V

From: Kevin Hao <haokexin@gmail.com>

This function is only used by get_vtb(), and the two are almost
identical apart from the read of the real register. Move the mfspr()
into get_vtb() and kill mfvtb(). With this, we can eliminate the use
of cpu_has_feature() in a very core header file like reg.h. This is a
preparation for using a jump label for cpu_has_feature().

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/reg.h  | 9 ---------
 arch/powerpc/include/asm/time.h | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d7e9ab5e4709..817c005205f0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1256,15 +1256,6 @@ static inline void msr_check_and_clear(unsigned long bits)
 		__msr_check_and_clear(bits);
 }
 
-static inline unsigned long mfvtb (void)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
-	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		return mfspr(SPRN_VTB);
-#endif
-	return 0;
-}
-
 #ifdef __powerpc64__
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
 #define mftb()		({unsigned long rval;				\
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 09211640a0e0..cbbeaf0a6597 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -103,7 +103,7 @@ static inline u64 get_vtb(void)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		return mfvtb();
+		return mfspr(SPRN_VTB);
 #endif
 	return 0;
 }
-- 
2.7.4

* [PATCH for-4.8 V2 07/10] powerpc: move the cpu_has_feature to a separate file
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (5 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 06/10] powerpc: kill mfvtb() Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Kevin Hao, Aneesh Kumar K . V

From: Kevin Hao <haokexin@gmail.com>

We plan to use a jump label for cpu_has_feature(). In order to
implement this we need to include linux/jump_label.h in
asm/cputable.h. But asm/cputable.h is such a basic header file for
ppc that it is included by almost all the other header files.
Including linux/jump_label.h there introduces various recursive
inclusions, which are very hard to fix. So move cpu_has_feature() to
a separate header file before using the jump label for it. No
functional change.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
 arch/powerpc/include/asm/cacheflush.h         |  1 +
 arch/powerpc/include/asm/cpufeatures.h        | 22 ++++++++++++++++++++++
 arch/powerpc/include/asm/cputable.h           | 13 -------------
 arch/powerpc/include/asm/cputime.h            |  1 +
 arch/powerpc/include/asm/dbell.h              |  1 +
 arch/powerpc/include/asm/dcr-native.h         |  1 +
 arch/powerpc/include/asm/mman.h               |  1 +
 arch/powerpc/include/asm/time.h               |  1 +
 arch/powerpc/include/asm/xor.h                |  1 +
 arch/powerpc/kernel/align.c                   |  1 +
 arch/powerpc/kernel/irq.c                     |  1 +
 arch/powerpc/kernel/process.c                 |  1 +
 arch/powerpc/kernel/setup-common.c            |  1 +
 arch/powerpc/kernel/setup_32.c                |  1 +
 arch/powerpc/kernel/smp.c                     |  1 +
 arch/powerpc/platforms/cell/pervasive.c       |  1 +
 arch/powerpc/xmon/ppc-dis.c                   |  1 +
 18 files changed, 38 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/include/asm/cpufeatures.h

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index ceba5472fe58..b396c6a8b3de 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -24,6 +24,7 @@
 #include <asm/book3s/64/pgtable.h>
 #include <asm/bug.h>
 #include <asm/processor.h>
+#include <asm/cpufeatures.h>
 
 /*
  * SLB
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 69fb16d7a811..e650819acc95 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -11,6 +11,7 @@
 
 #include <linux/mm.h>
 #include <asm/cputable.h>
+#include <asm/cpufeatures.h>
 
 /*
  * No cache flushing is required when address mappings are changed,
diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..bfa6cb8f5629
--- /dev/null
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_POWERPC_CPUFEATURES_H
+#define __ASM_POWERPC_CPUFEATURES_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cputable.h>
+
+static inline bool __cpu_has_feature(unsigned long feature)
+{
+	if (CPU_FTRS_ALWAYS & feature)
+		return true;
+
+	return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
+}
+
+static inline bool cpu_has_feature(unsigned long feature)
+{
+
+	return __cpu_has_feature(feature);
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_POWERPC_CPUFEATURES_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index dfdf36bc2664..a49ea95849f8 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -576,19 +576,6 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-static inline bool __cpu_has_feature(unsigned long feature)
-{
-	if (CPU_FTRS_ALWAYS & feature)
-		return true;
-
-	return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
-}
-
-static inline bool cpu_has_feature(unsigned long feature)
-{
-	return __cpu_has_feature(feature);
-}
-
 #define HBP_NUM 1
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index e2452550bcb1..b91837865c0e 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -28,6 +28,7 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include <asm/div64.h>
 #include <asm/time.h>
 #include <asm/param.h>
+#include <asm/cpufeatures.h>
 
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 5fa6b20eba10..2d9eae338f70 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -16,6 +16,7 @@
 #include <linux/threads.h>
 
 #include <asm/ppc-opcode.h>
+#include <asm/cpufeatures.h>
 
 #define PPC_DBELL_MSG_BRDCAST	(0x04000000)
 #define PPC_DBELL_TYPE(x)	(((x) & 0xf) << (63-36))
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 4efc11dacb98..0186ba05bfe1 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -24,6 +24,7 @@
 
 #include <linux/spinlock.h>
 #include <asm/cputable.h>
+#include <asm/cpufeatures.h>
 
 typedef struct {
 	unsigned int base;
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 2563c435a4b1..b0db2cc88900 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
 
 #include <asm/cputable.h>
 #include <linux/mm.h>
+#include <asm/cpufeatures.h>
 
 /*
  * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index cbbeaf0a6597..3620a96e2384 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,7 @@
 #include <linux/percpu.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeatures.h>
 
 /* time.c */
 extern unsigned long tb_ticks_per_jiffy;
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index 0abb97f3be10..15ba0d07937f 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -23,6 +23,7 @@
 #ifdef CONFIG_ALTIVEC
 
 #include <asm/cputable.h>
+#include <asm/cpufeatures.h>
 
 void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
 		   unsigned long *v2_in);
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index c7097f933114..6fb5b1a160aa 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -26,6 +26,7 @@
 #include <asm/emulated_ops.h>
 #include <asm/switch_to.h>
 #include <asm/disassemble.h>
+#include <asm/cpufeatures.h>
 
 struct aligninfo {
 	unsigned char len;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac910d9982df..3ac1d96c282a 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -75,6 +75,7 @@
 #endif
 #define CREATE_TRACE_POINTS
 #include <asm/trace.h>
+#include <asm/cpufeatures.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a8cca88e972f..75611b984faa 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -58,6 +58,7 @@
 #include <asm/code-patching.h>
 #include <asm/exec.h>
 #include <asm/livepatch.h>
+#include <asm/cpufeatures.h>
 
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 714b4ba7ab86..1cb3d62ae712 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -66,6 +66,7 @@
 #include <asm/hugetlb.h>
 #include <asm/livepatch.h>
 #include <asm/mmu_context.h>
+#include <asm/cpufeatures.h>
 
 #include "setup.h"
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 00f57754407e..f47989618bc7 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -37,6 +37,7 @@
 #include <asm/serial.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
+#include <asm/cpufeatures.h>
 
 #define DBG(fmt...)
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5a1f015ea9f3..c6297a7cf85e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -55,6 +55,7 @@
 #include <asm/debug.h>
 #include <asm/kexec.h>
 #include <asm/asm-prototypes.h>
+#include <asm/cpufeatures.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
index d17e98bc0c10..036215b8192c 100644
--- a/arch/powerpc/platforms/cell/pervasive.c
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/reg.h>
 #include <asm/cell-regs.h>
+#include <asm/cpufeatures.h>
 
 #include "pervasive.h"
 
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index acad77b4f7b6..88435c75139a 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -21,6 +21,7 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, US
 
 #include <linux/types.h>
 #include <asm/cputable.h>
+#include <asm/cpufeatures.h>
 #include "nonstdio.h"
 #include "ansidecl.h"
 #include "ppc.h"
-- 
2.7.4

* [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (6 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 07/10] powerpc: move the cpu_has_feature to a separate file Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-25  6:28   ` Nicholas Piggin
  2016-07-23  9:12 ` [PATCH for-4.8 V2 09/10] powerpc: use jump label for mmu_has_feature Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Kevin Hao, Aneesh Kumar K . V

From: Kevin Hao <haokexin@gmail.com>

The cpu features are fixed once the probing of cpu features is done,
yet cpu_has_feature() is used in some hot paths. Checking the cpu
feature bits on every invocation of cpu_has_feature() is suboptimal.
Reduce the overhead of this check by using a jump label.

The generated assembly for the following C code:
	if (cpu_has_feature(CPU_FTR_XXX))
		xxx()

Before:
	lis     r9,-16230
	lwz     r9,12324(r9)
	lwz     r9,12(r9)
	andi.   r10,r9,512
	beqlr-

After:
	nop	if CPU_FTR_XXX is enabled
	b xxx	if CPU_FTR_XXX is not enabled
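
As an aside, the mask-to-index mapping below relies on every
CPU_FTR_* mask having exactly one bit set. A standalone sketch of
that property (illustrative only, with a made-up CPU_FTR_EXAMPLE
value; not part of the patch):

	#include <assert.h>

	/* stand-in for a real single-bit CPU_FTR_* mask */
	#define CPU_FTR_EXAMPLE	(1UL << 9)

	int main(void)
	{
		/* with a single bit set, the count of trailing zeros is a
		 * unique index into a per-feature key array
		 */
		assert(__builtin_ctzl(CPU_FTR_EXAMPLE) == 9);
		return 0;
	}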

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/cpufeatures.h | 21 +++++++++++++++++++++
 arch/powerpc/include/asm/cputable.h    |  8 ++++++++
 arch/powerpc/kernel/cputable.c         | 20 ++++++++++++++++++++
 arch/powerpc/lib/feature-fixups.c      |  1 +
 4 files changed, 50 insertions(+)

diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
index bfa6cb8f5629..4a4a0b898463 100644
--- a/arch/powerpc/include/asm/cpufeatures.h
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -13,10 +13,31 @@ static inline bool __cpu_has_feature(unsigned long feature)
 	return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
 }
 
+#ifdef CONFIG_JUMP_LABEL
+#include <linux/jump_label.h>
+
+extern struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES];
+
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+	int i;
+
+	if (CPU_FTRS_ALWAYS & feature)
+		return true;
+
+	if (!(CPU_FTRS_POSSIBLE & feature))
+		return false;
+
+	i = __builtin_ctzl(feature);
+	return static_branch_likely(&cpu_feat_keys[i]);
+}
+#else
 static inline bool cpu_has_feature(unsigned long feature)
 {
 
 	return __cpu_has_feature(feature);
 }
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_POWERPC_CPUFEATURES_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a49ea95849f8..6c161e456759 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -122,6 +122,12 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
 
 extern const char *powerpc_base_platform;
 
+#ifdef CONFIG_JUMP_LABEL
+extern void cpu_feat_keys_init(void);
+#else
+static inline void cpu_feat_keys_init(void) { }
+#endif
+
 /* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
 enum {
 	TLB_INVAL_SCOPE_GLOBAL = 0,	/* invalidate all TLBs */
@@ -132,6 +138,8 @@ enum {
 
 /* CPU kernel features */
 
+#define MAX_CPU_FEATURES	(8 * sizeof(((struct cpu_spec *)0)->cpu_features))
+
 /* Retain the 32b definitions all use bottom half of word */
 #define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x00000001)
 #define CPU_FTR_L2CR			ASM_CONST(0x00000002)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index d81f826d1029..67ce4816998e 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -15,6 +15,7 @@
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/jump_label.h>
 
 #include <asm/oprofile_impl.h>
 #include <asm/cputable.h>
@@ -2224,3 +2225,22 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 
 	return NULL;
 }
+
+#ifdef CONFIG_JUMP_LABEL
+struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES] = {
+			[0 ... MAX_CPU_FEATURES - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(cpu_feat_keys);
+
+void __init cpu_feat_keys_init(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_CPU_FEATURES; i++) {
+		unsigned long f = 1ul << i;
+
+		if (!(cur_cpu_spec->cpu_features & f))
+			static_branch_disable(&cpu_feat_keys[i]);
+	}
+}
+#endif
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 8b0b0b51e8aa..ec698b9e6238 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -183,6 +183,7 @@ void apply_feature_fixups(void)
 	 * by now.
 	 */
 	jump_label_init();
+	cpu_feat_keys_init();
 }
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
-- 
2.7.4

* [PATCH for-4.8 V2 09/10] powerpc: use jump label for mmu_has_feature
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (7 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-23  9:12 ` [PATCH for-4.8 V2 10/10] powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label init Aneesh Kumar K.V
  2016-07-25  5:22 ` [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Nicholas Piggin
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Kevin Hao, Aneesh Kumar K . V

From: Kevin Hao <haokexin@gmail.com>

The mmu features are fixed once the probing of mmu features is done,
yet mmu_has_feature() is used in some hot paths. Checking the mmu
feature bits on every invocation of mmu_has_feature() is suboptimal.
Reduce the overhead of this check by using a jump label.

The generated assembly for the following C code:
	if (mmu_has_feature(MMU_FTR_XXX))
		xxx()

Before:
	lis     r9,-16230
	lwz     r9,12324(r9)
	lwz     r9,24(r9)
	andi.   r10,r9,16
	beqlr+

After:
	nop	if MMU_FTR_XXX is enabled
	b xxx	if MMU_FTR_XXX is not enabled

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu.h    | 36 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/cputable.c    | 17 +++++++++++++++++
 arch/powerpc/lib/feature-fixups.c |  1 +
 3 files changed, 54 insertions(+)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 828b92faec91..3726161f6a8d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -139,6 +139,41 @@ static inline bool __mmu_has_feature(unsigned long feature)
 	return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
 }
 
+#ifdef CONFIG_JUMP_LABEL
+#include <linux/jump_label.h>
+
+#define MAX_MMU_FEATURES	(8 * sizeof(((struct cpu_spec *)0)->mmu_features))
+
+extern struct static_key_true mmu_feat_keys[MAX_MMU_FEATURES];
+
+extern void mmu_feat_keys_init(void);
+
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+	int i;
+
+	if (!(MMU_FTRS_POSSIBLE & feature))
+		return false;
+
+	i = __builtin_ctzl(feature);
+	return static_branch_likely(&mmu_feat_keys[i]);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+	int i;
+
+	i = __builtin_ctzl(feature);
+	cur_cpu_spec->mmu_features &= ~feature;
+	static_branch_disable(&mmu_feat_keys[i]);
+}
+#else
+
+static inline void mmu_feat_keys_init(void)
+{
+
+}
+
 static inline bool mmu_has_feature(unsigned long feature)
 {
 	return __mmu_has_feature(feature);
@@ -148,6 +183,7 @@ static inline void mmu_clear_feature(unsigned long feature)
 {
 	cur_cpu_spec->mmu_features &= ~feature;
 }
+#endif /* CONFIG_JUMP_LABEL */
 
 extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
 
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 67ce4816998e..fa1580788eda 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2243,4 +2243,21 @@ void __init cpu_feat_keys_init(void)
 			static_branch_disable(&cpu_feat_keys[i]);
 	}
 }
+
+struct static_key_true mmu_feat_keys[MAX_MMU_FEATURES] = {
+			[0 ... MAX_MMU_FEATURES - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(mmu_feat_keys);
+
+void __init mmu_feat_keys_init(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_MMU_FEATURES; i++) {
+		unsigned long f = 1ul << i;
+
+		if (!(cur_cpu_spec->mmu_features & f))
+			static_branch_disable(&mmu_feat_keys[i]);
+	}
+}
 #endif
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index ec698b9e6238..7c29906cf8e9 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -184,6 +184,7 @@ void apply_feature_fixups(void)
 	 */
 	jump_label_init();
 	cpu_feat_keys_init();
+	mmu_feat_keys_init();
 }
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
-- 
2.7.4

* [PATCH for-4.8 V2 10/10] powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label init
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (8 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 09/10] powerpc: use jump label for mmu_has_feature Aneesh Kumar K.V
@ 2016-07-23  9:12 ` Aneesh Kumar K.V
  2016-07-25  5:22 ` [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Nicholas Piggin
  10 siblings, 0 replies; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-23  9:12 UTC (permalink / raw)
  To: benh, paulus, mpe; +Cc: linuxppc-dev, Aneesh Kumar K.V

This enables us to catch wrong usage of cpu_has_feature() and
mmu_has_feature() in the code. We need to use the feature-bit based
check in show_regs() because that is used in the reporting code.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig.debug             | 11 +++++++++++
 arch/powerpc/include/asm/cpufeatures.h |  6 ++++++
 arch/powerpc/include/asm/mmu.h         | 13 +++++++++++++
 arch/powerpc/kernel/process.c          |  2 +-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 8243ada23237..e4da4e4985fe 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -60,6 +60,17 @@ config CODE_PATCHING_SELFTEST
 	depends on DEBUG_KERNEL
 	default n
 
+config FEATURE_FIXUP_DEBUG
+	bool "Do extra check on feature fixup calls"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  This catches wrong usage of cpu_has_feature and mmu_has_feature
+	  in the code.
+
+	  If you don't know what this means, say N.
+
+
 config FTR_FIXUP_SELFTEST
 	bool "Run self-tests of the feature-fixup code"
 	depends on DEBUG_KERNEL
diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
index 4a4a0b898463..93e7e3e87af4 100644
--- a/arch/powerpc/include/asm/cpufeatures.h
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -22,6 +22,12 @@ static __always_inline bool cpu_has_feature(unsigned long feature)
 {
 	int i;
 
+#ifdef CONFIG_FEATURE_FIXUP_DEBUG
+	if (!static_key_initialized) {
+		WARN_ON(1);
+		return __cpu_has_feature(feature);
+	}
+#endif
 	if (CPU_FTRS_ALWAYS & feature)
 		return true;
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3726161f6a8d..5c1f3a4cb99f 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -152,6 +152,12 @@ static __always_inline bool mmu_has_feature(unsigned long feature)
 {
 	int i;
 
+#ifdef CONFIG_FEATURE_FIXUP_DEBUG
+	if (!static_key_initialized) {
+		WARN_ON(1);
+		return __mmu_has_feature(feature);
+	}
+#endif
 	if (!(MMU_FTRS_POSSIBLE & feature))
 		return false;
 
@@ -163,6 +169,13 @@ static inline void mmu_clear_feature(unsigned long feature)
 {
 	int i;
 
+#ifdef CONFIG_FEATURE_FIXUP_DEBUG
+	if (!static_key_initialized) {
+		WARN_ON(1);
+		cur_cpu_spec->mmu_features &= ~feature;
+		return;
+	}
+#endif
 	i = __builtin_ctzl(feature);
 	cur_cpu_spec->mmu_features &= ~feature;
 	static_branch_disable(&mmu_feat_keys[i]);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75611b984faa..5a08cff6621c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1315,7 +1315,7 @@ void show_regs(struct pt_regs * regs)
 	print_msr_bits(regs->msr);
 	printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
 	trap = TRAP(regs);
-	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+	if ((regs->trap != 0xc00) && __cpu_has_feature(CPU_FTR_CFAR))
 		printk("CFAR: "REG" ", regs->orig_gpr3);
 	if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-- 
2.7.4

* Re: [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature
  2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
                   ` (9 preceding siblings ...)
  2016-07-23  9:12 ` [PATCH for-4.8 V2 10/10] powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label init Aneesh Kumar K.V
@ 2016-07-25  5:22 ` Nicholas Piggin
  2016-07-25  6:25   ` Aneesh Kumar K.V
  10 siblings, 1 reply; 21+ messages in thread
From: Nicholas Piggin @ 2016-07-25  5:22 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, linuxppc-dev

On Sat, 23 Jul 2016 14:42:33 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> Changes from V1:
> * Update "powerpc/mm: Convert early cpu/mmu feature check to use the
> new helpers" based on the resent code changes in this area.
> 
> We now do the feature fixups early, and hence we can reduce the usage
> of __cpu/__mmu_has_feature.

Is there a particular reason for for-4.8?

I've only just started following this development so it might be
obvious, but if you could add some small justifications for why
a patch or series is done, it would be of great help to me.

Thanks,
Nick

* Re: [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature
  2016-07-23  9:12 ` [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature Aneesh Kumar K.V
@ 2016-07-25  5:26   ` Nicholas Piggin
  0 siblings, 0 replies; 21+ messages in thread
From: Nicholas Piggin @ 2016-07-25  5:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, linuxppc-dev

On Sat, 23 Jul 2016 14:42:34 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> In later patches, we will switch the cpu and mmu feature checks to
> use static keys. That requires a variant of the feature checks that
> can be used in early boot, before jump labels are initialized. This
> patch adds such variants, along with a matching variant of the
> radix_enabled() check.
> 
> We also update the return type to bool.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu.h | 19 +++++++++++++++----
>  arch/powerpc/include/asm/cputable.h      | 15 ++++++++++-----
>  arch/powerpc/include/asm/mmu.h           | 13 +++++++++++--
>  arch/powerpc/xmon/ppc-dis.c              |  1 +
>  4 files changed, 37 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 6d8306d9aa7a..1bb0e536c76b 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -24,9 +24,20 @@ struct mmu_psize_def {
>  extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
>  
>  #ifdef CONFIG_PPC_RADIX_MMU
> -#define radix_enabled() mmu_has_feature(MMU_FTR_TYPE_RADIX)
> +static inline bool radix_enabled(void)
> +{
> +	return mmu_has_feature(MMU_FTR_TYPE_RADIX);
> +}
> +#define radix_enabled radix_enabled
> +
> +static inline bool __radix_enabled(void)
> +{
> +	return __mmu_has_feature(MMU_FTR_TYPE_RADIX);
> +}

I'm probably guilty of this too, but the prefix-more-underscores naming
convention for a "special" variant of a function sucks, especially for
names used beyond a single file.

Might _early or similar be an improvement?

Thanks,
Nick

* Re: [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init
  2016-07-23  9:12 ` [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init Aneesh Kumar K.V
@ 2016-07-25  6:23   ` Nicholas Piggin
  2016-07-25  8:33     ` Michael Ellerman
  2016-07-25  8:36   ` Michael Ellerman
  1 sibling, 1 reply; 21+ messages in thread
From: Nicholas Piggin @ 2016-07-25  6:23 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, linuxppc-dev

On Sat, 23 Jul 2016 14:42:36 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> We want to use the static-key-based feature check in set_pte_at().
> Since we call radix__map_kernel_page() early in boot, before jump
> labels are initialized, we can't call set_pte_at() there. Add
> radix__set_pte() for that purpose.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/mm/pgtable-radix.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
> index 003ff48a11b6..6d2eb76b508e 100644
> --- a/arch/powerpc/mm/pgtable-radix.c
> +++ b/arch/powerpc/mm/pgtable-radix.c
> @@ -39,6 +39,27 @@ static __ref void *early_alloc_pgtable(unsigned long size)
>  
>  	return pt;
>  }
> +/*
> + * set_pte stores a linux PTE into the linux page table.
> + */
> +static void radix__set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> +			   pte_t pte)
> +{
> +	/*
> +	 * When handling numa faults, we already have the pte marked
> +	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
> +	 * Hence we can use set_pte_at for them.
> +	 */
> +	VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
> +
> +	/*
> +	 * Add the _PAGE_PTE bit when trying to set a pte
> +	 */
> +	pte = __pte(pte_val(pte) | _PAGE_PTE);
> +
> +	/* Perform the setting of the PTE */
> +	radix__set_pte_at(mm, addr, ptep, pte, 0);
> +}
>  
>  int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>  			  pgprot_t flags,
> @@ -102,7 +123,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>  	}
>  
>  set_the_pte:
> -	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
> +	radix__set_pte(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
>  	smp_wmb();

What we have in existing code is a set_pte_at() function that adds
the _PAGE_PTE bit, then calls __set_pte_at(), which calls the radix
or hash version of __set_pte_at().

Now we also have radix__set_pte(), which has the function of the
set_pte_at(), which is starting to confuse the naming convention.
The new function is a radix-only set_pte_at(), rather than the
radix implementation that gets called via set_pte().

set_pte_at_radix()? That kind of sucks too, though. It might be better
if the radix/hash variants were called __radix__set_pte_at(), and this
new function was called radix__set_pte_at().
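
Something like this, say (an untested sketch of the suggested naming,
with a stand-in body for the low-level store):

	/* low-level radix store, renamed with the double prefix */
	static inline void __radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
					       pte_t *ptep, pte_t pte, int percpu)
	{
		*ptep = pte;	/* stand-in for the real radix store */
	}

	/* the radix-only analogue of set_pte_at() then keeps the natural name */
	static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
					     pte_t *ptep, pte_t pte)
	{
		pte = __pte(pte_val(pte) | _PAGE_PTE);
		__radix__set_pte_at(mm, addr, ptep, pte, 0);
	}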

Thanks,
Nick

* Re: [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature
  2016-07-25  5:22 ` [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Nicholas Piggin
@ 2016-07-25  6:25   ` Aneesh Kumar K.V
  2016-07-25  6:37     ` Nicholas Piggin
  0 siblings, 1 reply; 21+ messages in thread
From: Aneesh Kumar K.V @ 2016-07-25  6:25 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: benh, paulus, mpe, linuxppc-dev

Nicholas Piggin <npiggin@gmail.com> writes:

> On Sat, 23 Jul 2016 14:42:33 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>
>> Changes from V1:
>> * Update "powerpc/mm: Convert early cpu/mmu feature check to use the
>> new helpers" based on the resent code changes in this area.
>> 
>> We now do the feature fixups early, and hence we can reduce the usage
>> of __cpu/__mmu_has_feature.
>
> Is there a particular reason for for-4.8?
>
> I've only just started following this development so it might be
> obvious, but if you could add some small justifications for why
> a patch or series is done, it would be of great help to me.

The goal is to reduce the impact of the radix series on the existing
MMU functions. With the radix series, we do

if (radix_enabled())
        radix_function()
else
        hash_function()

We did try to reduce the impact in most code paths, like the linux
page table accessors, by moving the linux pte bits around to match
the radix/hardware requirements. But we still have other code paths
where we do the above conditional.
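
For example, with the pte bits lined up, an accessor like the
following compiles to the same mask test on both MMUs, with no
radix_enabled() branch (a simplified sketch of what is in
asm/book3s/64/pgtable.h):

	static inline int pte_write(pte_t pte)
	{
		/* _PAGE_WRITE sits at the same position for hash and radix */
		return !!(pte_val(pte) & _PAGE_WRITE);
	}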

Now, for-4.8 is mainly because I was trying to make sure the 4.8
release would have a well performing radix/hash implementation that
distros can base their kernels on. This series was posted to the
external list multiple times and I didn't receive many objections.
Hence I thought it would be a good idea to get it upstream by 4.8.
 
-aneesh

* Re: [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature
  2016-07-23  9:12 ` [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature Aneesh Kumar K.V
@ 2016-07-25  6:28   ` Nicholas Piggin
  2016-07-25 11:30     ` Kevin Hao
  0 siblings, 1 reply; 21+ messages in thread
From: Nicholas Piggin @ 2016-07-25  6:28 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, Kevin Hao, linuxppc-dev

On Sat, 23 Jul 2016 14:42:41 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> From: Kevin Hao <haokexin@gmail.com>
> 
> The cpu features are fixed once the probing of cpu features is done,
> yet cpu_has_feature() is used in some hot paths. Checking the cpu
> feature bits on every invocation of cpu_has_feature() is suboptimal.
> Reduce the overhead of this check by using a jump label.
> 
> The generated assembly for the following C code:
> 	if (cpu_has_feature(CPU_FTR_XXX))
> 		xxx()
> 
> Before:
> 	lis     r9,-16230
> 	lwz     r9,12324(r9)
> 	lwz     r9,12(r9)
> 	andi.   r10,r9,512
> 	beqlr-
> 
> After:
> 	nop	if CPU_FTR_XXX is enabled
> 	b xxx	if CPU_FTR_XXX is not enabled
> 
> Signed-off-by: Kevin Hao <haokexin@gmail.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/cpufeatures.h | 21 +++++++++++++++++++++
>  arch/powerpc/include/asm/cputable.h    |  8 ++++++++
>  arch/powerpc/kernel/cputable.c         | 20 ++++++++++++++++++++
>  arch/powerpc/lib/feature-fixups.c      |  1 +
>  4 files changed, 50 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
> index bfa6cb8f5629..4a4a0b898463 100644
> --- a/arch/powerpc/include/asm/cpufeatures.h
> +++ b/arch/powerpc/include/asm/cpufeatures.h
> @@ -13,10 +13,31 @@ static inline bool __cpu_has_feature(unsigned long feature)
>  	return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
>  }
>  
> +#ifdef CONFIG_JUMP_LABEL
> +#include <linux/jump_label.h>
> +
> +extern struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES];
> +
> +static __always_inline bool cpu_has_feature(unsigned long feature)
> +{
> +	int i;
> +
> +	if (CPU_FTRS_ALWAYS & feature)
> +		return true;
> +
> +	if (!(CPU_FTRS_POSSIBLE & feature))
> +		return false;
> +
> +	i = __builtin_ctzl(feature);
> +	return static_branch_likely(&cpu_feat_keys[i]);
> +}

Is feature ever not-constant, or could it ever be, I wonder? We could
do a build time check to ensure it is always constant?

Or alternatively, make non-constant cases skip the first two tests?
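
For the build time check, something like this perhaps (completely
untested sketch):

	static __always_inline bool cpu_has_feature(unsigned long feature)
	{
		int i;

		/* catch non-constant feature masks at compile time */
		BUILD_BUG_ON(!__builtin_constant_p(feature));

		if (CPU_FTRS_ALWAYS & feature)
			return true;

		if (!(CPU_FTRS_POSSIBLE & feature))
			return false;

		i = __builtin_ctzl(feature);
		return static_branch_likely(&cpu_feat_keys[i]);
	}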

Thanks,
Nick

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature
  2016-07-25  6:25   ` Aneesh Kumar K.V
@ 2016-07-25  6:37     ` Nicholas Piggin
  0 siblings, 0 replies; 21+ messages in thread
From: Nicholas Piggin @ 2016-07-25  6:37 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: benh, paulus, mpe, linuxppc-dev

On Mon, 25 Jul 2016 11:55:50 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> Nicholas Piggin <npiggin@gmail.com> writes:
> 
> > On Sat, 23 Jul 2016 14:42:33 +0530
> > "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >  
> >> Changes from V1:
> >> * Update "powerpc/mm: Convert early cpu/mmu feature check to use
> >> the new helpers" based on resend code changes in this area.
> >> 
> >> We now do feature fixup early and hence we can reduce the usage of
> >>  __cpu/__mmu_has_feature.  
> >
> > Is there a particular reason for for-4.8?
> >
> > I've only just started following this development so it might be
> > obvious, but if you could add some small justifications for why
> > a patch or series is done, it would be of great help to me.  
> 
> The goal is to reduce the impact of the radix series on the existing
> MMU functions. With the radix series, we do:
> 
> if (radix_enabled())
>         radix_function()
> else
>         hash_function()
> 
> We did try to reduce the impact in most code paths, like the Linux
> page table accessors, by moving the Linux PTE bits around to match the
> radix/hardware requirements. But we still have other code paths where
> we do the above conditional.
> 
> Now, for-4.8 is mainly because I was trying to make sure the 4.8
> release would have a well-performing radix/hash implementation that
> distros can base their kernels on. This series has been posted to the
> external list multiple times and I didn't receive many objections to
> it. Hence I thought it would be a good idea to get it upstream by 4.8.

Thanks, I was just curious. I don't have an objection.

It would be a bigger change, but it might be nice to do alternate
patching for some of these, so we could even avoid the branch for
the radix case in some of the critical functions. That's something
for later though.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init
  2016-07-25  6:23   ` Nicholas Piggin
@ 2016-07-25  8:33     ` Michael Ellerman
  0 siblings, 0 replies; 21+ messages in thread
From: Michael Ellerman @ 2016-07-25  8:33 UTC (permalink / raw)
  To: Nicholas Piggin, Aneesh Kumar K.V; +Cc: benh, paulus, linuxppc-dev

Nicholas Piggin <npiggin@gmail.com> writes:

> On Sat, 23 Jul 2016 14:42:36 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> @@ -102,7 +123,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>>  }
>>  
>>  set_the_pte:
>> -	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
>> +	radix__set_pte(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
>>  	smp_wmb();
>
> What we have in the existing code is a set_pte_at() function that adds
> the _PAGE_PTE bit, then calls __set_pte_at(), which calls the radix or
> hash version of __set_pte_at().
>
> Now we also have radix__set_pte(), which does the job of set_pte_at(),
> which is starting to confuse the naming convention. The new function
> is a radix-only set_pte_at(), rather than the radix implementation
> that gets called via set_pte_at().
>
> set_pte_at_radix()? That kind of sucks too, though. It might be better
> if the radix/hash variants were called __radix__set_pte_at(), and this
> new function was called radix__set_pte_at().

I think Aneesh originally used set_pte_at_r() or maybe rset_pte_at()?

It was my idea to use radix__ and hash__ as prefixes for all the
radix/hash functions.

That was 1) to make it clear that it's not part of the name as such,
i.e. it's a prefix, and 2) because it's ugly as hell and hopefully that
would motivate us to consolidate as many of them as possible.

I balked at adding __radix__set_pte_at(), and just went with
radix__set_pte_at(). But it does complicate things now.
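
To spell out the naming Nick is suggesting (hypothetical names, just to
make the hierarchy explicit):

	set_pte_at()              - generic entry point, adds _PAGE_PTE
	  __set_pte_at()          - dispatches on radix_enabled()
	    __radix__set_pte_at() - today's radix__set_pte_at()
	    __hash__set_pte_at()  - today's hash__set_pte_at()
	radix__set_pte_at()       - the radix-only early-boot helper
	                            (this patch's radix__set_pte())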

In fact I think we need to rethink this whole series, and not actually
do it this way at all, meaning this naming problem will go away.

cheers

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init
  2016-07-23  9:12 ` [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init Aneesh Kumar K.V
  2016-07-25  6:23   ` Nicholas Piggin
@ 2016-07-25  8:36   ` Michael Ellerman
  2016-07-25  8:56     ` Nicholas Piggin
  1 sibling, 1 reply; 21+ messages in thread
From: Michael Ellerman @ 2016-07-25  8:36 UTC (permalink / raw)
  To: Aneesh Kumar K.V, benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V

"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> writes:

> We want to use the static-key-based feature check in set_pte_at().
> Since we call radix__map_kernel_page() early in boot, before the jump
> labels are initialized, we can't call set_pte_at() there. Add
> radix__set_pte() for that purpose.

Although this is an OK solution to this problem, I think it highlights a
bigger problem, which is that we're still doing the feature patching too
late.

If we can move the feature patching prior to MMU init, then all (or more
of) these problems with pre vs post patching go away.

I'll see if I can come up with something tomorrow.

cheers

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init
  2016-07-25  8:36   ` Michael Ellerman
@ 2016-07-25  8:56     ` Nicholas Piggin
  0 siblings, 0 replies; 21+ messages in thread
From: Nicholas Piggin @ 2016-07-25  8:56 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Aneesh Kumar K.V, benh, paulus, linuxppc-dev

On Mon, 25 Jul 2016 18:36:09 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:

> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> writes:
> 
> > We want to use the static-key-based feature check in set_pte_at().
> > Since we call radix__map_kernel_page() early in boot, before the
> > jump labels are initialized, we can't call set_pte_at() there. Add
> > radix__set_pte() for that purpose.
> 
> Although this is an OK solution to this problem, I think it
> highlights a bigger problem, which is that we're still doing the
> feature patching too late.
> 
> If we can move the feature patching prior to MMU init, then all (or
> more of) these problems with pre vs post patching go away.
> 
> I'll see if I can come up with something tomorrow.

Agreed, that would be much nicer if you can make it work.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature
  2016-07-25  6:28   ` Nicholas Piggin
@ 2016-07-25 11:30     ` Kevin Hao
  0 siblings, 0 replies; 21+ messages in thread
From: Kevin Hao @ 2016-07-25 11:30 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Aneesh Kumar K.V, benh, paulus, mpe, linuxppc-dev

On Mon, Jul 25, 2016 at 04:28:49PM +1000, Nicholas Piggin wrote:
> On Sat, 23 Jul 2016 14:42:41 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
> 
> > From: Kevin Hao <haokexin@gmail.com>
> > 
> > The CPU features are fixed once the CPU feature probe is done, and
> > the function cpu_has_feature() is used in some hot paths. Checking
> > the full feature bitmask on every invocation of cpu_has_feature()
> > seems suboptimal. This patch reduces the overhead of that check by
> > using a jump label.
> > 
> > The assembly code generated for the following C snippet:
> > 	if (cpu_has_feature(CPU_FTR_XXX))
> > 		xxx()
> > 
> > Before:
> > 	lis     r9,-16230
> > 	lwz     r9,12324(r9)
> > 	lwz     r9,12(r9)
> > 	andi.   r10,r9,512
> > 	beqlr-
> > 
> > After:
> > 	nop	if CPU_FTR_XXX is enabled
> > 	b xxx	if CPU_FTR_XXX is not enabled
> > 
> > Signed-off-by: Kevin Hao <haokexin@gmail.com>
> > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> > ---
> >  arch/powerpc/include/asm/cpufeatures.h | 21 +++++++++++++++++++++
> >  arch/powerpc/include/asm/cputable.h    |  8 ++++++++
> >  arch/powerpc/kernel/cputable.c         | 20 ++++++++++++++++++++
> >  arch/powerpc/lib/feature-fixups.c      |  1 +
> >  4 files changed, 50 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
> > index bfa6cb8f5629..4a4a0b898463 100644
> > --- a/arch/powerpc/include/asm/cpufeatures.h
> > +++ b/arch/powerpc/include/asm/cpufeatures.h
> > @@ -13,10 +13,31 @@ static inline bool __cpu_has_feature(unsigned long feature)
> >  	return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
> >  }
> >  
> > +#ifdef CONFIG_JUMP_LABEL
> > +#include <linux/jump_label.h>
> > +
> > +extern struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES];
> > +
> > +static __always_inline bool cpu_has_feature(unsigned long feature)
> > +{
> > +	int i;
> > +
> > +	if (CPU_FTRS_ALWAYS & feature)
> > +		return true;
> > +
> > +	if (!(CPU_FTRS_POSSIBLE & feature))
> > +		return false;
> > +
> > +	i = __builtin_ctzl(feature);
> > +	return static_branch_likely(&cpu_feat_keys[i]);
> > +}
> 
> Is feature ever non-constant, or could it ever be, I wonder? We could
> do a build-time check to ensure it is always constant?

In the current code, every caller of this function passes a constant
argument. But yes, given the way jump labels are implemented, we should
add a check here to ensure that a constant is passed to this function.
Something like this:

	if (!__builtin_constant_p(feature))
		return __cpu_has_feature(feature);

We need the same change for mmu_has_feature().
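
With that, the whole thing would look something like this (untested
sketch):

static __always_inline bool cpu_has_feature(unsigned long feature)
{
	int i;

	/*
	 * A non-constant feature can't index the static key array at
	 * compile time, so fall back to the runtime bitmask check.
	 */
	if (!__builtin_constant_p(feature))
		return __cpu_has_feature(feature);

	if (CPU_FTRS_ALWAYS & feature)
		return true;

	if (!(CPU_FTRS_POSSIBLE & feature))
		return false;

	i = __builtin_ctzl(feature);
	return static_branch_likely(&cpu_feat_keys[i]);
}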

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-07-25 11:30 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-23  9:12 [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature Aneesh Kumar K.V
2016-07-25  5:26   ` Nicholas Piggin
2016-07-23  9:12 ` [PATCH for-4.8 V2 02/10] powerpc/mm: Convert early cpu/mmu feature check to use the new helpers Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init Aneesh Kumar K.V
2016-07-25  6:23   ` Nicholas Piggin
2016-07-25  8:33     ` Michael Ellerman
2016-07-25  8:36   ` Michael Ellerman
2016-07-25  8:56     ` Nicholas Piggin
2016-07-23  9:12 ` [PATCH for-4.8 V2 04/10] jump_label: make it possible for the archs to invoke jump_label_init() much earlier Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 05/10] powerpc: Call jump_label_init early Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 06/10] powerpc: kill mfvtb() Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 07/10] powerpc: move the cpu_has_feature to a separate file Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature Aneesh Kumar K.V
2016-07-25  6:28   ` Nicholas Piggin
2016-07-25 11:30     ` Kevin Hao
2016-07-23  9:12 ` [PATCH for-4.8 V2 09/10] powerpc: use jump label for mmu_has_feature Aneesh Kumar K.V
2016-07-23  9:12 ` [PATCH for-4.8 V2 10/10] powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label init Aneesh Kumar K.V
2016-07-25  5:22 ` [PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature Nicholas Piggin
2016-07-25  6:25   ` Aneesh Kumar K.V
2016-07-25  6:37     ` Nicholas Piggin
