From mboxrd@z Thu Jan 1 00:00:00 1970 Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752972AbeADLwL (ORCPT + 1 other); Thu, 4 Jan 2018 06:52:11 -0500 Received: from mx1.redhat.com ([209.132.183.28]:49526 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752835AbeADLwJ (ORCPT ); Thu, 4 Jan 2018 06:52:09 -0500 From: Prarit Bhargava To: linux-kernel@vger.kernel.org Cc: Prarit Bhargava , Thomas Gleixner , Ingo Molnar , "H. Peter Anvin" , x86@kernel.org, Jonathan Corbet , Andi Kleen , Vitaly Kuznetsov , linux-doc@vger.kernel.org, linux-edac@vger.kernel.org, oprofile-list@lists.sf.net Subject: [PATCH] x86: Add topology_hw_smt_threads() and remove smp_num_siblings Date: Thu, 4 Jan 2018 06:52:04 -0500 Message-Id: <20180104115204.17067-1-prarit@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Thu, 04 Jan 2018 11:52:09 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: Commit bbb65d2d365e ("x86: use cpuid vector 0xb when available for detecting cpu topology") changed the value of smp_num_siblings from the active number of threads in a core to the maximum number of threads in a core. For example, on Intel Haswell and newer systems smp_num_siblings is two even if SMT is disabled. topology_max_smt_threads() already returns the active number of threads. Introduce topology_hw_smt_threads() which returns the maximum number of threads. These are used to fix and replace references to smp_num_siblings. Signed-off-by: Prarit Bhargava Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: x86@kernel.org Cc: Jonathan Corbet Cc: Andi Kleen Cc: Vitaly Kuznetsov Cc: linux-doc@vger.kernel.org Cc: linux-edac@vger.kernel.org Cc: oprofile-list@lists.sf.net --- Documentation/x86/topology.txt | 13 +++++++++---- arch/x86/include/asm/perf_event_p4.h | 4 ++-- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/topology.h | 10 +++++----- arch/x86/kernel/cpu/amd.c | 6 ++---- arch/x86/kernel/cpu/common.c | 18 +++++++++++------- arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 ++- arch/x86/kernel/cpu/topology.c | 4 ++-- arch/x86/kernel/itmt.c | 2 +- arch/x86/kernel/process.c | 3 ++- arch/x86/kernel/smpboot.c | 22 ++++++++-------------- arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/oprofile/op_model_p4.c | 4 ++-- 13 files changed, 47 insertions(+), 46 deletions(-) diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt index f3e9d7e9ed6c..9fa07a4460df 100644 --- a/Documentation/x86/topology.txt +++ b/Documentation/x86/topology.txt @@ -83,13 +83,18 @@ The topology of a system is described in the units of: Core-related topology information in the kernel: - - smp_num_siblings: + - topology_hw_smt_threads: - The number of threads in a core. The number of threads in a package can be - calculated by: + The maximum number of threads that a core's hardware supports. For + example, on Intel Haswell and newer systems this is 2 even if SMT is + disabled. - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings + - topology_max_smt_threads: + The number of threads/core available at runtime. 
The number of threads in + a package can be calculated by: + + threads_per_package = cpuinfo_x86.x86_max_cores * topology_max_smt_threads * Threads: diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 94de1a05aeba..11afdadce9c2 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) static inline int p4_ht_active(void) { #ifdef CONFIG_SMP - return smp_num_siblings > 1; + return topology_max_smt_threads() > 1; #endif return 0; } @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (topology_max_smt_threads() == 2) return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 461f53d27708..cf28a3932917 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -18,7 +18,6 @@ #include #include -extern int smp_num_siblings; extern unsigned int num_processors; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); @@ -170,7 +169,6 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } -#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c1d2a9892352..b5ff1c784eef 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -116,16 +116,16 @@ extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) extern int __max_smt_threads; - -static inline int topology_max_smt_threads(void) -{ - return __max_smt_threads; -} +#define topology_max_smt_threads() (__max_smt_threads) +extern int __hw_smt_threads; +#define topology_hw_smt_threads() (__hw_smt_threads) int topology_update_package_map(unsigned int apicid, unsigned int 
cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else #define topology_max_packages() (1) +#define topology_max_smt_threads() (1) +#define topology_hw_smt_threads() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bcb75dc97d44..f2582505e2f9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -332,16 +332,14 @@ static void amd_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); node_id = ecx & 0xff; - smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + __hw_smt_threads = ((ebx >> 8) & 0xff) + 1; if (c->x86 == 0x15) c->cu_id = ebx & 0xff; if (c->x86 >= 0x17) { c->cpu_core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; + c->x86_max_cores /= topology_hw_smt_threads(); } /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c47de4ebf63a..1070e2832ca3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -654,22 +654,22 @@ void detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - smp_num_siblings = (ebx & 0xff0000) >> 16; + __hw_smt_threads = (ebx & 0xff0000) >> 16; - if (smp_num_siblings == 1) { + if (topology_hw_smt_threads() == 1) { pr_info_once("CPU0: Hyper-Threading is disabled\n"); goto out; } - if (smp_num_siblings <= 1) + if (topology_hw_smt_threads() <= 1) goto out; - index_msb = get_count_order(smp_num_siblings); + index_msb = get_count_order(topology_hw_smt_threads()); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); - smp_num_siblings = smp_num_siblings / c->x86_max_cores; + __hw_smt_threads = topology_hw_smt_threads() / c->x86_max_cores; - index_msb = get_count_order(smp_num_siblings); + index_msb = get_count_order(topology_hw_smt_threads()); core_bits = 
get_count_order(c->x86_max_cores); @@ -677,7 +677,7 @@ void detect_ht(struct cpuinfo_x86 *c) ((1 << core_bits) - 1); out: - if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { + if (!printed && (c->x86_max_cores * topology_hw_smt_threads()) > 1) { pr_info("CPU: Physical Processor ID: %d\n", c->phys_proc_id); pr_info("CPU: Processor Core ID: %d\n", @@ -1228,6 +1228,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); + /* Must be called before select_idle_routine */ + if (c != &boot_cpu_data) + set_cpu_sibling_map(raw_smp_processor_id()); + select_idle_routine(c); #ifdef CONFIG_NUMA diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 231ad23b24a9..3785929d11af 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -420,7 +420,8 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); + cores_per_node = (c->x86_max_cores * topology_hw_smt_threads()) / + amd_get_nodes_per_socket(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b099024d339c..8cf2927c73ec 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -56,7 +56,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) /* * Populate HT related information from sub-leaf level 0. 
*/ - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_level_siblings = __hw_smt_threads = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; @@ -85,7 +85,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) */ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); - c->x86_max_cores = (core_level_siblings / smp_num_siblings); + c->x86_max_cores = (core_level_siblings / topology_hw_smt_threads()); if (!printed) { pr_info("CPU: Physical Processor ID: %d\n", diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index f73f475d0573..4b887d3f79ed 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -206,7 +206,7 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) * of the priority chain and only used when * all other high priority cpus are out of capacity. */ - smt_prio = prio * smp_num_siblings / i; + smt_prio = prio * topology_hw_smt_threads() / i; per_cpu(sched_core_priority, cpu) = smt_prio; i++; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index aed9d94bd46f..67575faee8dd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -476,7 +476,8 @@ static __cpuidle void mwait_idle(void) void select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) + if (boot_option_idle_override == IDLE_POLL && + topology_hw_smt_threads() > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); #endif if (x86_idle || boot_option_idle_override == IDLE_POLL) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed556d50d7ed..56bdcdba006f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -79,10 +79,6 @@ #include #include -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - /* Last level cache ID of each logical CPU */ 
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; @@ -105,8 +101,11 @@ unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; -/* Maximum number of SMT threads on any online core */ +/* Maximum number of SMT threads on any online core determined by topology */ int __read_mostly __max_smt_threads = 1; +/* Maximum number of SMT threads read from hardware */ +int __read_mostly __hw_smt_threads = 1; +EXPORT_SYMBOL(__hw_smt_threads); /* Flag to indicate if a complete sched domain rebuild is required */ bool x86_topology_update; @@ -177,16 +176,11 @@ static void smp_callin(void) apic_ap_setup(); /* - * Save our processor parameters. Note: this information - * is needed for clock calibration. - */ - smp_store_cpu_info(cpuid); - - /* - * The topology information must be up to date before + * Save CPU parameters and topology. This information is needed for + * clock calibration, and must be up to date before calling * calibrate_delay() and notify_cpu_starting(). */ - set_cpu_sibling_map(raw_smp_processor_id()); + smp_store_cpu_info(cpuid); /* * Get our bogomips. 
@@ -463,7 +457,7 @@ static bool x86_has_numa_in_package; void set_cpu_sibling_map(int cpu) { - bool has_smt = smp_num_siblings > 1; + bool has_smt = topology_hw_smt_threads() > 1; bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 174c59774cc9..8029db4c4d3b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -566,7 +566,7 @@ static int __init p4_init(char **cpu_type) model = &op_p4_spec; return 1; #else - switch (smp_num_siblings) { + switch (topology_hw_smt_threads()) { case 1: *cpu_type = "i386/p4"; model = &op_p4_spec; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index ad1d91f475ab..7bebd042df15 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -43,7 +43,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT; static inline void setup_num_counters(void) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) { + if (topology_hw_smt_threads() == 2) { num_counters = NUM_COUNTERS_HT2; num_controls = NUM_CONTROLS_HT2; } @@ -53,7 +53,7 @@ static inline void setup_num_counters(void) static inline int addr_increment(void) { #ifdef CONFIG_SMP - return smp_num_siblings == 2 ? 2 : 1; + return topology_hw_smt_threads() == 2 ? 2 : 1; #else return 1; #endif -- 2.15.0.rc0.39.g2f0e14e64