From mboxrd@z Thu Jan 1 00:00:00 1970 Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752914AbeADLus (ORCPT + 1 other); Thu, 4 Jan 2018 06:50:48 -0500 Received: from mx1.redhat.com ([209.132.183.28]:43774 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752783AbeADLur (ORCPT ); Thu, 4 Jan 2018 06:50:47 -0500 Subject: Re: [PATCH] x86: Add topology_hw_smt_threads() and remove smp_num_siblings To: linux-kernel@vger.kernel.org References: <20180104112620.16843-1-prarit@redhat.com> From: Prarit Bhargava Message-ID: <8187f3d3-979b-fe90-c12f-ada69574c8e7@redhat.com> Date: Thu, 4 Jan 2018 06:50:46 -0500 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.7.0 MIME-Version: 1.0 In-Reply-To: <20180104112620.16843-1-prarit@redhat.com> Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Thu, 04 Jan 2018 11:50:46 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: Oops. Forgot to cc everyone. Please ignore. P. On 01/04/2018 06:26 AM, Prarit Bhargava wrote: > Commit bbb65d2d365e ("x86: use cpuid vector 0xb when available for > detecting cpu topology") changed the value of smp_num_siblings from the > active number of threads in a core to the maximum number of threads in a > core. For example, on Intel Haswell and newer systems smp_num_siblings is > two even if SMT is disabled. > > topology_max_smt_threads() already returns the active number of threads. > Introduce topology_hw_smt_threads() which returns the maximum number of > threads. These are used to fix and replace references to smp_num_siblings. 
> > Signed-off-by: Prarit Bhargava > --- > Documentation/x86/topology.txt | 13 +++++++++---- > arch/x86/include/asm/perf_event_p4.h | 4 ++-- > arch/x86/include/asm/smp.h | 2 -- > arch/x86/include/asm/topology.h | 10 +++++----- > arch/x86/kernel/cpu/amd.c | 6 ++---- > arch/x86/kernel/cpu/common.c | 18 +++++++++++------- > arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 ++- > arch/x86/kernel/cpu/topology.c | 4 ++-- > arch/x86/kernel/itmt.c | 2 +- > arch/x86/kernel/process.c | 3 ++- > arch/x86/kernel/smpboot.c | 22 ++++++++-------------- > arch/x86/oprofile/nmi_int.c | 2 +- > arch/x86/oprofile/op_model_p4.c | 4 ++-- > 13 files changed, 47 insertions(+), 46 deletions(-) > > diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt > index f3e9d7e9ed6c..9fa07a4460df 100644 > --- a/Documentation/x86/topology.txt > +++ b/Documentation/x86/topology.txt > @@ -83,13 +83,18 @@ The topology of a system is described in the units of: > > Core-related topology information in the kernel: > > - - smp_num_siblings: > + - topology_hw_smt_threads: > > - The number of threads in a core. The number of threads in a package can be > - calculated by: > + The maximum number of threads that a core's hardware supports. For > + example, on Intel Haswell and newer systems this is 2 even if SMT is > + disabled. > > - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings > + - topology_max_smt_threads: > > + The number of threads/core available at runtime. 
The number of threads in > + a package can be calculated by: > + > + threads_per_package = cpuinfo_x86.x86_max_cores * topology_max_smt_threads > > * Threads: > > diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h > index 94de1a05aeba..11afdadce9c2 100644 > --- a/arch/x86/include/asm/perf_event_p4.h > +++ b/arch/x86/include/asm/perf_event_p4.h > @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) > static inline int p4_ht_active(void) > { > #ifdef CONFIG_SMP > - return smp_num_siblings > 1; > + return topology_max_smt_threads() > 1; > #endif > return 0; > } > @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) > static inline int p4_ht_thread(int cpu) > { > #ifdef CONFIG_SMP > - if (smp_num_siblings == 2) > + if (topology_max_smt_threads() == 2) > return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); > #endif > return 0; > diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h > index 461f53d27708..cf28a3932917 100644 > --- a/arch/x86/include/asm/smp.h > +++ b/arch/x86/include/asm/smp.h > @@ -18,7 +18,6 @@ > #include > #include > > -extern int smp_num_siblings; > extern unsigned int num_processors; > > DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); > @@ -170,7 +169,6 @@ static inline int wbinvd_on_all_cpus(void) > wbinvd(); > return 0; > } > -#define smp_num_siblings 1 > #endif /* CONFIG_SMP */ > > extern unsigned disabled_cpus; > diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h > index c1d2a9892352..b5ff1c784eef 100644 > --- a/arch/x86/include/asm/topology.h > +++ b/arch/x86/include/asm/topology.h > @@ -116,16 +116,16 @@ extern unsigned int __max_logical_packages; > #define topology_max_packages() (__max_logical_packages) > > extern int __max_smt_threads; > - > -static inline int topology_max_smt_threads(void) > -{ > - return __max_smt_threads; > -} > +#define topology_max_smt_threads() (__max_smt_threads) > +extern int 
__hw_smt_threads; > +#define topology_hw_smt_threads() (__hw_smt_threads) > > int topology_update_package_map(unsigned int apicid, unsigned int cpu); > extern int topology_phys_to_logical_pkg(unsigned int pkg); > #else > #define topology_max_packages() (1) > +#define topology_max_smt_threads() (1) > +#define topology_hw_smt_threads() (1) > static inline int > topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } > static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } > diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c > index bcb75dc97d44..f2582505e2f9 100644 > --- a/arch/x86/kernel/cpu/amd.c > +++ b/arch/x86/kernel/cpu/amd.c > @@ -332,16 +332,14 @@ static void amd_get_topology(struct cpuinfo_x86 *c) > cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); > > node_id = ecx & 0xff; > - smp_num_siblings = ((ebx >> 8) & 0xff) + 1; > + __hw_smt_threads = ((ebx >> 8) & 0xff) + 1; > > if (c->x86 == 0x15) > c->cu_id = ebx & 0xff; > > if (c->x86 >= 0x17) { > c->cpu_core_id = ebx & 0xff; > - > - if (smp_num_siblings > 1) > - c->x86_max_cores /= smp_num_siblings; > + c->x86_max_cores /= topology_hw_smt_threads(); > } > > /* > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c > index c47de4ebf63a..1070e2832ca3 100644 > --- a/arch/x86/kernel/cpu/common.c > +++ b/arch/x86/kernel/cpu/common.c > @@ -654,22 +654,22 @@ void detect_ht(struct cpuinfo_x86 *c) > > cpuid(1, &eax, &ebx, &ecx, &edx); > > - smp_num_siblings = (ebx & 0xff0000) >> 16; > + __hw_smt_threads = (ebx & 0xff0000) >> 16; > > - if (smp_num_siblings == 1) { > + if (topology_hw_smt_threads() == 1) { > pr_info_once("CPU0: Hyper-Threading is disabled\n"); > goto out; > } > > - if (smp_num_siblings <= 1) > + if (topology_hw_smt_threads() <= 1) > goto out; > > - index_msb = get_count_order(smp_num_siblings); > + index_msb = get_count_order(topology_hw_smt_threads()); > c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); > > - 
smp_num_siblings = smp_num_siblings / c->x86_max_cores; > + __hw_smt_threads = topology_hw_smt_threads() / c->x86_max_cores; > > - index_msb = get_count_order(smp_num_siblings); > + index_msb = get_count_order(topology_hw_smt_threads()); > > core_bits = get_count_order(c->x86_max_cores); > > @@ -677,7 +677,7 @@ void detect_ht(struct cpuinfo_x86 *c) > ((1 << core_bits) - 1); > > out: > - if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { > + if (!printed && (c->x86_max_cores * topology_hw_smt_threads()) > 1) { > pr_info("CPU: Physical Processor ID: %d\n", > c->phys_proc_id); > pr_info("CPU: Processor Core ID: %d\n", > @@ -1228,6 +1228,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) > /* Init Machine Check Exception if available. */ > mcheck_cpu_init(c); > > + /* Must be called before select_idle_routine */ > + if (c != &boot_cpu_data) > + set_cpu_sibling_map(raw_smp_processor_id()); > + > select_idle_routine(c); > > #ifdef CONFIG_NUMA > diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c > index 231ad23b24a9..3785929d11af 100644 > --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c > +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c > @@ -420,7 +420,8 @@ static u32 get_nbc_for_node(int node_id) > struct cpuinfo_x86 *c = &boot_cpu_data; > u32 cores_per_node; > > - cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); > + cores_per_node = (c->x86_max_cores * topology_hw_smt_threads()) / > + amd_get_nodes_per_socket(); > > return cores_per_node * node_id; > } > diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c > index b099024d339c..8cf2927c73ec 100644 > --- a/arch/x86/kernel/cpu/topology.c > +++ b/arch/x86/kernel/cpu/topology.c > @@ -56,7 +56,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) > /* > * Populate HT related information from sub-leaf level 0. 
> */ > - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); > + core_level_siblings = __hw_smt_threads = LEVEL_MAX_SIBLINGS(ebx); > core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); > > sub_index = 1; > @@ -85,7 +85,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) > */ > c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); > > - c->x86_max_cores = (core_level_siblings / smp_num_siblings); > + c->x86_max_cores = (core_level_siblings / topology_hw_smt_threads()); > > if (!printed) { > pr_info("CPU: Physical Processor ID: %d\n", > diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c > index f73f475d0573..4b887d3f79ed 100644 > --- a/arch/x86/kernel/itmt.c > +++ b/arch/x86/kernel/itmt.c > @@ -206,7 +206,7 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) > * of the priority chain and only used when > * all other high priority cpus are out of capacity. > */ > - smt_prio = prio * smp_num_siblings / i; > + smt_prio = prio * topology_hw_smt_threads() / i; > per_cpu(sched_core_priority, cpu) = smt_prio; > i++; > } > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c > index aed9d94bd46f..67575faee8dd 100644 > --- a/arch/x86/kernel/process.c > +++ b/arch/x86/kernel/process.c > @@ -476,7 +476,8 @@ static __cpuidle void mwait_idle(void) > void select_idle_routine(const struct cpuinfo_x86 *c) > { > #ifdef CONFIG_SMP > - if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) > + if (boot_option_idle_override == IDLE_POLL && > + topology_hw_smt_threads() > 1) > pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); > #endif > if (x86_idle || boot_option_idle_override == IDLE_POLL) > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index ed556d50d7ed..56bdcdba006f 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -79,10 +79,6 @@ > #include > #include > > -/* Number of siblings per CPU package */ > -int smp_num_siblings = 1; 
> -EXPORT_SYMBOL(smp_num_siblings); > - > /* Last level cache ID of each logical CPU */ > DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; > > @@ -105,8 +101,11 @@ unsigned int __max_logical_packages __read_mostly; > EXPORT_SYMBOL(__max_logical_packages); > static unsigned int logical_packages __read_mostly; > > -/* Maximum number of SMT threads on any online core */ > +/* Maximum number of SMT threads on any online core determined by topology */ > int __read_mostly __max_smt_threads = 1; > +/* Maximum number of SMT threads read from hardware */ > +int __read_mostly __hw_smt_threads = 1; > +EXPORT_SYMBOL(__hw_smt_threads); > > /* Flag to indicate if a complete sched domain rebuild is required */ > bool x86_topology_update; > @@ -177,16 +176,11 @@ static void smp_callin(void) > apic_ap_setup(); > > /* > - * Save our processor parameters. Note: this information > - * is needed for clock calibration. > - */ > - smp_store_cpu_info(cpuid); > - > - /* > - * The topology information must be up to date before > + * Save CPU parameters and topology. This information is needed for > + * clock calibration, and must be up to date before calling > * calibrate_delay() and notify_cpu_starting(). > */ > - set_cpu_sibling_map(raw_smp_processor_id()); > + smp_store_cpu_info(cpuid); > > /* > * Get our bogomips. 
> @@ -463,7 +457,7 @@ static bool x86_has_numa_in_package; > > void set_cpu_sibling_map(int cpu) > { > - bool has_smt = smp_num_siblings > 1; > + bool has_smt = topology_hw_smt_threads() > 1; > bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; > struct cpuinfo_x86 *c = &cpu_data(cpu); > struct cpuinfo_x86 *o; > diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c > index 174c59774cc9..8029db4c4d3b 100644 > --- a/arch/x86/oprofile/nmi_int.c > +++ b/arch/x86/oprofile/nmi_int.c > @@ -566,7 +566,7 @@ static int __init p4_init(char **cpu_type) > model = &op_p4_spec; > return 1; > #else > - switch (smp_num_siblings) { > + switch (topology_hw_smt_threads()) { > case 1: > *cpu_type = "i386/p4"; > model = &op_p4_spec; > diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c > index ad1d91f475ab..7bebd042df15 100644 > --- a/arch/x86/oprofile/op_model_p4.c > +++ b/arch/x86/oprofile/op_model_p4.c > @@ -43,7 +43,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT; > static inline void setup_num_counters(void) > { > #ifdef CONFIG_SMP > - if (smp_num_siblings == 2) { > + if (topology_hw_smt_threads() == 2) { > num_counters = NUM_COUNTERS_HT2; > num_controls = NUM_CONTROLS_HT2; > } > @@ -53,7 +53,7 @@ static inline void setup_num_counters(void) > static inline int addr_increment(void) > { > #ifdef CONFIG_SMP > - return smp_num_siblings == 2 ? 2 : 1; > + return topology_hw_smt_threads() == 2 ? 2 : 1; > #else > return 1; > #endif >