From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1030224Ab3BGR6I (ORCPT ); Thu, 7 Feb 2013 12:58:08 -0500 Received: from mail-qe0-f44.google.com ([209.85.128.44]:38367 "EHLO mail-qe0-f44.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1759241Ab3BGR6G (ORCPT ); Thu, 7 Feb 2013 12:58:06 -0500 MIME-Version: 1.0 In-Reply-To: <20130207175703.GA3398@jshin-Toonie> References: <1360171589-6381-1-git-send-email-jacob.shin@amd.com> <1360171589-6381-7-git-send-email-jacob.shin@amd.com> <20130207175703.GA3398@jshin-Toonie> Date: Thu, 7 Feb 2013 18:58:04 +0100 Message-ID: Subject: Re: [PATCH 6/6] perf, amd: Enable northbridge performance counters on AMD family 15h From: Stephane Eranian To: Jacob Shin Cc: Thomas Gleixner , Ingo Molnar , "H. Peter Anvin" , x86 , Peter Zijlstra , Paul Mackerras , Arnaldo Carvalho de Melo , Jiri Olsa , LKML Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Thu, Feb 7, 2013 at 6:57 PM, Jacob Shin wrote: > On Wed, Feb 06, 2013 at 11:26:29AM -0600, Jacob Shin wrote: >> On AMD family 15h processors, there are 4 new performance counters >> (in addition to 6 core performance counters) that can be used for >> counting northbridge events (i.e. DRAM accesses). Their bit fields are >> almost identical to the core performance counters. However, unlike the >> core performance counters, these MSRs are shared between multiple >> cores (that share the same northbridge). We will reuse the same code >> path as existing family 10h northbridge event constraints handler >> logic to enforce this sharing. >> >> Signed-off-by: Jacob Shin > > Hi Ingo, could you please apply this one to tip as well? I received > tip-bot emails for all other patches in this series except for this > last one 6/6. > > Or was that intentional? If so, what other changes are required/ > recommended? 
> I am testing this patch right now. Should be done by tomorrow. > Thanks! > > -Jacob > >> --- >> arch/x86/include/asm/cpufeature.h | 2 + >> arch/x86/include/asm/perf_event.h | 9 ++ >> arch/x86/include/uapi/asm/msr-index.h | 2 + >> arch/x86/kernel/cpu/perf_event_amd.c | 171 +++++++++++++++++++++++++++++---- >> 4 files changed, 164 insertions(+), 20 deletions(-) >> >> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h >> index 2d9075e..93fe929 100644 >> --- a/arch/x86/include/asm/cpufeature.h >> +++ b/arch/x86/include/asm/cpufeature.h >> @@ -167,6 +167,7 @@ >> #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ >> #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ >> #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ >> +#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ >> >> /* >> * Auxiliary flags: Linux defined - For features scattered in various >> @@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32]; >> #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) >> #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) >> #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) >> +#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) >> #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) >> #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) >> #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h >> index 2234eaaec..57cb634 100644 >> --- a/arch/x86/include/asm/perf_event.h >> +++ b/arch/x86/include/asm/perf_event.h >> @@ -29,9 +29,14 @@ >> #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) >> #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL >> >> +#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) >> #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) >> #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) 
>> >> +#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 >> +#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ >> + (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) >> + >> #define AMD64_EVENTSEL_EVENT \ >> (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) >> #define INTEL_ARCH_EVENT_MASK \ >> @@ -46,8 +51,12 @@ >> #define AMD64_RAW_EVENT_MASK \ >> (X86_RAW_EVENT_MASK | \ >> AMD64_EVENTSEL_EVENT) >> +#define AMD64_RAW_EVENT_MASK_NB \ >> + (AMD64_EVENTSEL_EVENT | \ >> + ARCH_PERFMON_EVENTSEL_UMASK) >> #define AMD64_NUM_COUNTERS 4 >> #define AMD64_NUM_COUNTERS_CORE 6 >> +#define AMD64_NUM_COUNTERS_NB 4 >> >> #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c >> #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) >> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h >> index 1031604..27c05d2 100644 >> --- a/arch/x86/include/uapi/asm/msr-index.h >> +++ b/arch/x86/include/uapi/asm/msr-index.h >> @@ -195,6 +195,8 @@ >> /* Fam 15h MSRs */ >> #define MSR_F15H_PERF_CTL 0xc0010200 >> #define MSR_F15H_PERF_CTR 0xc0010201 >> +#define MSR_F15H_NB_PERF_CTL 0xc0010240 >> +#define MSR_F15H_NB_PERF_CTR 0xc0010241 >> >> /* Fam 10h MSRs */ >> #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 >> diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c >> index 05462f0..dfdab42 100644 >> --- a/arch/x86/kernel/cpu/perf_event_amd.c >> +++ b/arch/x86/kernel/cpu/perf_event_amd.c >> @@ -132,11 +132,14 @@ static u64 amd_pmu_event_map(int hw_event) >> return amd_perfmon_event_map[hw_event]; >> } >> >> +static struct event_constraint *amd_nb_event_constraint; >> + >> /* >> * Previously calculated offsets >> */ >> static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; >> static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; >> +static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; >> >> /* >> * Legacy CPUs: >> @@ -144,10 +147,14 @@ static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; >> * 
>> * CPUs with core performance counter extensions: >> * 6 counters starting at 0xc0010200 each offset by 2 >> + * >> + * CPUs with north bridge performance counter extensions: >> + * 4 additional counters starting at 0xc0010240 each offset by 2 >> + * (indexed right above either one of the above core counters) >> */ >> static inline int amd_pmu_addr_offset(int index, bool eventsel) >> { >> - int offset; >> + int offset, first, base; >> >> if (!index) >> return index; >> @@ -160,7 +167,23 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) >> if (offset) >> return offset; >> >> - if (!cpu_has_perfctr_core) >> + if (amd_nb_event_constraint && >> + test_bit(index, amd_nb_event_constraint->idxmsk)) { >> + /* >> + * calculate the offset of NB counters with respect to >> + * base eventsel or perfctr >> + */ >> + >> + first = find_first_bit(amd_nb_event_constraint->idxmsk, >> + X86_PMC_IDX_MAX); >> + >> + if (eventsel) >> + base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; >> + else >> + base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; >> + >> + offset = base + ((index - first) << 1); >> + } else if (!cpu_has_perfctr_core) >> offset = index; >> else >> offset = index << 1; >> @@ -175,24 +198,36 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) >> >> static inline int amd_pmu_rdpmc_index(int index) >> { >> - return index; >> -} >> + int ret, first; >> >> -static int amd_pmu_hw_config(struct perf_event *event) >> -{ >> - int ret; >> + if (!index) >> + return index; >> >> - /* pass precise event sampling to ibs: */ >> - if (event->attr.precise_ip && get_ibs_caps()) >> - return -ENOENT; >> + ret = rdpmc_indexes[index]; >> >> - ret = x86_pmu_hw_config(event); >> if (ret) >> return ret; >> >> - if (has_branch_stack(event)) >> - return -EOPNOTSUPP; >> + if (amd_nb_event_constraint && >> + test_bit(index, amd_nb_event_constraint->idxmsk)) { >> + /* >> + * according to the manual, ECX value of the NB counters is >> + * the index of the NB counter (0, 1, 
2 or 3) plus 6 >> + */ >> + >> + first = find_first_bit(amd_nb_event_constraint->idxmsk, >> + X86_PMC_IDX_MAX); >> + ret = index - first + 6; >> + } else >> + ret = index; >> + >> + rdpmc_indexes[index] = ret; >> >> + return ret; >> +} >> + >> +static int amd_core_hw_config(struct perf_event *event) >> +{ >> if (event->attr.exclude_host && event->attr.exclude_guest) >> /* >> * When HO == GO == 1 the hardware treats that as GO == HO == 0 >> @@ -206,10 +241,33 @@ static int amd_pmu_hw_config(struct perf_event *event) >> else if (event->attr.exclude_guest) >> event->hw.config |= AMD64_EVENTSEL_HOSTONLY; >> >> - if (event->attr.type != PERF_TYPE_RAW) >> - return 0; >> + return 0; >> +} >> >> - event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; >> +/* >> + * NB counters do not support the following event select bits: >> + * Host/Guest only >> + * Counter mask >> + * Invert counter mask >> + * Edge detect >> + * OS/User mode >> + */ >> +static int amd_nb_hw_config(struct perf_event *event) >> +{ >> + /* for NB, we only allow system wide counting mode */ >> + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) >> + return -EINVAL; >> + >> + if (event->attr.exclude_user || event->attr.exclude_kernel || >> + event->attr.exclude_host || event->attr.exclude_guest) >> + return -EINVAL; >> + >> + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | >> + ARCH_PERFMON_EVENTSEL_OS); >> + >> + if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | >> + ARCH_PERFMON_EVENTSEL_INT)) >> + return -EINVAL; >> >> return 0; >> } >> @@ -227,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) >> return (hwc->config & 0xe0) == 0xe0; >> } >> >> +static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) >> +{ >> + return amd_nb_event_constraint && amd_is_nb_event(hwc); >> +} >> + >> static inline int amd_has_nb(struct cpu_hw_events *cpuc) >> { >> struct amd_nb *nb = cpuc->amd_nb; >> @@ -234,6 +297,30 @@ static inline int 
amd_has_nb(struct cpu_hw_events *cpuc) >> return nb && nb->nb_id != -1; >> } >> >> +static int amd_pmu_hw_config(struct perf_event *event) >> +{ >> + int ret; >> + >> + /* pass precise event sampling to ibs: */ >> + if (event->attr.precise_ip && get_ibs_caps()) >> + return -ENOENT; >> + >> + if (has_branch_stack(event)) >> + return -EOPNOTSUPP; >> + >> + ret = x86_pmu_hw_config(event); >> + if (ret) >> + return ret; >> + >> + if (event->attr.type == PERF_TYPE_RAW) >> + event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; >> + >> + if (amd_is_perfctr_nb_event(&event->hw)) >> + return amd_nb_hw_config(event); >> + >> + return amd_core_hw_config(event); >> +} >> + >> static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, >> struct perf_event *event) >> { >> @@ -254,6 +341,19 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, >> } >> } >> >> +static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) >> +{ >> + int core_id = cpu_data(smp_processor_id()).cpu_core_id; >> + >> + /* deliver interrupts only to this core */ >> + if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { >> + hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; >> + hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; >> + hwc->config |= (u64)(core_id) << >> + AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; >> + } >> +} >> + >> /* >> * AMD64 NorthBridge events need special treatment because >> * counter access needs to be synchronized across all cores >> @@ -299,6 +399,12 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev >> struct perf_event *old; >> int idx, new = -1; >> >> + if (!c) >> + c = &unconstrained; >> + >> + if (cpuc->is_fake) >> + return c; >> + >> /* >> * detect if already present, if so reuse >> * >> @@ -335,6 +441,9 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev >> if (new == -1) >> return &emptyconstraint; >> >> + if (amd_is_perfctr_nb_event(hwc)) >> + amd_nb_interrupt_hw_config(hwc); 
>> + >> return &nb->event_constraints[new]; >> } >> >> @@ -434,7 +543,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) >> if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) >> return &unconstrained; >> >> - return __amd_get_nb_event_constraints(cpuc, event, &unconstrained); >> + return __amd_get_nb_event_constraints(cpuc, event, >> + amd_nb_event_constraint); >> } >> >> static void amd_put_event_constraints(struct cpu_hw_events *cpuc, >> @@ -533,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, >> static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); >> static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); >> >> +static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); >> +static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); >> + >> static struct event_constraint * >> amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) >> { >> @@ -598,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev >> return &amd_f15_PMC20; >> } >> case AMD_EVENT_NB: >> - /* not yet implemented */ >> - return &emptyconstraint; >> + return __amd_get_nb_event_constraints(cpuc, event, >> + amd_nb_event_constraint); >> default: >> return &emptyconstraint; >> } >> @@ -647,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = { >> >> static int setup_event_constraints(void) >> { >> - if (boot_cpu_data.x86 >= 0x15) >> + if (boot_cpu_data.x86 == 0x15) >> x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; >> return 0; >> } >> @@ -677,6 +790,23 @@ static int setup_perfctr_core(void) >> return 0; >> } >> >> +static int setup_perfctr_nb(void) >> +{ >> + if (!cpu_has_perfctr_nb) >> + return -ENODEV; >> + >> + x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; >> + >> + if (cpu_has_perfctr_core) >> + amd_nb_event_constraint = &amd_NBPMC96; >> + else 
>> + amd_nb_event_constraint = &amd_NBPMC74; >> + >> + printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); >> + >> + return 0; >> +} >> + >> __init int amd_pmu_init(void) >> { >> /* Performance-monitoring supported from K7 and later: */ >> @@ -687,6 +817,7 @@ __init int amd_pmu_init(void) >> >> setup_event_constraints(); >> setup_perfctr_core(); >> + setup_perfctr_nb(); >> >> /* Events are common for all AMDs */ >> memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, >> -- >> 1.7.9.5 >> >