From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756439Ab3INAto (ORCPT ); Fri, 13 Sep 2013 20:49:44 -0400 Received: from e8.ny.us.ibm.com ([32.97.182.138]:58556 "EHLO e8.ny.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755770Ab3INAtZ (ORCPT ); Fri, 13 Sep 2013 20:49:25 -0400 From: Sukadev Bhattiprolu To: Cc: linuxppc-dev@ozlabs.org, Stephane Eranian , Michael Ellerman , Paul Mackerras , Anshuman Khandual Subject: [PATCH 5/8][v4] powerpc/perf: Export Power8 memory hierarchy info to user space. Date: Fri, 13 Sep 2013 17:49:12 -0700 Message-Id: <1379119755-21025-6-git-send-email-sukadev@linux.vnet.ibm.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1379119755-21025-1-git-send-email-sukadev@linux.vnet.ibm.com> References: <1379119755-21025-1-git-send-email-sukadev@linux.vnet.ibm.com> X-TM-AS-MML: No X-Content-Scanned: Fidelis XPS MAILER x-cbid: 13091400-0320-0000-0000-000001011538 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Power8, the LDST field in SIER identifies the memory hierarchy level (e.g. L1, L2, etc.), from which a data-cache miss for a marked instruction was satisfied. Use the 'perf_mem_data_src' object to export this hierarchy level to user space. Fortunately, the memory hierarchy levels in Power8 map fairly easily into the arch-neutral levels as described by the ldst_src_map[] table. Usage: perf record -d -e 'cpu/PM_MRK_GRP_CMPL/' perf report -n --mem-mode --sort=mem,sym,dso,symbol_daddr,dso_daddr For samples involving load/store instructions, the memory hierarchy level is shown as "L1 hit", "Remote RAM hit" etc. # or perf record --data perf report -D Sample records contain a 'data_src' field which encodes the memory hierarchy level: E.g.: data_src 0x442 indicates MEM_OP_LOAD, MEM_LVL_HIT, MEM_LVL_L2 (i.e., load hit L2). 
Note that the PMU event PM_MRK_GRP_CMPL tracks all marked group completion events. While some of these are loads and stores, others like 'add' instructions may also be sampled. One alternative of sampling on PM_MRK_GRP_CMPL and throwing away non-load and non-store samples could yield an inconsistent profile of the application. As the precise semantics of 'perf mem -t load' or 'perf mem -t store' (which require sampling only loads or only stores) cannot be implemented on Power, we don't implement 'perf mem' on Power for now. Thanks to input from Stephane Eranian, Michael Ellerman and Michael Neuling. Cc: Stephane Eranian Cc: Michael Ellerman Signed-off-by: Sukadev Bhattiprolu --- Changelog[v4]: Drop support for 'perf mem' for Power (use perf-record and perf-report directly) arch/powerpc/include/asm/perf_event_server.h | 2 + arch/powerpc/perf/core-book3s.c | 11 ++++++ arch/powerpc/perf/power8-pmu.c | 53 ++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index cc5f45b..2252798 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -37,6 +37,8 @@ struct power_pmu { void (*config_bhrb)(u64 pmu_bhrb_filter); void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); int (*limited_pmc_event)(u64 event_id); + void (*get_mem_data_src)(union perf_mem_data_src *dsrc, + struct pt_regs *regs); u32 flags; const struct attribute_group **attr_groups; int n_generic; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a3985ae..e61fd05 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1693,6 +1693,13 @@ ssize_t power_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%02llx\n", pmu_attr->id); } +static inline void power_get_mem_data_src(union perf_mem_data_src *dsrc, + struct pt_regs *regs) +{ + if (ppmu->get_mem_data_src) + 
ppmu->get_mem_data_src(dsrc, regs); +} + struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, @@ -1774,6 +1781,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, data.br_stack = &cpuhw->bhrb_stack; } + if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && + ppmu->get_mem_data_src) + ppmu->get_mem_data_src(&data.data_src, regs); + if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); } diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 5c61e59..4ecf903 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -537,6 +537,58 @@ static struct attribute_group power8_pmu_events_group = { .attrs = power8_events_attr, }; +#define POWER8_SIER_TYPE_SHIFT 15 +#define POWER8_SIER_TYPE_MASK (0x7LL << POWER8_SIER_TYPE_SHIFT) + +#define POWER8_SIER_LDST_SHIFT 1 +#define POWER8_SIER_LDST_MASK (0x7LL << POWER8_SIER_LDST_SHIFT) + +#define P(a, b) PERF_MEM_S(a, b) +#define PLH(a, b) (P(OP, LOAD) | P(LVL, HIT) | P(a, b)) +#define PSM(a, b) (P(OP, STORE) | P(LVL, MISS) | P(a, b)) + +/* + * Power8 interpretations: + * REM_CCE1: 1-hop indicates L2/L3 cache of a different core on same chip + * REM_CCE2: 2-hop indicates different chip or different node. 
+ */ +static u64 ldst_src_map[] = { + /* 000 */ P(LVL, NA), + + /* 001 */ PLH(LVL, L1), + /* 010 */ PLH(LVL, L2), + /* 011 */ PLH(LVL, L3), + /* 100 */ PLH(LVL, LOC_RAM), + /* 101 */ PLH(LVL, REM_CCE1), + /* 110 */ PLH(LVL, REM_CCE2), + + /* 111 */ PSM(LVL, L1), +}; + +static inline bool is_load_store_inst(u64 sier) +{ + u64 val; + val = (sier & POWER8_SIER_TYPE_MASK) >> POWER8_SIER_TYPE_SHIFT; + + /* 1 = load, 2 = store */ + return val == 1 || val == 2; +} + +static void power8_get_mem_data_src(union perf_mem_data_src *dsrc, + struct pt_regs *regs) +{ + u64 idx; + u64 sier; + + sier = mfspr(SPRN_SIER); + + if (is_load_store_inst(sier)) { + idx = (sier & POWER8_SIER_LDST_MASK) >> POWER8_SIER_LDST_SHIFT; + + dsrc->val |= ldst_src_map[idx]; + } +} + PMU_FORMAT_ATTR(event, "config:0-49"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); @@ -640,6 +692,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, + .get_mem_data_src = power8_get_mem_data_src, .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, -- 1.7.9.5 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e8.ny.us.ibm.com (e8.ny.us.ibm.com [32.97.182.138]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client CN "e8.ny.us.ibm.com", Issuer "GeoTrust SSL CA" (not verified)) by ozlabs.org (Postfix) with ESMTPS id DC2F42C0166 for ; Sat, 14 Sep 2013 10:49:27 +1000 (EST) Received: from /spool/local by e8.ny.us.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Fri, 13 Sep 2013 20:49:25 -0400 Received: from b01cxnp23034.gho.pok.ibm.com (b01cxnp23034.gho.pok.ibm.com [9.57.198.29]) by d01dlp01.pok.ibm.com (Postfix) with ESMTP id 5120D38C803B for ; Fri, 13 Sep 2013 20:49:22 -0400 (EDT) Received: from d01av02.pok.ibm.com (d01av02.pok.ibm.com [9.56.224.216]) by b01cxnp23034.gho.pok.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id r8E0nMBP65404986 for ; Sat, 14 Sep 2013 00:49:22 GMT Received: from d01av02.pok.ibm.com (loopback [127.0.0.1]) by d01av02.pok.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id r8E0nL19001566 for ; Fri, 13 Sep 2013 21:49:22 -0300 From: Sukadev Bhattiprolu To: Subject: [PATCH 5/8][v4] powerpc/perf: Export Power8 memory hierarchy info to user space. Date: Fri, 13 Sep 2013 17:49:12 -0700 Message-Id: <1379119755-21025-6-git-send-email-sukadev@linux.vnet.ibm.com> In-Reply-To: <1379119755-21025-1-git-send-email-sukadev@linux.vnet.ibm.com> References: <1379119755-21025-1-git-send-email-sukadev@linux.vnet.ibm.com> Cc: linuxppc-dev@ozlabs.org, Paul Mackerras , Michael Ellerman , Stephane Eranian , Anshuman Khandual List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Power8, the LDST field in SIER identifies the memory hierarchy level (e.g. L1, L2, etc.), from which a data-cache miss for a marked instruction was satisfied. Use the 'perf_mem_data_src' object to export this hierarchy level to user space. Fortunately, the memory hierarchy levels in Power8 map fairly easily into the arch-neutral levels as described by the ldst_src_map[] table. Usage: perf record -d -e 'cpu/PM_MRK_GRP_CMPL/' perf report -n --mem-mode --sort=mem,sym,dso,symbol_daddr,dso_daddr For samples involving load/store instructions, the memory hierarchy level is shown as "L1 hit", "Remote RAM hit" etc. 
# or perf record --data perf report -D Sample records contain a 'data_src' field which encodes the memory hierarchy level: E.g.: data_src 0x442 indicates MEM_OP_LOAD, MEM_LVL_HIT, MEM_LVL_L2 (i.e., load hit L2). Note that the PMU event PM_MRK_GRP_CMPL tracks all marked group completion events. While some of these are loads and stores, others like 'add' instructions may also be sampled. One alternative of sampling on PM_MRK_GRP_CMPL and throwing away non-load and non-store samples could yield an inconsistent profile of the application. As the precise semantics of 'perf mem -t load' or 'perf mem -t store' (which require sampling only loads or only stores) cannot be implemented on Power, we don't implement 'perf mem' on Power for now. Thanks to input from Stephane Eranian, Michael Ellerman and Michael Neuling. Cc: Stephane Eranian Cc: Michael Ellerman Signed-off-by: Sukadev Bhattiprolu --- Changelog[v4]: Drop support for 'perf mem' for Power (use perf-record and perf-report directly) arch/powerpc/include/asm/perf_event_server.h | 2 + arch/powerpc/perf/core-book3s.c | 11 ++++++ arch/powerpc/perf/power8-pmu.c | 53 ++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index cc5f45b..2252798 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -37,6 +37,8 @@ struct power_pmu { void (*config_bhrb)(u64 pmu_bhrb_filter); void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); int (*limited_pmc_event)(u64 event_id); + void (*get_mem_data_src)(union perf_mem_data_src *dsrc, + struct pt_regs *regs); u32 flags; const struct attribute_group **attr_groups; int n_generic; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a3985ae..e61fd05 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1693,6 +1693,13 @@ ssize_t 
power_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%02llx\n", pmu_attr->id); } +static inline void power_get_mem_data_src(union perf_mem_data_src *dsrc, + struct pt_regs *regs) +{ + if (ppmu->get_mem_data_src) + ppmu->get_mem_data_src(dsrc, regs); +} + struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, @@ -1774,6 +1781,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, data.br_stack = &cpuhw->bhrb_stack; } + if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && + ppmu->get_mem_data_src) + ppmu->get_mem_data_src(&data.data_src, regs); + if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); } diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 5c61e59..4ecf903 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -537,6 +537,58 @@ static struct attribute_group power8_pmu_events_group = { .attrs = power8_events_attr, }; +#define POWER8_SIER_TYPE_SHIFT 15 +#define POWER8_SIER_TYPE_MASK (0x7LL << POWER8_SIER_TYPE_SHIFT) + +#define POWER8_SIER_LDST_SHIFT 1 +#define POWER8_SIER_LDST_MASK (0x7LL << POWER8_SIER_LDST_SHIFT) + +#define P(a, b) PERF_MEM_S(a, b) +#define PLH(a, b) (P(OP, LOAD) | P(LVL, HIT) | P(a, b)) +#define PSM(a, b) (P(OP, STORE) | P(LVL, MISS) | P(a, b)) + +/* + * Power8 interpretations: + * REM_CCE1: 1-hop indicates L2/L3 cache of a different core on same chip + * REM_CCE2: 2-hop indicates different chip or different node. 
+ */ +static u64 ldst_src_map[] = { + /* 000 */ P(LVL, NA), + + /* 001 */ PLH(LVL, L1), + /* 010 */ PLH(LVL, L2), + /* 011 */ PLH(LVL, L3), + /* 100 */ PLH(LVL, LOC_RAM), + /* 101 */ PLH(LVL, REM_CCE1), + /* 110 */ PLH(LVL, REM_CCE2), + + /* 111 */ PSM(LVL, L1), +}; + +static inline bool is_load_store_inst(u64 sier) +{ + u64 val; + val = (sier & POWER8_SIER_TYPE_MASK) >> POWER8_SIER_TYPE_SHIFT; + + /* 1 = load, 2 = store */ + return val == 1 || val == 2; +} + +static void power8_get_mem_data_src(union perf_mem_data_src *dsrc, + struct pt_regs *regs) +{ + u64 idx; + u64 sier; + + sier = mfspr(SPRN_SIER); + + if (is_load_store_inst(sier)) { + idx = (sier & POWER8_SIER_LDST_MASK) >> POWER8_SIER_LDST_SHIFT; + + dsrc->val |= ldst_src_map[idx]; + } +} + PMU_FORMAT_ATTR(event, "config:0-49"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); @@ -640,6 +692,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, + .get_mem_data_src = power8_get_mem_data_src, .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, -- 1.7.9.5