From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751787AbdF3Imw (ORCPT ); Fri, 30 Jun 2017 04:42:52 -0400 Received: from mga06.intel.com ([134.134.136.31]:61044 "EHLO mga06.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751584AbdF3Imv (ORCPT ); Fri, 30 Jun 2017 04:42:51 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.40,285,1496127600"; d="scan'208";a="873389480" From: Adrian Hunter To: Arnaldo Carvalho de Melo , Andi Kleen Cc: linux-kernel@vger.kernel.org Subject: [PATCH V3 25/37] perf script: Add synthesized Intel PT power and ptwrite events Date: Fri, 30 Jun 2017 11:36:42 +0300 Message-Id: <1498811802-2301-1-git-send-email-adrian.hunter@intel.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1495786658-18063-26-git-send-email-adrian.hunter@intel.com> References: <1495786658-18063-26-git-send-email-adrian.hunter@intel.com> Organization: Intel Finland Oy, Registered Address: PL 281, 00181 Helsinki, Business Identity Code: 0357606 - 4, Domiciled in Helsinki Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Add definitions for synthesized Intel PT events for power and ptwrite. 
Signed-off-by: Adrian Hunter --- Changes in V3: Avoid using __packed tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++++++++++++++- tools/perf/util/event.h | 118 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e87b480bbdd0..b458a0cc3544 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1203,10 +1203,122 @@ static void print_sample_bpf_output(struct perf_sample *sample) (char *)(sample->raw_data)); } -static void print_sample_synth(struct perf_sample *sample __maybe_unused, +static void print_sample_spacing(int len, int spacing) +{ + if (len > 0 && len < spacing) + printf("%*s", spacing - len, ""); +} + +static void print_sample_pt_spacing(int len) +{ + print_sample_spacing(len, 34); +} + +static void print_sample_synth_ptwrite(struct perf_sample *sample) +{ + struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" IP: %u payload: %#" PRIx64 " ", + data->ip, le64_to_cpu(data->payload)); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_mwait(struct perf_sample *sample) +{ + struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" hints: %#x extensions: %#x ", + data->hints, data->extensions); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_pwre(struct perf_sample *sample) +{ + struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" hw: %u cstate: %u sub-cstate: %u ", + data->hw, data->cstate, data->subcstate); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_exstop(struct perf_sample *sample) +{ + struct perf_synth_intel_exstop 
*data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" IP: %u ", data->ip); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_pwrx(struct perf_sample *sample) +{ + struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ", + data->deepest_cstate, data->last_cstate, + data->wake_reason); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_cbr(struct perf_sample *sample) +{ + struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample); + unsigned int percent, freq; + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + freq = (le32_to_cpu(data->freq) + 500) / 1000; + len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq); + if (data->max_nonturbo) { + percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10; + len += printf("(%3u%%) ", percent); + } + print_sample_pt_spacing(len); +} + +static void print_sample_synth(struct perf_sample *sample, struct perf_evsel *evsel) { switch (evsel->attr.config) { + case PERF_SYNTH_INTEL_PTWRITE: + print_sample_synth_ptwrite(sample); + break; + case PERF_SYNTH_INTEL_MWAIT: + print_sample_synth_mwait(sample); + break; + case PERF_SYNTH_INTEL_PWRE: + print_sample_synth_pwre(sample); + break; + case PERF_SYNTH_INTEL_EXSTOP: + print_sample_synth_exstop(sample); + break; + case PERF_SYNTH_INTEL_PWRX: + print_sample_synth_pwrx(sample); + break; + case PERF_SYNTH_INTEL_CBR: + print_sample_synth_cbr(sample); + break; default: break; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 855733c2adcf..9967c87af7a6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -255,6 +255,124 @@ enum auxtrace_error_type { /* Attribute type for custom synthesized events */ #define PERF_TYPE_SYNTH (INT_MAX + 1U) +/* 
Attribute config for custom synthesized events */ +enum perf_synth_id { + PERF_SYNTH_INTEL_PTWRITE, + PERF_SYNTH_INTEL_MWAIT, + PERF_SYNTH_INTEL_PWRE, + PERF_SYNTH_INTEL_EXSTOP, + PERF_SYNTH_INTEL_PWRX, + PERF_SYNTH_INTEL_CBR, +}; + +/* + * Raw data formats for synthesized events. Note that 4 bytes of padding are + * present to match the 'size' member of PERF_SAMPLE_RAW data which is always + * 8-byte aligned. That means we must dereference raw_data with an offset of 4. + * Refer to perf_sample__synth_ptr() and perf_synth__raw_data(). It also means the + * structure sizes are 4 bytes bigger than the raw_size, refer to + * perf_synth__raw_size(). + */ + +struct perf_synth_intel_ptwrite { + u32 padding; + union { + struct { + u32 ip : 1, + reserved : 31; + }; + u32 flags; + }; + u64 payload; +}; + +struct perf_synth_intel_mwait { + u32 padding; + u32 reserved; + union { + struct { + u64 hints : 8, + reserved1 : 24, + extensions : 2, + reserved2 : 30; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_pwre { + u32 padding; + u32 reserved; + union { + struct { + u64 reserved1 : 7, + hw : 1, + subcstate : 4, + cstate : 4, + reserved2 : 48; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_exstop { + u32 padding; + union { + struct { + u32 ip : 1, + reserved : 31; + }; + u32 flags; + }; +}; + +struct perf_synth_intel_pwrx { + u32 padding; + u32 reserved; + union { + struct { + u64 deepest_cstate : 4, + last_cstate : 4, + wake_reason : 4, + reserved1 : 52; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_cbr { + u32 padding; + union { + struct { + u32 cbr : 8, + reserved1 : 8, + max_nonturbo : 8, + reserved2 : 8; + }; + u32 flags; + }; + u32 freq; + u32 reserved3; +}; + +/* + * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get + * 8-byte alignment. 
+ */ +static inline void *perf_sample__synth_ptr(struct perf_sample *sample) +{ + return sample->raw_data - 4; +} + +static inline void *perf_synth__raw_data(void *p) +{ + return p + 4; +} + +#define perf_synth__raw_size(d) (sizeof(d) - 4) + +#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4) + /* * The kernel collects the number of events it couldn't send in a stretch and * when possible sends this number in a PERF_RECORD_LOST event. The number of -- 1.9.1