From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753339AbcDUPav (ORCPT ); Thu, 21 Apr 2016 11:30:51 -0400 Received: from mga01.intel.com ([192.55.52.88]:39239 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753288AbcDUPat (ORCPT ); Thu, 21 Apr 2016 11:30:49 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.24,513,1455004800"; d="scan'208";a="689973521" From: Alexander Shishkin To: Peter Zijlstra Cc: Ingo Molnar , linux-kernel@vger.kernel.org, vince@deater.net, eranian@google.com, Arnaldo Carvalho de Melo , Mathieu Poirier , Alexander Shishkin Subject: [PATCH v1 5/5] perf/x86/intel/pt: Add support for address range filtering in PT Date: Thu, 21 Apr 2016 18:17:03 +0300 Message-Id: <1461251823-12416-6-git-send-email-alexander.shishkin@linux.intel.com> X-Mailer: git-send-email 2.8.0.rc3 In-Reply-To: <1461251823-12416-1-git-send-email-alexander.shishkin@linux.intel.com> References: <1461251823-12416-1-git-send-email-alexander.shishkin@linux.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Newer versions of Intel PT support address ranges, which can be used to define IP address range-based filters or TraceSTOP regions. Number of ranges in enumerated via cpuid. This patch implements pmu callbacks and related low-level code to allow filter validation, configuration and programming into the hardware. Signed-off-by: Alexander Shishkin --- arch/x86/events/intel/pt.c | 183 ++++++++++++++++++++++++++++++++++++++++++++- arch/x86/events/intel/pt.h | 28 +++++++ 2 files changed, 210 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 891447dd61..e577afb74d 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -253,6 +253,73 @@ static bool pt_event_valid(struct perf_event *event) * These all are cpu affine and operate on a local PT */ +/* Address ranges and their corresponding msr configuration registers */ +static const struct pt_address_range { + unsigned long msr_a; + unsigned long msr_b; + unsigned int reg_off; +} pt_address_ranges[] = { + { + .msr_a = MSR_IA32_RTIT_ADDR0_A, + .msr_b = MSR_IA32_RTIT_ADDR0_B, + .reg_off = RTIT_CTL_ADDR0_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR1_A, + .msr_b = MSR_IA32_RTIT_ADDR1_B, + .reg_off = RTIT_CTL_ADDR1_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR2_A, + .msr_b = MSR_IA32_RTIT_ADDR2_B, + .reg_off = RTIT_CTL_ADDR2_OFFSET, + }, + { + .msr_a = MSR_IA32_RTIT_ADDR3_A, + .msr_b = MSR_IA32_RTIT_ADDR3_B, + .reg_off = RTIT_CTL_ADDR3_OFFSET, + } +}; + +static u64 pt_config_filters(struct perf_event *event) +{ + struct pt_filters *filters = event->hw.addr_filters; + struct pt *pt = this_cpu_ptr(&pt_ctx); + unsigned int range = 0; + u64 rtit_ctl = 0; + + if (!filters) + return 0; + + for (range = 0; range < filters->nr_filters; range++) { + struct pt_filter *filter = &filters->filter[range]; + + /* + * Note, if the range has zero start/end addresses due + * to its dynamic object not being loaded yet, we just + * go ahead and program zeroed range, which will simply + * produce no data. Note^2: if executable code at 0x0 + * is a concern, we can set up an "invalid" configuration + * such as msr_b < msr_a. + */ + + /* avoid redundant msr writes */ + if (pt->filters.filter[range].msr_a != filter->msr_a) { + wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a); + pt->filters.filter[range].msr_a = filter->msr_a; + } + + if (pt->filters.filter[range].msr_b != filter->msr_b) { + wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b); + pt->filters.filter[range].msr_b = filter->msr_b; + } + + rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + } + + return rtit_ctl; +} + static void pt_config(struct perf_event *event) { u64 reg; @@ -262,7 +329,8 @@ static void pt_config(struct perf_event *event) wrmsrl(MSR_IA32_RTIT_STATUS, 0); } - reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; + reg = pt_config_filters(event); + reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; if (!event->attr.exclude_kernel) reg |= RTIT_CTL_OS; @@ -907,6 +975,105 @@ static void pt_buffer_free_aux(void *data) kfree(buf); } +static int pt_addr_filters_init(struct perf_event *event) +{ + struct pt_filters *filters; + int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); + + if (!pt_cap_get(PT_CAP_num_address_ranges)) + return 0; + + filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node); + if (!filters) + return -ENOMEM; + + if (event->parent) + memcpy(filters, event->parent->hw.addr_filters, + sizeof(*filters)); + + event->hw.addr_filters = filters; + event->hw.addr_filters_offs = filters->offs; + + return 0; +} + +static void pt_addr_filters_fini(struct perf_event *event) +{ + kfree(event->hw.addr_filters); + event->hw.addr_filters = NULL; + event->hw.addr_filters_offs = NULL; +} + +static int pt_event_addr_filters_validate(struct list_head *filters) +{ + struct perf_addr_filter *filter; + int range = 0; + + list_for_each_entry(filter, filters, entry) { + /* PT doesn't support single address triggers */ + if (!filter->range) + return -EOPNOTSUPP; + + if (filter->kernel && !kernel_ip(filter->offset)) + return -EINVAL; + + if (++range > pt_cap_get(PT_CAP_num_address_ranges)) + return -EOPNOTSUPP; + } + + return 0; +} + +static void pt_event_start(struct perf_event *event, int mode); +static void pt_event_stop(struct perf_event *event, int mode); + +static void pt_event_addr_filters_setup(struct perf_event *event, + unsigned long *offs, int flags) +{ + struct perf_addr_filters_head *head = perf_event_addr_filters(event); + struct pt_filters *filters = event->hw.addr_filters; + unsigned int reload = flags & PERF_EF_RELOAD; + struct perf_addr_filter *filter; + unsigned long msr_a, msr_b; + int range = 0; + + if (!filters || + WARN_ON_ONCE(pt_event_addr_filters_validate(&head->list))) + return; + + if (reload) + pt_event_stop(event, PERF_EF_UPDATE); + + list_for_each_entry(filter, &head->list, entry) { + /* kernel-space filters are already configured */ + if (filter->kernel) { + msr_a = filter->offset; + msr_b = filter->offset + filter->size; + } else if (!offs) { + /* clear the range */ + msr_a = msr_b = 0; + } else if (offs[range] == 0) { + /* keep the existing range */ + goto next; + } else { + /* apply the offset */ + msr_a = filter->offset + offs[range]; + msr_b = filter->size + msr_a; + } + + filters->filter[range].msr_a = msr_a; + filters->filter[range].msr_b = msr_b; +next: + filters->filter[range].config = filter->filter ? 1 : 2; + range++; + } + + filters->nr_filters = range; + + if (reload) + pt_event_start(event, PERF_EF_RELOAD); +} + /** * intel_pt_interrupt() - PT PMI handler */ @@ -1075,6 +1242,7 @@ static void pt_event_read(struct perf_event *event) static void pt_event_destroy(struct perf_event *event) { + pt_addr_filters_fini(event); x86_del_exclusive(x86_lbr_exclusive_pt); } @@ -1089,6 +1257,11 @@ static int pt_event_init(struct perf_event *event) if (x86_add_exclusive(x86_lbr_exclusive_pt)) return -EBUSY; + if (pt_addr_filters_init(event)) { + x86_del_exclusive(x86_lbr_exclusive_pt); + return -ENOMEM; + } + event->destroy = pt_event_destroy; return 0; @@ -1152,6 +1325,14 @@ static __init int pt_init(void) pt_pmu.pmu.read = pt_event_read; pt_pmu.pmu.setup_aux = pt_buffer_setup_aux; pt_pmu.pmu.free_aux = pt_buffer_free_aux; + + if (pt_cap_get(PT_CAP_num_address_ranges)) { + pt_pmu.pmu.addr_filters_validate = + pt_event_addr_filters_validate; + pt_pmu.pmu.addr_filters_setup = + pt_event_addr_filters_setup; + } + ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1); return ret; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 6ce8cd20b9..27bd3838c5 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -105,13 +105,41 @@ struct pt_buffer { struct topa_entry *topa_index[0]; }; +#define PT_FILTERS_NUM 4 + +/** + * struct pt_filter - IP range filter configuration + * @msr_a: range start, goes to RTIT_ADDRn_A + * @msr_b: range end, goes to RTIT_ADDRn_B + * @config: 4-bit field in RTIT_CTL + */ +struct pt_filter { + unsigned long msr_a; + unsigned long msr_b; + unsigned long config; +}; + +/** + * struct pt_filters - IP range filtering context + * @filter: filters defined for this context + * @offs: array of mapped object's offsets + * @nr_filters: number of defined filters in the @filter array + */ +struct pt_filters { + struct pt_filter filter[PT_FILTERS_NUM]; + unsigned long offs[PT_FILTERS_NUM]; + unsigned int nr_filters; +}; + /** * struct pt - per-cpu pt context * @handle: perf output handle + * @filters: last configured filters * @handle_nmi: do handle PT PMI on this cpu, there's an active event */ struct pt { struct perf_output_handle handle; + struct pt_filters filters; int handle_nmi; }; -- 2.8.0.rc3