From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750923AbdBTUs0 (ORCPT ); Mon, 20 Feb 2017 15:48:26 -0500 Received: from mail-yw0-f171.google.com ([209.85.161.171]:34546 "EHLO mail-yw0-f171.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750806AbdBTUsY (ORCPT ); Mon, 20 Feb 2017 15:48:24 -0500 MIME-Version: 1.0 In-Reply-To: <20170220133352.17995-3-alexander.shishkin@linux.intel.com> References: <20170220133352.17995-1-alexander.shishkin@linux.intel.com> <20170220133352.17995-3-alexander.shishkin@linux.intel.com> From: Mathieu Poirier Date: Mon, 20 Feb 2017 13:42:03 -0700 Message-ID: Subject: Re: [PATCH 2/4] perf: Keep AUX flags in the output handle To: Alexander Shishkin Cc: Peter Zijlstra , Ingo Molnar , "linux-kernel@vger.kernel.org" , Vince Weaver , Stephane Eranian , Arnaldo Carvalho de Melo , Will Deacon Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 20 February 2017 at 06:33, Alexander Shishkin wrote: > From: Will Deacon > > In preparation for adding more flags to perf AUX records, introduce a > separate API for setting the flags for a session, rather than appending > more bool arguments to perf_aux_output_end. This allows to set each > flag at the time a corresponding condition is detected, instead of > tracking it in each driver's private state. > > Cc: Mathieu Poirier > Signed-off-by: Will Deacon > Signed-off-by: Alexander Shishkin > --- > arch/x86/events/intel/bts.c | 16 +++++------ > arch/x86/events/intel/pt.c | 17 ++++++------ > arch/x86/events/intel/pt.h | 1 - > drivers/hwtracing/coresight/coresight-etb10.c | 7 +++-- > drivers/hwtracing/coresight/coresight-etm-perf.c | 9 +++---- > drivers/hwtracing/coresight/coresight-priv.h | 2 -- > drivers/hwtracing/coresight/coresight-tmc-etf.c | 7 +++-- For the CS files: Acked-by: Mathieu Poirier > include/linux/coresight.h | 2 +- > include/linux/perf_event.h | 8 +++--- > kernel/events/ring_buffer.c | 34 ++++++++++++++++-------- > 10 files changed, 55 insertions(+), 48 deletions(-) > > diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c > index 982c9e31da..8ae8c5ce3a 100644 > --- a/arch/x86/events/intel/bts.c > +++ b/arch/x86/events/intel/bts.c > @@ -63,7 +63,6 @@ struct bts_buffer { > unsigned int cur_buf; > bool snapshot; > local_t data_size; > - local_t lost; > local_t head; > unsigned long end; > void **data_pages; > @@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts) > return; > > if (ds->bts_index >= ds->bts_absolute_maximum) > - local_inc(&buf->lost); > + perf_aux_output_flag(&bts->handle, > + PERF_AUX_FLAG_TRUNCATED); > > /* > * old and head are always in the same physical buffer, so we > @@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags) > return; > > fail_end_stop: > - perf_aux_output_end(&bts->handle, 0, false); > + perf_aux_output_end(&bts->handle, 0); > > fail_stop: > event->hw.state = PERF_HES_STOPPED; > @@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags) > bts->handle.head = > local_xchg(&buf->data_size, > buf->nr_pages << PAGE_SHIFT); > - > - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), > - !!local_xchg(&buf->lost, 0)); > + perf_aux_output_end(&bts->handle, > + local_xchg(&buf->data_size, 0)); > } > > cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; > @@ -484,8 +483,7 @@ int intel_bts_interrupt(void) > if (old_head == local_read(&buf->head)) > return handled; > > - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), > - !!local_xchg(&buf->lost, 0)); > + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); > > buf = perf_aux_output_begin(&bts->handle, event); > if (buf) > @@ -500,7 +498,7 @@ int intel_bts_interrupt(void) > * cleared handle::event > */ > barrier(); > - perf_aux_output_end(&bts->handle, 0, false); > + perf_aux_output_end(&bts->handle, 0); > } > } > > diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c > index d92a60ef08..a2d0050fde 100644 > --- a/arch/x86/events/intel/pt.c > +++ b/arch/x86/events/intel/pt.c > @@ -823,7 +823,8 @@ static void pt_handle_status(struct pt *pt) > */ > if (!pt_cap_get(PT_CAP_topa_multiple_entries) || > buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { > - local_inc(&buf->lost); > + perf_aux_output_flag(&pt->handle, > + PERF_AUX_FLAG_TRUNCATED); > advance++; > } > } > @@ -916,8 +917,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, > > /* can't stop in the middle of an output region */ > if (buf->output_off + handle->size + 1 < > - sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) > + sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { > + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); > return -EINVAL; > + } > > > /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */ > @@ -1269,8 +1272,7 @@ void intel_pt_interrupt(void) > > pt_update_head(pt); > > - perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), > - local_xchg(&buf->lost, 0)); > + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); > > if (!event->hw.state) { > int ret; > @@ -1285,7 +1287,7 @@ void intel_pt_interrupt(void) > /* snapshot counters don't use PMI, so it's safe */ > ret = pt_buffer_reset_markers(buf, &pt->handle); > if (ret) { > - perf_aux_output_end(&pt->handle, 0, true); > + perf_aux_output_end(&pt->handle, 0); > return; > } > > @@ -1357,7 +1359,7 @@ static void pt_event_start(struct perf_event *event, int mode) > return; > > fail_end_stop: > - perf_aux_output_end(&pt->handle, 0, true); > + perf_aux_output_end(&pt->handle, 0); > fail_stop: > hwc->state = PERF_HES_STOPPED; > } > @@ -1398,8 +1400,7 @@ static void pt_event_stop(struct perf_event *event, int mode) > pt->handle.head = > local_xchg(&buf->data_size, > buf->nr_pages << PAGE_SHIFT); > - perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), > - local_xchg(&buf->lost, 0)); > + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); > } > } > > diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h > index fa81f8d8ed..25fa9710f4 100644 > --- a/arch/x86/events/intel/pt.h > +++ b/arch/x86/events/intel/pt.h > @@ -144,7 +144,6 @@ struct pt_buffer { > size_t output_off; > unsigned long nr_pages; > local_t data_size; > - local_t lost; > local64_t head; > bool snapshot; > unsigned long stop_pos, intr_pos; > diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c > index d7325c6534..82c8ddcf09 100644 > --- a/drivers/hwtracing/coresight/coresight-etb10.c > +++ b/drivers/hwtracing/coresight/coresight-etb10.c > @@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev, > > static unsigned long etb_reset_buffer(struct coresight_device *csdev, > struct perf_output_handle *handle, > - void *sink_config, bool *lost) > + void *sink_config) > { > unsigned long size = 0; > struct cs_buffers *buf = sink_config; > @@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev, > * resetting parameters here and squaring off with the ring > * buffer API in the tracer PMU is fine. > */ > - *lost = !!local_xchg(&buf->lost, 0); > size = local_xchg(&buf->data_size, 0); > } > > @@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev, > (unsigned long)write_ptr); > > write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1); > - local_inc(&buf->lost); > + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); > } > > /* > @@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev, > */ > status = readl_relaxed(drvdata->base + ETB_STATUS_REG); > if (status & ETB_STATUS_RAM_FULL) { > - local_inc(&buf->lost); > + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); > to_read = capacity; > read_ptr = write_ptr; > } else { > diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c > index 70eaa74dc2..47ea0eee67 100644 > --- a/drivers/hwtracing/coresight/coresight-etm-perf.c > +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c > @@ -301,7 +301,8 @@ static void etm_event_start(struct perf_event *event, int flags) > return; > > fail_end_stop: > - perf_aux_output_end(handle, 0, true); > + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); > + perf_aux_output_end(handle, 0); > fail: > event->hw.state = PERF_HES_STOPPED; > goto out; > @@ -309,7 +310,6 @@ static void etm_event_start(struct perf_event *event, int flags) > > static void etm_event_stop(struct perf_event *event, int mode) > { > - bool lost; > int cpu = smp_processor_id(); > unsigned long size; > struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); > @@ -347,10 +347,9 @@ static void etm_event_stop(struct perf_event *event, int mode) > return; > > size = sink_ops(sink)->reset_buffer(sink, handle, > - event_data->snk_config, > - &lost); > + event_data->snk_config); > > - perf_aux_output_end(handle, size, lost); > + perf_aux_output_end(handle, size); > } > > /* Disabling the path make its elements available to other sessions */ > diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h > index ef9d8e93e3..5f662d8205 100644 > --- a/drivers/hwtracing/coresight/coresight-priv.h > +++ b/drivers/hwtracing/coresight/coresight-priv.h > @@ -76,7 +76,6 @@ enum cs_mode { > * @nr_pages: max number of pages granted to us > * @offset: offset within the current buffer > * @data_size: how much we collected in this run > - * @lost: other than zero if we had a HW buffer wrap around > * @snapshot: is this run in snapshot mode > * @data_pages: a handle the ring buffer > */ > @@ -85,7 +84,6 @@ struct cs_buffers { > unsigned int nr_pages; > unsigned long offset; > local_t data_size; > - local_t lost; > bool snapshot; > void **data_pages; > }; > diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c > index 1549436e24..aec61a6d5c 100644 > --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c > +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c > @@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, > > static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, > struct perf_output_handle *handle, > - void *sink_config, bool *lost) > + void *sink_config) > { > long size = 0; > struct cs_buffers *buf = sink_config; > @@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, > * resetting parameters here and squaring off with the ring > * buffer API in the tracer PMU is fine. > */ > - *lost = !!local_xchg(&buf->lost, 0); > size = local_xchg(&buf->data_size, 0); > } > > @@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, > */ > status = readl_relaxed(drvdata->base + TMC_STS); > if (status & TMC_STS_FULL) { > - local_inc(&buf->lost); > + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); > to_read = drvdata->size; > } else { > to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); > @@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, > read_ptr -= drvdata->size; > /* Tell the HW */ > writel_relaxed(read_ptr, drvdata->base + TMC_RRP); > - local_inc(&buf->lost); > + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); > } > > cur = buf->cur; > diff --git a/include/linux/coresight.h b/include/linux/coresight.h > index 2a5982c37d..035c16c9a5 100644 > --- a/include/linux/coresight.h > +++ b/include/linux/coresight.h > @@ -201,7 +201,7 @@ struct coresight_ops_sink { > void *sink_config); > unsigned long (*reset_buffer)(struct coresight_device *csdev, > struct perf_output_handle *handle, > - void *sink_config, bool *lost); > + void *sink_config); > void (*update_buffer)(struct coresight_device *csdev, > struct perf_output_handle *handle, > void *sink_config); > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index a39564314e..cdbaa88dc8 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -812,6 +812,7 @@ struct perf_output_handle { > struct ring_buffer *rb; > unsigned long wakeup; > unsigned long size; > + u64 aux_flags; > union { > void *addr; > unsigned long head; > @@ -860,10 +861,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) > extern void *perf_aux_output_begin(struct perf_output_handle *handle, > struct perf_event *event); > extern void perf_aux_output_end(struct perf_output_handle *handle, > - unsigned long size, bool truncated); > + unsigned long size); > extern int perf_aux_output_skip(struct perf_output_handle *handle, > unsigned long size); > extern void *perf_get_aux(struct perf_output_handle *handle); > +extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); > > extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); > extern void perf_pmu_unregister(struct pmu *pmu); > @@ -1278,8 +1280,8 @@ static inline void * > perf_aux_output_begin(struct perf_output_handle *handle, > struct perf_event *event) { return NULL; } > static inline void > -perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, > - bool truncated) { } > +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) > + { } > static inline int > perf_aux_output_skip(struct perf_output_handle *handle, > unsigned long size) { return -EINVAL; } > diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c > index 6415807169..f3ebafe060 100644 > --- a/kernel/events/ring_buffer.c > +++ b/kernel/events/ring_buffer.c > @@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) > rb->paused = 1; > } > > +void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) > +{ > + /* > + * OVERWRITE is determined by perf_aux_output_end() and can't > + * be passed in directly. > + */ > + if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE)) > + return; > + > + handle->aux_flags |= flags; > +} > +EXPORT_SYMBOL_GPL(perf_aux_output_flag); > + > /* > * This is called before hardware starts writing to the AUX area to > * obtain an output handle and make sure there's room in the buffer. > @@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, > handle->event = event; > handle->head = aux_head; > handle->size = 0; > + handle->aux_flags = 0; > > /* > * In overwrite mode, AUX data stores do not depend on aux_tail, > @@ -409,34 +423,32 @@ EXPORT_SYMBOL_GPL(perf_aux_output_begin); > * of the AUX buffer management code is that after pmu::stop(), the AUX > * transaction must be stopped and therefore drop the AUX reference count. > */ > -void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, > - bool truncated) > +void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) > { > struct ring_buffer *rb = handle->rb; > - bool wakeup = truncated; > + bool wakeup = !!handle->aux_flags; > unsigned long aux_head; > - u64 flags = 0; > - > - if (truncated) > - flags |= PERF_AUX_FLAG_TRUNCATED; > > /* in overwrite mode, driver provides aux_head via handle */ > if (rb->aux_overwrite) { > - flags |= PERF_AUX_FLAG_OVERWRITE; > + handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; > > aux_head = handle->head; > local_set(&rb->aux_head, aux_head); > } else { > + handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; > + > aux_head = local_read(&rb->aux_head); > local_add(size, &rb->aux_head); > } > > - if (size || flags) { > + if (size || handle->aux_flags) { > /* > * Only send RECORD_AUX if we have something useful to communicate > */ > > - perf_event_aux_event(handle->event, aux_head, size, flags); > + perf_event_aux_event(handle->event, aux_head, size, > + handle->aux_flags); > } > > aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); > @@ -447,7 +459,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, > } > > if (wakeup) { > - if (truncated) > + if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) > handle->event->pending_disable = 1; > perf_output_wakeup(handle); > } > -- > 2.11.0 >