From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753222AbbDBSj3 (ORCPT );
	Thu, 2 Apr 2015 14:39:29 -0400
Received: from terminus.zytor.com ([198.137.202.10]:36974 "EHLO
	terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1753169AbbDBSjZ (ORCPT );
	Thu, 2 Apr 2015 14:39:25 -0400
Date: Thu, 2 Apr 2015 11:38:32 -0700
From: tip-bot for Alexander Shishkin
Message-ID:
Cc: peterz@infradead.org, efault@gmx.de, fweisbec@gmail.com,
	torvalds@linux-foundation.org, bp@alien8.de, kaixu.xia@linaro.org,
	mingo@kernel.org, eranian@google.com, tglx@linutronix.de,
	linux-kernel@vger.kernel.org, alexander.shishkin@linux.intel.com,
	paulus@samba.org, hpa@zytor.com, rric@kernel.org
Reply-To: linux-kernel@vger.kernel.org, eranian@google.com,
	tglx@linutronix.de, paulus@samba.org,
	alexander.shishkin@linux.intel.com, hpa@zytor.com, rric@kernel.org,
	efault@gmx.de, peterz@infradead.org, fweisbec@gmail.com,
	mingo@kernel.org, torvalds@linux-foundation.org,
	kaixu.xia@linaro.org, bp@alien8.de
In-Reply-To: <1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com>
References: <1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com>
To: linux-tip-commits@vger.kernel.org
Subject: [tip:perf/core] perf: Add a pmu capability for "exclusive" events
Git-Commit-ID: bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0
X-Mailer: tip-git-log-daemon
Robot-ID:
Robot-Unsubscribe: Contact to get blacklisted from these emails
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset=UTF-8
Content-Disposition: inline
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

Commit-ID:  bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0
Gitweb:     http://git.kernel.org/tip/bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0
Author:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
AuthorDate: Fri, 30 Jan 2015 12:31:06 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 2 Apr 2015 17:14:12 +0200

perf: Add a pmu capability for "exclusive" events

Usually, pmus that do instruction tracing, for example, can only ever
have one event per task per cpu (or per perf_event_context). For such
pmus it makes sense to disallow the creation of conflicting events
early on, so as to provide consistent behavior for the user.

This patch adds a pmu capability that indicates such a constraint on
event creation.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
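The heart of the scheme is a single signed counter per struct pmu:
cpu-wide events drive pmu::exclusive_cnt negative, per-task events
drive it positive, so either kind blocks the other for as long as any
event of the opposite kind is alive. Below is a minimal standalone
model of that counter protocol, using C11 atomics in place of the
kernel's atomic_t; it is an illustration only, not kernel code, and
inc_unless_negative()/dec_unless_positive() merely mirror the kernel's
atomic_inc_unless_negative()/atomic_dec_unless_positive():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int exclusive_cnt;	/* models pmu::exclusive_cnt */

/* ++(*v) unless *v is negative; returns false if it was */
static bool inc_unless_negative(atomic_int *v)
{
	int old = atomic_load(v);

	do {
		if (old < 0)
			return false;
	} while (!atomic_compare_exchange_weak(v, &old, old + 1));

	return true;
}

/* --(*v) unless *v is positive; returns false if it was */
static bool dec_unless_positive(atomic_int *v)
{
	int old = atomic_load(v);

	do {
		if (old > 0)
			return false;
	} while (!atomic_compare_exchange_weak(v, &old, old - 1));

	return true;
}

int main(void)
{
	/* two per-task events coexist fine: counter climbs to +2 */
	printf("task event 1: %s\n",
	       inc_unless_negative(&exclusive_cnt) ? "ok" : "EBUSY");
	printf("task event 2: %s\n",
	       inc_unless_negative(&exclusive_cnt) ? "ok" : "EBUSY");
	/* a cpu-wide event is now rejected: counter is positive */
	printf("cpu event:    %s\n",
	       dec_unless_positive(&exclusive_cnt) ? "ok" : "EBUSY");
	return 0;
}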
 include/linux/perf_event.h |   2 +
 kernel/events/core.c       | 119 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 13a1eb3..f936a1e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -176,6 +176,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_NMI			0x02
 #define PERF_PMU_CAP_AUX_NO_SG			0x04
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
+#define PERF_PMU_CAP_EXCLUSIVE			0x10
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -196,6 +197,7 @@ struct pmu {
 
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
+	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
 
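The new bit composes with the existing PERF_PMU_CAP_* flags in
pmu::capabilities. For illustration, a driver for one-event-at-a-time
trace hardware would declare itself exclusive at registration time
roughly like this (hypothetical fragment: the pmu name and the flag
combination are invented, and no in-tree pmu sets the bit in this
patch):

static struct pmu my_itrace_pmu = {
	/* have the core reject conflicting events up front */
	.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_AUX_NO_SG,
	.task_ctx_nr	= perf_sw_context,
	/* .event_init, .add, .del, .start, .stop, .read elided */
};

perf_pmu_register() zeroes pmu::exclusive_cnt, as the core.c hunks
below show.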
diff --git a/kernel/events/core.c b/kernel/events/core.c
index da51128..6d9fdae 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3459,6 +3459,91 @@ static void unaccount_event(struct perf_event *event)
 	unaccount_event_cpu(event, event->cpu);
 }
 
+/*
+ * The following implement mutual exclusion of events on "exclusive" pmus
+ * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
+ * at a time, so we disallow creating events that might conflict, namely:
+ *
+ *  1) cpu-wide events in the presence of per-task events,
+ *  2) per-task events in the presence of cpu-wide events,
+ *  3) two matching events on the same context.
+ *
+ * The former two cases are handled in the allocation path (perf_event_alloc(),
+ * __free_event()), the latter -- before the first perf_install_in_context().
+ */
+static int exclusive_event_init(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return 0;
+
+	/*
+	 * Prevent co-existence of per-task and cpu-wide events on the
+	 * same exclusive pmu.
+	 *
+	 * Negative pmu::exclusive_cnt means there are cpu-wide
+	 * events on this "exclusive" pmu, positive means there are
+	 * per-task events.
+	 *
+	 * Since this is called in perf_event_alloc() path, event::ctx
+	 * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
+	 * to mean "per-task event", because unlike other attach states it
+	 * never gets cleared.
+	 */
+	if (event->attach_state & PERF_ATTACH_TASK) {
+		if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
+			return -EBUSY;
+	} else {
+		if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void exclusive_event_destroy(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return;
+
+	/* see comment in exclusive_event_init() */
+	if (event->attach_state & PERF_ATTACH_TASK)
+		atomic_dec(&pmu->exclusive_cnt);
+	else
+		atomic_inc(&pmu->exclusive_cnt);
+}
+
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+	if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+	    (e1->cpu == e2->cpu ||
+	     e1->cpu == -1 ||
+	     e2->cpu == -1))
+		return true;
+	return false;
+}
+
+/* Called under the same ctx::mutex as perf_install_in_context() */
+static bool exclusive_event_installable(struct perf_event *event,
+					struct perf_event_context *ctx)
+{
+	struct perf_event *iter_event;
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return true;
+
+	list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+		if (exclusive_event_match(iter_event, event))
+			return false;
+	}
+
+	return true;
+}
+
 static void __free_event(struct perf_event *event)
 {
 	if (!event->parent) {
@@ -3472,8 +3557,10 @@ static void __free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu)
+	if (event->pmu) {
+		exclusive_event_destroy(event);
 		module_put(event->pmu->module);
+	}
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
@@ -7150,6 +7237,7 @@ got_cpu_context:
 		pmu->event_idx = perf_event_idx_default;
 
 	list_add_rcu(&pmu->entry, &pmus);
+	atomic_set(&pmu->exclusive_cnt, 0);
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
@@ -7405,16 +7493,23 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		goto err_ns;
 	}
 
+	err = exclusive_event_init(event);
+	if (err)
+		goto err_pmu;
+
 	if (!event->parent) {
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 			err = get_callchain_buffers();
 			if (err)
-				goto err_pmu;
+				goto err_per_task;
 		}
 	}
 
 	return event;
 
+err_per_task:
+	exclusive_event_destroy(event);
+
 err_pmu:
 	if (event->destroy)
 		event->destroy(event);
@@ -7819,6 +7914,11 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_alloc;
 	}
 
+	if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
+		err = -EBUSY;
+		goto err_context;
+	}
+
 	if (task) {
 		put_task_struct(task);
 		task = NULL;
@@ -7941,6 +8041,13 @@ SYSCALL_DEFINE5(perf_event_open,
 		get_ctx(ctx);
 	}
 
+	if (!exclusive_event_installable(event, ctx)) {
+		err = -EBUSY;
+		mutex_unlock(&ctx->mutex);
+		fput(event_file);
+		goto err_context;
+	}
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
@@ -8032,6 +8139,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
+	if (!exclusive_event_installable(event, ctx)) {
+		mutex_unlock(&ctx->mutex);
+		perf_unpin_context(ctx);
+		put_ctx(ctx);
+		err = -EBUSY;
+		goto err_free;
+	}
+
 	perf_install_in_context(ctx, event, cpu);
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
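From userspace, the visible effect of the patch is that the second of
two conflicting opens fails with EBUSY. A sketch of what that looks
like, assuming a pmu that sets PERF_PMU_CAP_EXCLUSIVE is present --
"intel_pt" is used only as a placeholder for whatever exclusive pmu a
given kernel exposes under /sys/bus/event_source/devices/:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long fd1, fd2;
	int type;
	FILE *f;

	/* look up the pmu's dynamic type id; placeholder pmu name */
	f = fopen("/sys/bus/event_source/devices/intel_pt/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;

	/* per-task event on the exclusive pmu: drives the count positive */
	fd1 = sys_perf_event_open(&attr, 0, -1, -1, 0);
	if (fd1 < 0)
		return 1;

	/* cpu-wide event on the same pmu: now conflicts */
	fd2 = sys_perf_event_open(&attr, -1, 0, -1, 0);
	if (fd2 < 0 && errno == EBUSY)
		printf("conflicting event rejected with EBUSY, as expected\n");

	return 0;
}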