From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com,
jbaron@akamai.com, torvalds@linux-foundation.org,
tglx@linutronix.de, mingo@kernel.org, namit@vmware.com,
hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org,
jpoimboe@redhat.com
Subject: [RESEND][PATCH v3 17/17] x86/perf, static_call: Optimize x86_pmu methods
Date: Tue, 24 Mar 2020 14:56:20 +0100 [thread overview]
Message-ID: <20200324142246.308204077@infradead.org> (raw)
In-Reply-To: <20200324135603.483964896@infradead.org>
Replace many of the indirect calls with static_call().
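Most x86_pmu methods are fixed once the PMU driver is selected at boot,
but every invocation still goes through an indirect call. Convert them
to (conditional) static calls: each method gets a static call key,
x86_pmu_static_call_update() points the keys at the selected driver's
functions during init_hw_perf_events(), and the call sites use
static_call() / static_cond_call(). The conditional variant subsumes
the "if (x86_pmu.method)" guards; calling a NULL target is simply a
no-op. Since x86_pmu_read() needs a fallback, install _x86_pmu_read()
as the default ->read() method so its static call always has a target.

The conversion pattern for a single method, sketched:

	/* key, typed after the x86_pmu member it replaces */
	DEFINE_STATIC_COND_CALL(x86_pmu_add, *x86_pmu.add);

	/* once, after the PMU driver is known */
	static_call_update(x86_pmu_add, x86_pmu.add);

	/* call site: direct call when set, skipped when NULL */
	static_cond_call(x86_pmu_add)(event);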
XXX run performance numbers
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/events/core.c | 130 +++++++++++++++++++++++++++++++++----------------
1 file changed, 90 insertions(+), 40 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -28,6 +28,7 @@
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/nospec.h>
+#include <linux/static_call.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -52,6 +53,30 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu
DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+DEFINE_STATIC_COND_CALL(x86_pmu_handle_irq, *x86_pmu.handle_irq);
+DEFINE_STATIC_COND_CALL(x86_pmu_disable_all, *x86_pmu.disable_all);
+DEFINE_STATIC_COND_CALL(x86_pmu_enable_all, *x86_pmu.enable_all);
+DEFINE_STATIC_COND_CALL(x86_pmu_enable, *x86_pmu.enable);
+DEFINE_STATIC_COND_CALL(x86_pmu_disable, *x86_pmu.disable);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_add, *x86_pmu.add);
+DEFINE_STATIC_COND_CALL(x86_pmu_del, *x86_pmu.del);
+DEFINE_STATIC_COND_CALL(x86_pmu_read, *x86_pmu.read);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
+DEFINE_STATIC_COND_CALL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
+DEFINE_STATIC_COND_CALL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling);
+DEFINE_STATIC_COND_CALL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
+DEFINE_STATIC_COND_CALL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_sched_task, *x86_pmu.sched_task);
+DEFINE_STATIC_COND_CALL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
+DEFINE_STATIC_COND_CALL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
+
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -660,7 +685,7 @@ static void x86_pmu_disable(struct pmu *
cpuc->enabled = 0;
barrier();
- x86_pmu.disable_all();
+ static_call(x86_pmu_disable_all)();
}
void x86_pmu_enable_all(int added)
@@ -907,8 +932,7 @@ int x86_schedule_events(struct cpu_hw_ev
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
n0 -= cpuc->n_txn;
- if (x86_pmu.start_scheduling)
- x86_pmu.start_scheduling(cpuc);
+ static_cond_call(x86_pmu_start_scheduling)(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = cpuc->event_constraint[i];
@@ -925,7 +949,7 @@ int x86_schedule_events(struct cpu_hw_ev
* change due to external factors (sibling state, allow_tfa).
*/
if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
- c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+ c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
cpuc->event_constraint[i] = c;
}
@@ -1008,8 +1032,7 @@ int x86_schedule_events(struct cpu_hw_ev
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
- if (x86_pmu.commit_scheduling)
- x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+ static_cond_call(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
}
} else {
for (i = n0; i < n; i++) {
@@ -1018,15 +1041,13 @@ int x86_schedule_events(struct cpu_hw_ev
/*
* release events that failed scheduling
*/
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, e);
+ static_cond_call(x86_pmu_put_event_constraints)(cpuc, e);
cpuc->event_constraint[i] = NULL;
}
}
- if (x86_pmu.stop_scheduling)
- x86_pmu.stop_scheduling(cpuc);
+ static_cond_call(x86_pmu_stop_scheduling)(cpuc);
return unsched ? -EINVAL : 0;
}
@@ -1217,7 +1238,7 @@ static void x86_pmu_enable(struct pmu *p
cpuc->enabled = 1;
barrier();
- x86_pmu.enable_all(added);
+ static_call(x86_pmu_enable_all)(added);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1338,7 +1359,7 @@ static int x86_pmu_add(struct perf_event
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
- ret = x86_pmu.schedule_events(cpuc, n, assign);
+ ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
if (ret)
goto out;
/*
@@ -1356,13 +1377,11 @@ static int x86_pmu_add(struct perf_event
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
- if (x86_pmu.add) {
- /*
- * This is before x86_pmu_enable() will call x86_pmu_start(),
- * so we enable LBRs before an event needs them etc..
- */
- x86_pmu.add(event);
- }
+ /*
+ * This is before x86_pmu_enable() will call x86_pmu_start(),
+ * so we enable LBRs before an event needs them etc..
+ */
+ static_cond_call(x86_pmu_add)(event);
ret = 0;
out:
@@ -1390,7 +1409,7 @@ static void x86_pmu_start(struct perf_ev
cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
__set_bit(idx, cpuc->running);
- x86_pmu.enable(event);
+ static_call(x86_pmu_enable)(event);
perf_event_update_userpage(event);
}
@@ -1460,7 +1479,7 @@ void x86_pmu_stop(struct perf_event *eve
struct hw_perf_event *hwc = &event->hw;
if (test_bit(hwc->idx, cpuc->active_mask)) {
- x86_pmu.disable(event);
+ static_call(x86_pmu_disable)(event);
__clear_bit(hwc->idx, cpuc->active_mask);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1510,8 +1529,7 @@ static void x86_pmu_del(struct perf_even
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, event);
+ static_cond_call(x86_pmu_put_event_constraints)(cpuc, event);
/* Delete the array entry. */
while (++i < cpuc->n_events) {
@@ -1524,13 +1542,12 @@ static void x86_pmu_del(struct perf_even
perf_event_update_userpage(event);
do_del:
- if (x86_pmu.del) {
- /*
- * This is after x86_pmu_stop(); so we disable LBRs after any
- * event can need them etc..
- */
- x86_pmu.del(event);
- }
+
+ /*
+ * This is after x86_pmu_stop(); so we disable LBRs after any
+ * event can need them etc..
+ */
+ static_cond_call(x86_pmu_del)(event);
}
int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1608,7 +1625,7 @@ perf_event_nmi_handler(unsigned int cmd,
return NMI_DONE;
start_clock = sched_clock();
- ret = x86_pmu.handle_irq(regs);
+ ret = static_call(x86_pmu_handle_irq)(regs);
finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
@@ -1821,6 +1838,38 @@ ssize_t x86_event_sysfs_show(char *page,
static struct attribute_group x86_pmu_attr_group;
static struct attribute_group x86_pmu_caps_group;
+static void x86_pmu_static_call_update(void)
+{
+ static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+ static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+ static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+ static_call_update(x86_pmu_enable, x86_pmu.enable);
+ static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+ static_call_update(x86_pmu_add, x86_pmu.add);
+ static_call_update(x86_pmu_del, x86_pmu.del);
+ static_call_update(x86_pmu_read, x86_pmu.read);
+
+ static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+ static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+ static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+ static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+ static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+ static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+ static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+ static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
+
+ static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+ static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+}
+
+static void _x86_pmu_read(struct perf_event *event)
+{
+ x86_perf_event_update(event);
+}
+
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1885,6 +1934,11 @@ static int __init init_hw_perf_events(vo
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
+ if (!x86_pmu.read)
+ x86_pmu.read = _x86_pmu_read;
+
+ x86_pmu_static_call_update();
+
/*
* Install callbacks. Core will call them for each online
* cpu.
@@ -1921,11 +1975,9 @@ static int __init init_hw_perf_events(vo
}
early_initcall(init_hw_perf_events);
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
{
- if (x86_pmu.read)
- return x86_pmu.read(event);
- x86_perf_event_update(event);
+ static_call(x86_pmu_read)(event);
}
/*
@@ -2002,7 +2054,7 @@ static int x86_pmu_commit_txn(struct pmu
if (!x86_pmu_initialized())
return -EAGAIN;
- ret = x86_pmu.schedule_events(cpuc, n, assign);
+ ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
if (ret)
return ret;
@@ -2300,15 +2352,13 @@ static const struct attribute_group *x86
static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
- if (x86_pmu.sched_task)
- x86_pmu.sched_task(ctx, sched_in);
+ static_cond_call(x86_pmu_sched_task)(ctx, sched_in);
}
static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
{
- if (x86_pmu.swap_task_ctx)
- x86_pmu.swap_task_ctx(prev, next);
+ static_cond_call(x86_pmu_swap_task_ctx)(prev, next);
}
void perf_check_microcode(void)