From: Stephane Eranian <eranian@google.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, acme@redhat.com, jolsa@redhat.com,
	kim.phillips@amd.com, namhyung@kernel.org, irogers@google.com
Subject: [PATCH v1 07/13] perf/core: add idle hooks
Date: Thu,  9 Sep 2021 00:56:54 -0700
Message-ID: <20210909075700.4025355-8-eranian@google.com>
In-Reply-To: <20210909075700.4025355-1-eranian@google.com>

This patch adds a new set of hooks to connect perf_events with the
idle task. On some PMU models, it may be necessary to flush or stop
the PMU when going to low power and, conversely, to re-enable the PMU
upon return from low power. The patch adds perf_register_lopwr_cb()
to register a callback invoked on entry to and return from low power.
The callback takes a single boolean argument: true means entering low
power, false means returning from it.

The callback may be invoked from the idle code with interrupts
already disabled, so it must not sleep.
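
For illustration, here is a minimal sketch of how a PMU driver could
use the new interface. The brs_* names are hypothetical placeholders,
not part of this patch; only perf_register_lopwr_cb(),
perf_lopwr_active_inc() and perf_lopwr_active_dec() are introduced
here:

	static struct pmu brs_pmu;

	/* called on entry to (true) and return from (false) low power */
	static void brs_lopwr_cb(bool lopwr_in)
	{
		if (lopwr_in) {
			/* entering low power: flush/stop the PMU here */
		} else {
			/* returning from low power: re-enable the PMU here */
		}
	}

	static int __init brs_init(void)
	{
		/* register the callback once, at PMU init time */
		perf_register_lopwr_cb(&brs_pmu, brs_lopwr_cb);
		return 0;
	}

	/*
	 * perf_lopwr_cb() only iterates the callback list when the
	 * per-CPU lopwr_nr_active count is non-zero, so the driver
	 * updates the count when an event needing the hook is added
	 * to or deleted from this CPU.
	 */
	static int brs_add(struct perf_event *event, int flags)
	{
		perf_lopwr_active_inc();
		return 0;
	}

	static void brs_del(struct perf_event *event, int flags)
	{
		perf_lopwr_active_dec();
	}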

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 include/linux/perf_event.h | 11 +++++++++
 kernel/events/core.c       | 58 ++++++++++++++++++++++++++++++++++++++
 kernel/sched/idle.c        | 14 ++++++++-
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2d510ad750ed..32ffc009b2ec 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -300,6 +300,7 @@ struct pmu {
 	/* number of address filters this PMU can do */
 	unsigned int			nr_addr_filters;
 
+	struct list_head		lopwr_entry;
 	/*
 	 * Fully disable/enable this PMU, can be used to protect from the PMI
 	 * as well as for lazy/batch writing of the MSRs.
@@ -430,6 +431,8 @@ struct pmu {
 	void (*sched_task)		(struct perf_event_context *ctx,
 					bool sched_in);
 
+	void (*lopwr_cb)		(bool lopwr_in);
+
 	/*
 	 * Kmem cache of PMU specific data
 	 */
@@ -1429,6 +1432,14 @@ extern void perf_event_task_tick(void);
 extern int perf_event_account_interrupt(struct perf_event *event);
 extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
+extern void perf_lopwr_cb(bool lopwr_in);
+extern void perf_lopwr_active_inc(void);
+extern void perf_lopwr_active_dec(void);
+extern void perf_register_lopwr_cb(struct pmu *pmu, void (*lopwr_cb)(bool));
+
 #else /* !CONFIG_PERF_EVENTS: */
+static inline void perf_lopwr_cb(bool lopwr_in)			{ }
+static inline void perf_lopwr_active_inc(void)			{ }
+static inline void perf_lopwr_active_dec(void)			{ }
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cb1f9b8392e..f739fd92e74b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3521,6 +3521,64 @@ void perf_sched_cb_inc(struct pmu *pmu)
 	this_cpu_inc(perf_sched_cb_usages);
 }
 
+/*
+ * perf_lopwr_cb() is invoked from the idle task, so it cannot
+ * grab a mutex that may end up sleeping: the idle task cannot
+ * sleep by construction. Therefore we maintain a dedicated list
+ * of PMUs to invoke on idle, protected by a spinlock. Normally
+ * we would take pmus_lock and iterate over all PMUs, but that
+ * mutex cannot be used here, and we only need to iterate over
+ * the PMUs that actually require an idle callback.
+ */
+static DEFINE_SPINLOCK(lopwr_cb_lock);
+static LIST_HEAD(lopwr_cb_pmus);
+static DEFINE_PER_CPU(int, lopwr_nr_active);
+
+void perf_lopwr_active_inc(void)
+{
+	__this_cpu_inc(lopwr_nr_active);
+}
+
+void perf_lopwr_active_dec(void)
+{
+	__this_cpu_dec(lopwr_nr_active);
+}
+
+/*
+ * lopwr_in = true means going to low power state
+ * lopwr_in = false means returning from low power state
+ */
+void perf_lopwr_cb(bool lopwr_in)
+{
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (!__this_cpu_read(lopwr_nr_active))
+		return;
+
+	spin_lock_irqsave(&lopwr_cb_lock, flags);
+
+	list_for_each_entry(pmu, &lopwr_cb_pmus, lopwr_entry) {
+		if (pmu->lopwr_cb)
+			pmu->lopwr_cb(lopwr_in);
+	}
+
+	spin_unlock_irqrestore(&lopwr_cb_lock, flags);
+}
+EXPORT_SYMBOL_GPL(perf_lopwr_cb);
+
+void perf_register_lopwr_cb(struct pmu *pmu, void (*func)(bool))
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&lopwr_cb_lock, flags);
+
+	pmu->lopwr_cb   = func;
+	list_add_tail(&pmu->lopwr_entry, &lopwr_cb_pmus);
+
+	spin_unlock_irqrestore(&lopwr_cb_lock, flags);
+}
+
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 912b47aa99d8..14ce130aee1b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -179,6 +179,7 @@ static void cpuidle_idle_call(void)
 	 */
 	if (need_resched()) {
 		local_irq_enable();
+		perf_lopwr_cb(false);
 		return;
 	}
 
@@ -191,7 +192,14 @@ static void cpuidle_idle_call(void)
 	if (cpuidle_not_available(drv, dev)) {
 		tick_nohz_idle_stop_tick();
 
+		if (!cpu_idle_force_poll)
+			perf_lopwr_cb(true);
+
 		default_idle_call();
+
+		if (!cpu_idle_force_poll)
+			perf_lopwr_cb(false);
+
 		goto exit_idle;
 	}
 
@@ -249,8 +257,10 @@ static void cpuidle_idle_call(void)
 	/*
 	 * It is up to the idle functions to reenable local interrupts
 	 */
-	if (WARN_ON_ONCE(irqs_disabled()))
+	if (WARN_ON_ONCE(irqs_disabled())) {
 		local_irq_enable();
+		perf_lopwr_cb(false);
+	}
 }
 
 /*
@@ -279,9 +289,11 @@ static void do_idle(void)
 	__current_set_polling();
 	tick_nohz_idle_enter();
 
 	while (!need_resched()) {
 		rmb();
 
+		perf_lopwr_cb(true);
+
 		local_irq_disable();
 
 		if (cpu_is_offline(cpu)) {
-- 
2.33.0.153.gba50c8fa24-goog

