All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Will Deacon <will@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Marc Zyngier <maz@kernel.org>, Guo Ren <guoren@kernel.org>,
	Nick Hu <nickhu@andestech.com>, Greentime Hu <green.hu@gmail.com>,
	Vincent Chen <deanbo422@gmail.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Albert Ou <aou@eecs.berkeley.edu>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	James Morse <james.morse@arm.com>,
	Alexandru Elisei <alexandru.elisei@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Stefano Stabellini <sstabellini@kernel.org>,
	linux-arm-kernel@lists.infradead.org,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	kvmarm@lists.cs.columbia.edu, linux-csky@vger.kernel.org,
	linux-riscv@lists.infradead.org, kvm@vger.kernel.org,
	xen-devel@lists.xenproject.org,
	Artem Kashkanov <artem.kashkanov@intel.com>,
	Like Xu <like.xu.linux@gmail.com>,
	Zhu Lingshan <lingshan.zhu@intel.com>
Subject: [PATCH v3 09/16] perf/core: Use static_call to optimize perf_guest_info_callbacks
Date: Tue, 21 Sep 2021 17:05:26 -0700	[thread overview]
Message-ID: <20210922000533.713300-10-seanjc@google.com> (raw)
In-Reply-To: <20210922000533.713300-1-seanjc@google.com>

Use static_call to optimize perf's guest callbacks on arm64 and x86,
which are now the only architectures that define the callbacks.  Use
DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as
the callback semantics are that a return value '0' means "not in guest".

static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is
also advantageous versus other solutions, e.g. per-cpu callbacks, in that
a per-cpu memory load is not needed to detect the !guest case.

Based on code from Peter and Like.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Like Xu <like.xu.linux@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/perf_event.h | 28 ++++++----------------------
 kernel/events/core.c       | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index eefa197d5354..d582dfeb4e20 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1240,37 +1240,21 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
-static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
-{
-	/* Reg/unreg perf_guest_cbs waits for readers via synchronize_rcu(). */
-	lockdep_assert_preemption_disabled();
+DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
+DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
 
-	/* Prevent reloading between a !NULL check and dereferences. */
-	return READ_ONCE(perf_guest_cbs);
-}
 static inline unsigned int perf_guest_state(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	return guest_cbs ? guest_cbs->state() : 0;
+	return static_call(__perf_guest_state)();
 }
 static inline unsigned long perf_guest_get_ip(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	/*
-	 * Arbitrarily return '0' in the unlikely scenario that the callbacks
-	 * are unregistered between checking guest state and getting the IP.
-	 */
-	return guest_cbs ? guest_cbs->get_ip() : 0;
+	return static_call(__perf_guest_get_ip)();
 }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	if (guest_cbs && guest_cbs->handle_intel_pt_intr)
-		return guest_cbs->handle_intel_pt_intr();
-	return 0;
+	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6ec05809f54..79c8ee1778a4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6485,12 +6485,23 @@ static void perf_pending_event(struct irq_work *entry)
 #ifdef CONFIG_GUEST_PERF_EVENTS
 struct perf_guest_info_callbacks *perf_guest_cbs;
 
+DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
+DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
 	if (WARN_ON_ONCE(perf_guest_cbs))
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, cbs);
+	static_call_update(__perf_guest_state, cbs->state);
+	static_call_update(__perf_guest_get_ip, cbs->get_ip);
+
+	/* Implementing ->handle_intel_pt_intr is optional. */
+	if (cbs->handle_intel_pt_intr)
+		static_call_update(__perf_guest_handle_intel_pt_intr,
+				   cbs->handle_intel_pt_intr);
 }
 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
 
@@ -6500,6 +6511,10 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, NULL);
+	static_call_update(__perf_guest_state, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_handle_intel_pt_intr,
+			   (void *)&__static_call_return0);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
-- 
2.33.0.464.g1972c5931b-goog


WARNING: multiple messages have this Message-ID (diff)
From: Sean Christopherson <seanjc@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	 Arnaldo Carvalho de Melo <acme@kernel.org>,
	Will Deacon <will@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	 Marc Zyngier <maz@kernel.org>, Guo Ren <guoren@kernel.org>,
	Nick Hu <nickhu@andestech.com>,
	 Greentime Hu <green.hu@gmail.com>,
	Vincent Chen <deanbo422@gmail.com>,
	 Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	 Albert Ou <aou@eecs.berkeley.edu>,
	Paolo Bonzini <pbonzini@redhat.com>,
	 Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>,  Namhyung Kim <namhyung@kernel.org>,
	James Morse <james.morse@arm.com>,
	 Alexandru Elisei <alexandru.elisei@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	 Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>,
	 Joerg Roedel <joro@8bytes.org>,
	Stefano Stabellini <sstabellini@kernel.org>,
	 linux-arm-kernel@lists.infradead.org,
	linux-perf-users@vger.kernel.org,  linux-kernel@vger.kernel.org,
	kvmarm@lists.cs.columbia.edu,  linux-csky@vger.kernel.org,
	linux-riscv@lists.infradead.org,  kvm@vger.kernel.org,
	xen-devel@lists.xenproject.org,
	 Artem Kashkanov <artem.kashkanov@intel.com>,
	Like Xu <like.xu.linux@gmail.com>,
	Zhu Lingshan <lingshan.zhu@intel.com>
Subject: [PATCH v3 09/16] perf/core: Use static_call to optimize perf_guest_info_callbacks
Date: Tue, 21 Sep 2021 17:05:26 -0700	[thread overview]
Message-ID: <20210922000533.713300-10-seanjc@google.com> (raw)
In-Reply-To: <20210922000533.713300-1-seanjc@google.com>

Use static_call to optimize perf's guest callbacks on arm64 and x86,
which are now the only architectures that define the callbacks.  Use
DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as
the callback semantics are that a return value '0' means "not in guest".

static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is
also advantageous versus other solutions, e.g. per-cpu callbacks, in that
a per-cpu memory load is not needed to detect the !guest case.

Based on code from Peter and Like.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Like Xu <like.xu.linux@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/perf_event.h | 28 ++++++----------------------
 kernel/events/core.c       | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index eefa197d5354..d582dfeb4e20 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1240,37 +1240,21 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
-static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
-{
-	/* Reg/unreg perf_guest_cbs waits for readers via synchronize_rcu(). */
-	lockdep_assert_preemption_disabled();
+DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
+DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
 
-	/* Prevent reloading between a !NULL check and dereferences. */
-	return READ_ONCE(perf_guest_cbs);
-}
 static inline unsigned int perf_guest_state(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	return guest_cbs ? guest_cbs->state() : 0;
+	return static_call(__perf_guest_state)();
 }
 static inline unsigned long perf_guest_get_ip(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	/*
-	 * Arbitrarily return '0' in the unlikely scenario that the callbacks
-	 * are unregistered between checking guest state and getting the IP.
-	 */
-	return guest_cbs ? guest_cbs->get_ip() : 0;
+	return static_call(__perf_guest_get_ip)();
 }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	if (guest_cbs && guest_cbs->handle_intel_pt_intr)
-		return guest_cbs->handle_intel_pt_intr();
-	return 0;
+	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6ec05809f54..79c8ee1778a4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6485,12 +6485,23 @@ static void perf_pending_event(struct irq_work *entry)
 #ifdef CONFIG_GUEST_PERF_EVENTS
 struct perf_guest_info_callbacks *perf_guest_cbs;
 
+DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
+DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
 	if (WARN_ON_ONCE(perf_guest_cbs))
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, cbs);
+	static_call_update(__perf_guest_state, cbs->state);
+	static_call_update(__perf_guest_get_ip, cbs->get_ip);
+
+	/* Implementing ->handle_intel_pt_intr is optional. */
+	if (cbs->handle_intel_pt_intr)
+		static_call_update(__perf_guest_handle_intel_pt_intr,
+				   cbs->handle_intel_pt_intr);
 }
 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
 
@@ -6500,6 +6511,10 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, NULL);
+	static_call_update(__perf_guest_state, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_handle_intel_pt_intr,
+			   (void *)&__static_call_return0);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
-- 
2.33.0.464.g1972c5931b-goog


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

WARNING: multiple messages have this Message-ID (diff)
From: Sean Christopherson <seanjc@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	 Arnaldo Carvalho de Melo <acme@kernel.org>,
	Will Deacon <will@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	 Marc Zyngier <maz@kernel.org>, Guo Ren <guoren@kernel.org>,
	Nick Hu <nickhu@andestech.com>,
	 Greentime Hu <green.hu@gmail.com>,
	Vincent Chen <deanbo422@gmail.com>,
	 Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	 Albert Ou <aou@eecs.berkeley.edu>,
	Paolo Bonzini <pbonzini@redhat.com>,
	 Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>,  Namhyung Kim <namhyung@kernel.org>,
	James Morse <james.morse@arm.com>,
	 Alexandru Elisei <alexandru.elisei@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	 Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>,
	 Joerg Roedel <joro@8bytes.org>,
	Stefano Stabellini <sstabellini@kernel.org>,
	 linux-arm-kernel@lists.infradead.org,
	linux-perf-users@vger.kernel.org,  linux-kernel@vger.kernel.org,
	kvmarm@lists.cs.columbia.edu,  linux-csky@vger.kernel.org,
	linux-riscv@lists.infradead.org,  kvm@vger.kernel.org,
	xen-devel@lists.xenproject.org,
	 Artem Kashkanov <artem.kashkanov@intel.com>,
	Like Xu <like.xu.linux@gmail.com>,
	Zhu Lingshan <lingshan.zhu@intel.com>
Subject: [PATCH v3 09/16] perf/core: Use static_call to optimize perf_guest_info_callbacks
Date: Tue, 21 Sep 2021 17:05:26 -0700	[thread overview]
Message-ID: <20210922000533.713300-10-seanjc@google.com> (raw)
In-Reply-To: <20210922000533.713300-1-seanjc@google.com>

Use static_call to optimize perf's guest callbacks on arm64 and x86,
which are now the only architectures that define the callbacks.  Use
DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as
the callback semantics are that a return value '0' means "not in guest".

static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is
also advantageous versus other solutions, e.g. per-cpu callbacks, in that
a per-cpu memory load is not needed to detect the !guest case.

Based on code from Peter and Like.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Like Xu <like.xu.linux@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/perf_event.h | 28 ++++++----------------------
 kernel/events/core.c       | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index eefa197d5354..d582dfeb4e20 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1240,37 +1240,21 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
-static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
-{
-	/* Reg/unreg perf_guest_cbs waits for readers via synchronize_rcu(). */
-	lockdep_assert_preemption_disabled();
+DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
+DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
 
-	/* Prevent reloading between a !NULL check and dereferences. */
-	return READ_ONCE(perf_guest_cbs);
-}
 static inline unsigned int perf_guest_state(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	return guest_cbs ? guest_cbs->state() : 0;
+	return static_call(__perf_guest_state)();
 }
 static inline unsigned long perf_guest_get_ip(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	/*
-	 * Arbitrarily return '0' in the unlikely scenario that the callbacks
-	 * are unregistered between checking guest state and getting the IP.
-	 */
-	return guest_cbs ? guest_cbs->get_ip() : 0;
+	return static_call(__perf_guest_get_ip)();
 }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	if (guest_cbs && guest_cbs->handle_intel_pt_intr)
-		return guest_cbs->handle_intel_pt_intr();
-	return 0;
+	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6ec05809f54..79c8ee1778a4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6485,12 +6485,23 @@ static void perf_pending_event(struct irq_work *entry)
 #ifdef CONFIG_GUEST_PERF_EVENTS
 struct perf_guest_info_callbacks *perf_guest_cbs;
 
+DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
+DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
 	if (WARN_ON_ONCE(perf_guest_cbs))
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, cbs);
+	static_call_update(__perf_guest_state, cbs->state);
+	static_call_update(__perf_guest_get_ip, cbs->get_ip);
+
+	/* Implementing ->handle_intel_pt_intr is optional. */
+	if (cbs->handle_intel_pt_intr)
+		static_call_update(__perf_guest_handle_intel_pt_intr,
+				   cbs->handle_intel_pt_intr);
 }
 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
 
@@ -6500,6 +6511,10 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, NULL);
+	static_call_update(__perf_guest_state, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_handle_intel_pt_intr,
+			   (void *)&__static_call_return0);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
-- 
2.33.0.464.g1972c5931b-goog


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

WARNING: multiple messages have this Message-ID (diff)
From: Sean Christopherson <seanjc@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	 Arnaldo Carvalho de Melo <acme@kernel.org>,
	Will Deacon <will@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	 Marc Zyngier <maz@kernel.org>, Guo Ren <guoren@kernel.org>,
	Nick Hu <nickhu@andestech.com>,
	 Greentime Hu <green.hu@gmail.com>,
	Vincent Chen <deanbo422@gmail.com>,
	 Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	 Albert Ou <aou@eecs.berkeley.edu>,
	Paolo Bonzini <pbonzini@redhat.com>,
	 Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>
Cc: Wanpeng Li <wanpengli@tencent.com>,
	kvm@vger.kernel.org,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	linux-riscv@lists.infradead.org, Jiri Olsa <jolsa@redhat.com>,
	kvmarm@lists.cs.columbia.edu,
	Stefano Stabellini <sstabellini@kernel.org>,
	Joerg Roedel <joro@8bytes.org>,
	linux-csky@vger.kernel.org, xen-devel@lists.xenproject.org,
	Zhu Lingshan <lingshan.zhu@intel.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Artem Kashkanov <artem.kashkanov@intel.com>,
	linux-arm-kernel@lists.infradead.org,
	Jim Mattson <jmattson@google.com>,
	Like Xu <like.xu.linux@gmail.com>,
	Sean Christopherson <seanjc@google.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Vitaly Kuznetsov <vkuznets@redhat.com>
Subject: [PATCH v3 09/16] perf/core: Use static_call to optimize perf_guest_info_callbacks
Date: Tue, 21 Sep 2021 17:05:26 -0700	[thread overview]
Message-ID: <20210922000533.713300-10-seanjc@google.com> (raw)
In-Reply-To: <20210922000533.713300-1-seanjc@google.com>

Use static_call to optimize perf's guest callbacks on arm64 and x86,
which are now the only architectures that define the callbacks.  Use
DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as
the callback semantics are that a return value '0' means "not in guest".

static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is
also advantageous versus other solutions, e.g. per-cpu callbacks, in that
a per-cpu memory load is not needed to detect the !guest case.

Based on code from Peter and Like.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Like Xu <like.xu.linux@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/perf_event.h | 28 ++++++----------------------
 kernel/events/core.c       | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index eefa197d5354..d582dfeb4e20 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1240,37 +1240,21 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
-static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
-{
-	/* Reg/unreg perf_guest_cbs waits for readers via synchronize_rcu(). */
-	lockdep_assert_preemption_disabled();
+DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
+DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
 
-	/* Prevent reloading between a !NULL check and dereferences. */
-	return READ_ONCE(perf_guest_cbs);
-}
 static inline unsigned int perf_guest_state(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	return guest_cbs ? guest_cbs->state() : 0;
+	return static_call(__perf_guest_state)();
 }
 static inline unsigned long perf_guest_get_ip(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	/*
-	 * Arbitrarily return '0' in the unlikely scenario that the callbacks
-	 * are unregistered between checking guest state and getting the IP.
-	 */
-	return guest_cbs ? guest_cbs->get_ip() : 0;
+	return static_call(__perf_guest_get_ip)();
 }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	if (guest_cbs && guest_cbs->handle_intel_pt_intr)
-		return guest_cbs->handle_intel_pt_intr();
-	return 0;
+	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6ec05809f54..79c8ee1778a4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6485,12 +6485,23 @@ static void perf_pending_event(struct irq_work *entry)
 #ifdef CONFIG_GUEST_PERF_EVENTS
 struct perf_guest_info_callbacks *perf_guest_cbs;
 
+DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
+DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
 	if (WARN_ON_ONCE(perf_guest_cbs))
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, cbs);
+	static_call_update(__perf_guest_state, cbs->state);
+	static_call_update(__perf_guest_get_ip, cbs->get_ip);
+
+	/* Implementing ->handle_intel_pt_intr is optional. */
+	if (cbs->handle_intel_pt_intr)
+		static_call_update(__perf_guest_handle_intel_pt_intr,
+				   cbs->handle_intel_pt_intr);
 }
 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
 
@@ -6500,6 +6511,10 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 		return;
 
 	WRITE_ONCE(perf_guest_cbs, NULL);
+	static_call_update(__perf_guest_state, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
+	static_call_update(__perf_guest_handle_intel_pt_intr,
+			   (void *)&__static_call_return0);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
-- 
2.33.0.464.g1972c5931b-goog

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

  parent reply	other threads:[~2021-09-22  0:06 UTC|newest]

Thread overview: 189+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-22  0:05 [PATCH v3 00/16] perf: KVM: Fix, optimize, and clean up callbacks Sean Christopherson
2021-09-22  0:05 ` Sean Christopherson
2021-09-22  0:05 ` Sean Christopherson
2021-09-22  0:05 ` Sean Christopherson
2021-09-22  0:05 ` Sean Christopherson
2021-09-22  0:05 ` [PATCH v3 01/16] perf: Ensure perf_guest_cbs aren't reloaded between !NULL check and deref Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-11-04  9:32   ` Like Xu
2021-11-04  9:32     ` Like Xu
2021-11-04  9:32     ` Like Xu
2021-11-04  9:32     ` Like Xu
2021-11-04 14:18     ` Sean Christopherson
2021-11-04 14:18       ` Sean Christopherson
2021-11-04 14:18       ` Sean Christopherson
2021-11-04 14:18       ` Sean Christopherson
2021-11-10 11:07       ` Paolo Bonzini
2021-11-10 11:07         ` Paolo Bonzini
2021-11-10 11:07         ` Paolo Bonzini
2021-11-10 11:07         ` Paolo Bonzini
2021-11-11  0:39         ` Sean Christopherson
2021-11-11  0:39           ` Sean Christopherson
2021-11-11  0:39           ` Sean Christopherson
2021-11-11  0:39           ` Sean Christopherson
2021-09-22  0:05 ` [PATCH v3 02/16] KVM: x86: Register perf callbacks after calling vendor's hardware_setup() Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:23   ` Paolo Bonzini
2021-09-22  6:23     ` Paolo Bonzini
2021-09-22  6:23     ` Paolo Bonzini
2021-09-22  6:23     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 03/16] KVM: x86: Register Processor Trace interrupt hook iff PT enabled in guest Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:24   ` Paolo Bonzini
2021-09-22  6:24     ` Paolo Bonzini
2021-09-22  6:24     ` Paolo Bonzini
2021-09-22  6:24     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 04/16] perf: Stop pretending that perf can handle multiple guest callbacks Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:25   ` Paolo Bonzini
2021-09-22  6:25     ` Paolo Bonzini
2021-09-22  6:25     ` Paolo Bonzini
2021-09-22  6:25     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 05/16] perf: Drop dead and useless guest "support" from arm, csky, nds32 and riscv Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:26   ` Paolo Bonzini
2021-09-22  6:26     ` Paolo Bonzini
2021-09-22  6:26     ` Paolo Bonzini
2021-09-22  6:26     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 06/16] perf/core: Rework guest callbacks to prepare for static_call support Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:28   ` Paolo Bonzini
2021-09-22  6:28     ` Paolo Bonzini
2021-09-22  6:28     ` Paolo Bonzini
2021-09-22  6:28     ` Paolo Bonzini
2021-09-22 18:31   ` Boris Ostrovsky
2021-09-22 18:31     ` Boris Ostrovsky
2021-09-22 18:31     ` Boris Ostrovsky
2021-09-22 18:31     ` Boris Ostrovsky
2021-09-22  0:05 ` [PATCH v3 07/16] perf: Add wrappers for invoking guest callbacks Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:29   ` Paolo Bonzini
2021-09-22  6:29     ` Paolo Bonzini
2021-09-22  6:29     ` Paolo Bonzini
2021-09-22  6:29     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 08/16] perf: Force architectures to opt-in to " Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:32   ` Paolo Bonzini
2021-09-22  6:32     ` Paolo Bonzini
2021-09-22  6:32     ` Paolo Bonzini
2021-09-22  6:32     ` Paolo Bonzini
2021-09-22 14:48     ` Sean Christopherson
2021-09-22 14:48       ` Sean Christopherson
2021-09-22 14:48       ` Sean Christopherson
2021-09-22 14:48       ` Sean Christopherson
2021-11-09 23:46       ` Sean Christopherson
2021-11-09 23:46         ` Sean Christopherson
2021-11-09 23:46         ` Sean Christopherson
2021-11-09 23:46         ` Sean Christopherson
2021-09-22  0:05 ` Sean Christopherson [this message]
2021-09-22  0:05   ` [PATCH v3 09/16] perf/core: Use static_call to optimize perf_guest_info_callbacks Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:33   ` Paolo Bonzini
2021-09-22  6:33     ` Paolo Bonzini
2021-09-22  6:33     ` Paolo Bonzini
2021-09-22  6:33     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 10/16] KVM: x86: Drop current_vcpu for kvm_running_vcpu + kvm_arch_vcpu variable Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:40   ` Paolo Bonzini
2021-09-22  6:40     ` Paolo Bonzini
2021-09-22  6:40     ` Paolo Bonzini
2021-09-22  6:40     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 11/16] KVM: x86: More precisely identify NMI from guest when handling PMI Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:38   ` Paolo Bonzini
2021-09-22  6:38     ` Paolo Bonzini
2021-09-22  6:38     ` Paolo Bonzini
2021-09-22  6:38     ` Paolo Bonzini
2021-09-22  0:05 ` [PATCH v3 12/16] KVM: Move x86's perf guest info callbacks to generic KVM Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:41   ` Paolo Bonzini
2021-09-22  6:41     ` Paolo Bonzini
2021-09-22  6:41     ` Paolo Bonzini
2021-09-22  6:41     ` Paolo Bonzini
2021-10-11  9:35   ` Marc Zyngier
2021-10-11  9:35     ` Marc Zyngier
2021-10-11  9:35     ` Marc Zyngier
2021-10-11  9:35     ` Marc Zyngier
2021-10-11 14:46     ` Sean Christopherson
2021-10-11 14:46       ` Sean Christopherson
2021-10-11 14:46       ` Sean Christopherson
2021-10-11 14:46       ` Sean Christopherson
2021-10-11 15:33       ` Marc Zyngier
2021-10-11 15:33         ` Marc Zyngier
2021-10-11 15:33         ` Marc Zyngier
2021-10-11 15:33         ` Marc Zyngier
2021-09-22  0:05 ` [PATCH v3 13/16] KVM: x86: Move Intel Processor Trace interrupt handler to vmx.c Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05 ` [PATCH v3 14/16] KVM: arm64: Convert to the generic perf callbacks Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-10-11  9:38   ` Marc Zyngier
2021-10-11  9:38     ` Marc Zyngier
2021-10-11  9:38     ` Marc Zyngier
2021-10-11  9:38     ` Marc Zyngier
2021-09-22  0:05 ` [PATCH v3 15/16] KVM: arm64: Drop perf.c and fold its tiny bits of code into arm.c / pmu.c Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-10-11  9:44   ` Marc Zyngier
2021-10-11  9:44     ` Marc Zyngier
2021-10-11  9:44     ` Marc Zyngier
2021-10-11  9:44     ` Marc Zyngier
2021-11-09 23:16     ` Sean Christopherson
2021-11-09 23:16       ` Sean Christopherson
2021-11-09 23:16       ` Sean Christopherson
2021-11-09 23:16       ` Sean Christopherson
2021-09-22  0:05 ` [PATCH v3 16/16] perf: Drop guest callback (un)register stubs Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  0:05   ` Sean Christopherson
2021-09-22  6:29   ` Paolo Bonzini
2021-09-22  6:29     ` Paolo Bonzini
2021-09-22  6:29     ` Paolo Bonzini
2021-09-22  6:29     ` Paolo Bonzini
2021-09-22  6:42 ` [PATCH v3 00/16] perf: KVM: Fix, optimize, and clean up callbacks Paolo Bonzini
2021-09-22  6:42   ` Paolo Bonzini
2021-09-22  6:42   ` Paolo Bonzini
2021-09-22  6:42   ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210922000533.713300-10-seanjc@google.com \
    --to=seanjc@google.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=alexandru.elisei@arm.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=artem.kashkanov@intel.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=deanbo422@gmail.com \
    --cc=green.hu@gmail.com \
    --cc=guoren@kernel.org \
    --cc=james.morse@arm.com \
    --cc=jgross@suse.com \
    --cc=jmattson@google.com \
    --cc=jolsa@redhat.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=like.xu.linux@gmail.com \
    --cc=lingshan.zhu@intel.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-csky@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=nickhu@andestech.com \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sstabellini@kernel.org \
    --cc=suzuki.poulose@arm.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=will@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.