On Thu, 2021-01-28 at 13:43 +0100, Paolo Bonzini wrote:
> Independent of the answer to the above, this is really the only place
> where you're adding Xen code to a hot path. Can you please use a
> STATIC_KEY_FALSE kvm_has_xen_vcpu (and a static inline function) to
> quickly return from kvm_xen_has_interrupt() if no vCPU has a shared info
> set up?

Something like this, then?

From 6504c78f76efd8c60630959111bd77c28d43fca7 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Fri, 29 Jan 2021 17:30:40 +0000
Subject: [PATCH] KVM: x86/xen: Add static branch to avoid overhead of
 checking Xen upcall vector

Signed-off-by: David Woodhouse
---
 arch/x86/kvm/x86.c |  1 +
 arch/x86/kvm/xen.c | 68 +++++++++++++++++++++++++++-------------------
 arch/x86/kvm/xen.h | 15 +++++++++-
 3 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 870dea74ea94..fb2bc362efd8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10580,6 +10580,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_x86_ops.vm_destroy(kvm);
 	for (i = 0; i < kvm->arch.msr_filter.count; i++)
 		kfree(kvm->arch.msr_filter.ranges[i].bitmap);
+	kvm_xen_destroy(kvm);
 	kvm_pic_destroy(kvm);
 	kvm_ioapic_destroy(kvm);
 	kvm_free_vcpus(kvm);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 3041f774493e..9b6766e5d84f 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -19,6 +19,8 @@
 
 #include "trace.h"
 
+DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_has_vector, HZ);
+
 static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 {
 	gpa_t gpa = gfn_to_gpa(gfn);
@@ -176,7 +178,7 @@ void kvm_xen_setup_runstate_page(struct kvm_vcpu *v)
 	kvm_xen_update_runstate(v, RUNSTATE_running, steal_time);
 }
 
-int kvm_xen_has_interrupt(struct kvm_vcpu *v)
+int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
 	u8 rc = 0;
 
@@ -184,37 +186,42 @@ int kvm_xen_has_interrupt(struct kvm_vcpu *v)
 	 * If the global upcall vector (HVMIRQ_callback_vector) is set and
 	 * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
 	 */
-	if (v->arch.xen.vcpu_info_set && v->kvm->arch.xen.upcall_vector) {
-		struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
-		struct kvm_memslots *slots = kvm_memslots(v->kvm);
-		unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
-
-		/* No need for compat handling here */
-		BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
-			     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
-		BUILD_BUG_ON(sizeof(rc) !=
-			     sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
-		BUILD_BUG_ON(sizeof(rc) !=
-			     sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
-
-		/*
-		 * For efficiency, this mirrors the checks for using the valid
-		 * cache in kvm_read_guest_offset_cached(), but just uses
-		 * __get_user() instead. And falls back to the slow path.
-		 */
-		if (likely(slots->generation == ghc->generation &&
-			   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
-			/* Fast path */
-			__get_user(rc, (u8 __user *)ghc->hva + offset);
-		} else {
-			/* Slow path */
-			kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
-						     sizeof(rc));
-		}
+	struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
+	struct kvm_memslots *slots = kvm_memslots(v->kvm);
+	unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
+
+	/* No need for compat handling here */
+	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
+		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
+	BUILD_BUG_ON(sizeof(rc) !=
+		     sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+	BUILD_BUG_ON(sizeof(rc) !=
+		     sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+
+	/*
+	 * For efficiency, this mirrors the checks for using the valid
+	 * cache in kvm_read_guest_offset_cached(), but just uses
+	 * __get_user() instead. And falls back to the slow path.
+	 */
+	if (likely(slots->generation == ghc->generation &&
+		   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
+		/* Fast path */
+		__get_user(rc, (u8 __user *)ghc->hva + offset);
+	} else {
+		/* Slow path */
+		kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+					     sizeof(rc));
 	}
+
 	return rc;
 }
 
+void kvm_xen_destroy(struct kvm *kvm)
+{
+	if (kvm->arch.xen.upcall_vector)
+		static_branch_slow_dec_deferred(&kvm_xen_has_vector);
+}
+
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 {
 	struct kvm_vcpu *v;
@@ -300,6 +307,11 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 			break;
 		}
 
+		if (data->u.vector && !kvm->arch.xen.upcall_vector)
+			static_branch_inc(&kvm_xen_has_vector.key);
+		else if (kvm->arch.xen.upcall_vector && !data->u.vector)
+			static_branch_slow_dec_deferred(&kvm_xen_has_vector);
+
 		kvm->arch.xen.upcall_vector = data->u.vector;
 		r = 0;
 		break;
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index d64916ac4a12..d49cd8ea8da9 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,13 +9,16 @@
 #ifndef __ARCH_X86_KVM_XEN_H__
 #define __ARCH_X86_KVM_XEN_H__
 
+#include <linux/jump_label_ratelimit.h>
+
 void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
 void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
-int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
+int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_config(struct kvm_vcpu *vcpu, u64 data);
+void kvm_xen_destroy(struct kvm *kvm);
 
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
@@ -23,6 +26,16 @@ static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 		KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;
 }
 
+extern struct static_key_false_deferred kvm_xen_has_vector;
+
+static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	if (static_branch_unlikely(&kvm_xen_has_vector.key) &&
+	    vcpu->arch.xen.vcpu_info_set && vcpu->kvm->arch.xen.upcall_vector)
+		return __kvm_xen_has_interrupt(vcpu);
+
+	return 0;
+}
 
 /* 32-bit compatibility definitions, also used natively in 32-bit build */
 #include
-- 
2.17.1
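
For anyone unfamiliar with the deferred static key API, here is a minimal
sketch of the pattern the patch relies on, outside of the KVM code. Only
kvm_xen_has_vector mirrors the patch; the example_* helpers and the bare
u8 vector variable are purely illustrative:

	#include <linux/jump_label_ratelimit.h>

	/*
	 * Patched out (false) by default; the HZ rate limit defers the
	 * disable-side text patching so repeated clears don't thrash it.
	 */
	DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_has_vector, HZ);

	/* Hot path: a single no-op/jump until the key has been raised. */
	static int example_has_interrupt(void)
	{
		if (static_branch_unlikely(&kvm_xen_has_vector.key))
			return 1;	/* would call __kvm_xen_has_interrupt() here */

		return 0;
	}

	/* Control path: flip the key only on 0 <-> nonzero transitions. */
	static void example_set_vector(u8 *cur_vector, u8 new_vector)
	{
		if (new_vector && !*cur_vector)
			static_branch_inc(&kvm_xen_has_vector.key);
		else if (*cur_vector && !new_vector)
			static_branch_slow_dec_deferred(&kvm_xen_has_vector);

		*cur_vector = new_vector;
	}

The enable side uses static_branch_inc() on the embedded key so enables can
nest, while the disable side goes through static_branch_slow_dec_deferred()
so the expensive code patching is rate-limited rather than done on every
clear.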