[PATCH 2/2] x86:kvm:hyperv: guest->host event signaling via eventfd

From: Roman Kagan <rkagan@virtuozzo.com>
To: kvm@vger.kernel.org, "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>
Cc: "Denis V. Lunev" <den@openvz.org>
Subject: [PATCH 2/2] x86:kvm:hyperv: guest->host event signaling via eventfd
Date: Mon,  4 Dec 2017 22:00:44 +0300	[thread overview]
Message-ID: <20171204190044.14125-3-rkagan@virtuozzo.com> (raw)
In-Reply-To: <20171204190044.14125-1-rkagan@virtuozzo.com>

In Hyper-V, the fast guest->host notification mechanism is the
SIGNAL_EVENT hypercall, with a single parameter of the connection ID to
signal.

Currently this hypercall incurs a user exit and requires userspace to
decode the parameters and then notify a potentially different I/O
context.

To avoid the costly user exit, process this hypercall and signal the
corresponding eventfd in KVM, similar to ioeventfd.  The association
between the connection id and the eventfd is established via the newly
introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an
(srcu-protected) IDR.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
---
 Documentation/virtual/kvm/api.txt |  23 ++++++++
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/kvm/hyperv.h             |   1 +
 include/uapi/linux/kvm.h          |  13 +++++
 arch/x86/kvm/hyperv.c             | 115 +++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c                |  10 ++++
 6 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f670e4b9e7f3..e4f319add8b7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3394,6 +3394,29 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
 or if no page table is present for the addresses (e.g. when using
 hugepages).
 
+4.109 KVM_HYPERV_EVENTFD
+
+Capability: KVM_CAP_HYPERV_EVENTFD
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_hyperv_eventfd (in)
+Returns: 0 on success, !0 on error
+
+This ioctl (un)registers an eventfd to receive notifications from the guest on
+the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
+causing a user exit.
+
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;
+	__s32 fd;
+	__u32 flags;
+	__u32 padding[3];
+};
+
+The acceptable values for the flags field are:
+
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bfb99770c34..0d37eb837991 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -739,6 +739,8 @@ struct kvm_hv {
 	u64 hv_crash_ctl;
 
 	HV_REFERENCE_TSC_PAGE tsc_ref;
+
+	struct idr conn_to_evt;
 };
 
 enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index cc2468244ca2..837465d69c6d 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -90,5 +90,6 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 282d7613fce8..465f45c13cdc 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
 #define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_HYPERV_EVENTFD 151
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1359,6 +1360,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
 
+#define KVM_HYPERV_EVENTFD	_IOW(KVMIO,  0xba, struct kvm_hyperv_eventfd)
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
@@ -1419,4 +1422,14 @@ struct kvm_assigned_msix_entry {
 #define KVM_ARM_DEV_EL1_PTIMER		(1 << 1)
 #define KVM_ARM_DEV_PMU			(1 << 2)
 
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;		/* connection id, KVM_HYPERV_CONN_ID_BITS wide */
+	__s32 fd;		/* eventfd to signal; unused on DEASSIGN */
+	__u32 flags;		/* 0 or KVM_HYPERV_EVENTFD_DEASSIGN */
+	__u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_BITS		24
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
 #endif /* __LINUX_KVM_H */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 015fb06c7522..d2e8915546b1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
+#include <linux/eventfd.h>
 
 #include <asm/apicdef.h>
 #include <trace/events/kvm.h>
@@ -1226,6 +1227,54 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+/*
+ * Slow-path SIGNAL_EVENT: fetch the input block at guest-physical @gpa and
+ * store connection id + flag number in @conn_id; returns an HV_STATUS_* code.
+ */
+static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id)
+{
+	struct hv_input_signal_event msg;
+
+	/* the block must be aligned for its type and must not cross a page */
+	if ((gpa & (__alignof__(msg) - 1)) ||
+	    offset_in_page(gpa) + sizeof(msg) > PAGE_SIZE)
+		return HV_STATUS_INVALID_ALIGNMENT;
+
+	/* copy the block out; no page reference is taken that could leak */
+	if (kvm_vcpu_read_guest(vcpu, gpa, &msg, sizeof(msg)))
+		return HV_STATUS_INSUFFICIENT_MEMORY;
+
+	*conn_id = msg.connectionid.u.id + msg.flag_number;
+	return HV_STATUS_SUCCESS;
+}
+
+static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 ingpa)
+{
+	u16 ret = HV_STATUS_SUCCESS;
+	u32 conn_id;
+	int idx;
+	struct eventfd_ctx *eventfd;
+
+	if (likely(fast))
+		conn_id = (ingpa & 0xffffffff) + ((ingpa >> 32) & 0xffff);
+	else
+		ret = hvcall_sigevent_param(vcpu, ingpa, &conn_id);
+	if (ret != HV_STATUS_SUCCESS)
+		return ret;
+	if (conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1))
+		return HV_STATUS_INVALID_CONNECTION_ID;
+
+	/* srcu pins the eventfd; the IDR's own nodes need rcu_read_lock() */
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	rcu_read_lock();
+	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id);
+	rcu_read_unlock();
+	if (eventfd)
+		eventfd_signal(eventfd, 1);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID;
+}
+
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	u64 param, ingpa, outgpa, ret;
@@ -1276,8 +1325,12 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		kvm_vcpu_on_spin(vcpu, true);
 		break;
-	case HVCALL_POST_MESSAGE:
 	case HVCALL_SIGNAL_EVENT:
+		res = kvm_hvcall_signal_event(vcpu, fast, ingpa);
+		if (res != HV_STATUS_INVALID_CONNECTION_ID)
+			break;
+		/* maybe userspace knows this conn_id: fall through */
+	case HVCALL_POST_MESSAGE:
 		/* don't bother userspace if it has no way to handle it */
 		if (!vcpu_to_synic(vcpu)->active) {
 			res = HV_STATUS_INVALID_HYPERCALL_CODE;
@@ -1305,8 +1358,68 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 void kvm_hv_init_vm(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.hyperv.hv_lock);
+	idr_init(&kvm->arch.hyperv.conn_to_evt);
 }
 
 void kvm_hv_destroy_vm(struct kvm *kvm)
 {
+	int i;
+	struct eventfd_ctx *eventfd;
+
+	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
+		eventfd_ctx_put(eventfd);	/* ref held since assign */
+	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
+}
+
+/*
+ * Register the eventfd behind @fd for @conn_id; -EEXIST if @conn_id is taken.
+ */
+static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd)
+{
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	struct eventfd_ctx *ctx = eventfd_ctx_fdget(fd);
+	int id;
+
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	/* [conn_id, conn_id + 1) makes idr_alloc() claim exactly conn_id */
+	mutex_lock(&hv->hv_lock);
+	id = idr_alloc(&hv->conn_to_evt, ctx, conn_id, conn_id + 1, GFP_KERNEL);
+	mutex_unlock(&hv->hv_lock);
+	if (id >= 0)
+		return 0;
+
+	/* nothing was stored: drop our reference and report the failure */
+	eventfd_ctx_put(ctx);
+	return id == -ENOSPC ? -EEXIST : id;
+}
+
+/* Unbind @conn_id; -ENOENT if nothing was bound, 0 once the ref is dropped. */
+static int kvm_hv_eventfd_deassign(struct kvm *kvm, int conn_id)
+{
+	struct eventfd_ctx *eventfd;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	mutex_lock(&hv->hv_lock);
+	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
+	mutex_unlock(&hv->hv_lock);
+	if (!eventfd)
+		return -ENOENT;
+
+	/* let in-flight srcu readers finish with the eventfd before the put */
+	synchronize_srcu(&kvm->srcu);
+	eventfd_ctx_put(eventfd);
+	return 0;
+}
+
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
+{
+	/* only the DEASSIGN flag and KVM_HYPERV_CONN_ID_BITS-wide ids pass */
+	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
+	    (args->conn_id >> KVM_HYPERV_CONN_ID_BITS))
+		return -EINVAL;
+	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
+		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
+	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fe479d4b82c..2c786682f6f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2686,6 +2686,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_SYNIC:
 	case KVM_CAP_HYPERV_SYNIC2:
 	case KVM_CAP_HYPERV_VP_INDEX:
+	case KVM_CAP_HYPERV_EVENTFD:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -4281,6 +4282,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
+	case KVM_HYPERV_EVENTFD: {
+		struct kvm_hyperv_eventfd hvevfd;
+
+		r = -EFAULT;
+		if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
+			goto out;
+		r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
-- 
2.14.3