linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Nuno Das Neves <nunodasneves@linux.microsoft.com>
To: linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org,
	mikelley@microsoft.com, viremana@linux.microsoft.com,
	sunilmut@microsoft.com, wei.liu@kernel.org, vkuznets@redhat.com,
	ligrassi@microsoft.com, kys@microsoft.com,
	sthemmin@microsoft.com, anbelski@linux.microsoft.com
Subject: [PATCH v3 12/19] drivers/hv: run vp ioctl and isr
Date: Tue, 28 Sep 2021 11:31:08 -0700	[thread overview]
Message-ID: <1632853875-20261-13-git-send-email-nunodasneves@linux.microsoft.com> (raw)
In-Reply-To: <1632853875-20261-1-git-send-email-nunodasneves@linux.microsoft.com>

Introduce an ioctl for running a vp and an isr to copy messages from
the synic page to the vp data structure.

Add synchronization primitives to ensure that the isr is finished
when the run vp ioctl is entered.

Co-developed-by: Lillian Grassin-Drake <ligrassi@microsoft.com>
Signed-off-by: Lillian Grassin-Drake <ligrassi@microsoft.com>
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
 Documentation/virt/mshv/api.rst |  14 +++
 arch/x86/kernel/cpu/mshyperv.c  |  16 +++
 drivers/hv/hv_synic.c           |  96 ++++++++++++++++++
 drivers/hv/mshv.h               |   1 +
 drivers/hv/mshv_main.c          | 173 +++++++++++++++++++++++++++++++-
 include/asm-generic/mshyperv.h  |   3 +
 include/linux/mshv.h            |   7 ++
 include/uapi/linux/mshv.h       |   1 +
 8 files changed, 310 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst
index f0631236c063..9deddcd7de54 100644
--- a/Documentation/virt/mshv/api.rst
+++ b/Documentation/virt/mshv/api.rst
@@ -106,4 +106,18 @@ Get/set vp registers. See asm/hyperv-tlfs.h for the complete set of registers.
 Includes general purpose platform registers, MSRs, and virtual registers that
 are part of Microsoft Hypervisor platform and not directly exposed to the guest.
 
+3.6 MSHV_RUN_VP
+---------------
+:Type: vp ioctl
+:Parameters: struct hv_message
+:Returns: 0 on success
+
+Run the vp, returning when it triggers an intercept, or if the calling thread
+is interrupted by a signal. In this case errno will be set to EINTR.
+
+On return, the vp will be suspended.
+This ioctl will fail on any vp that's already running (not suspended).
+
+Information about the intercept is returned in the hv_message struct.
+
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 22f13343b5da..3efc4c3dbb7e 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,6 +41,7 @@ struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
 
 #if IS_ENABLED(CONFIG_HYPERV)
+static void (*mshv_handler)(void);
 static void (*vmbus_handler)(void);
 static void (*hv_stimer0_handler)(void);
 static void (*hv_kexec_handler)(void);
@@ -51,6 +52,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	inc_irq_stat(irq_hv_callback_count);
+	if (mshv_handler)
+		mshv_handler();
+
 	if (vmbus_handler)
 		vmbus_handler();
 
@@ -60,6 +64,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
 	set_irq_regs(old_regs);
 }
 
+void hv_setup_mshv_irq(void (*handler)(void))
+{
+	mshv_handler = handler;
+}
+EXPORT_SYMBOL_GPL(hv_setup_mshv_irq);
+
+void hv_remove_mshv_irq(void)
+{
+	mshv_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_mshv_irq);
+
 void hv_setup_vmbus_handler(void (*handler)(void))
 {
 	vmbus_handler = handler;
diff --git a/drivers/hv/hv_synic.c b/drivers/hv/hv_synic.c
index c6546ae54ea9..27d52c480043 100644
--- a/drivers/hv/hv_synic.c
+++ b/drivers/hv/hv_synic.c
@@ -11,11 +11,107 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/io.h>
+#include <linux/random.h>
 #include <linux/mshv.h>
 #include <asm/mshyperv.h>
 
 #include "mshv.h"
 
+void mshv_isr(void)
+{
+	struct hv_message_page **msg_page =
+			this_cpu_ptr(mshv.synic_message_page);
+	struct hv_message *msg;
+	u32 message_type;
+	struct mshv_partition *partition;
+	struct mshv_vp *vp;
+	u64 partition_id;
+	u32 vp_index;
+	int i;
+	unsigned long flags;
+	struct task_struct *task;
+
+	if (unlikely(!(*msg_page))) {
+		pr_err("%s: Missing synic page!\n", __func__);
+		return;
+	}
+
+	msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);
+
+	/*
+	 * If the type isn't set, there isn't really a message;
+	 * it may be some other hyperv interrupt
+	 */
+	message_type = msg->header.message_type;
+	if (message_type == HVMSG_NONE)
+		return;
+
+	/* Look for the partition */
+	partition_id = msg->header.sender;
+
+	/* Hold this lock for the rest of the isr, because the partition could
+	 * be released anytime.
+	 * e.g. the MSHV_RUN_VP thread could wake on another cpu; it could
+	 * release the partition unless we hold this!
+	 */
+	spin_lock_irqsave(&mshv.partitions.lock, flags);
+
+	for (i = 0; i < MSHV_MAX_PARTITIONS; i++) {
+		partition = mshv.partitions.array[i];
+		if (partition && partition->id == partition_id)
+			break;
+	}
+
+	if (unlikely(i == MSHV_MAX_PARTITIONS)) {
+		pr_err("%s: failed to find partition\n", __func__);
+		goto unlock_out;
+	}
+
+	/*
+	 * Since we directly index the vp, and it has to exist for us to be here
+	 * (because the vp is only deleted when the partition is), no additional
+	 * locking is needed here
+	 */
+	vp_index = ((struct hv_x64_intercept_message_header *)msg->u.payload)->vp_index;
+	vp = partition->vps.array[vp_index];
+	if (unlikely(!vp)) {
+		pr_err("%s: failed to find vp\n", __func__);
+		goto unlock_out;
+	}
+
+	memcpy(vp->run.intercept_message, msg, sizeof(struct hv_message));
+
+	if (unlikely(!vp->run.task)) {
+		pr_err("%s: vp run task not set\n", __func__);
+		goto unlock_out;
+	}
+
+	/* Save the task and reset it so we can wake without racing */
+	task = vp->run.task;
+	vp->run.task = NULL;
+
+	/*
+	 * up the semaphore before waking so that we don't race with
+	 * down_trylock
+	 */
+	up(&vp->run.sem);
+
+	/*
+	 * Finally, wake the process. If it wakes the vp and generates
+	 * another intercept then the message will be queued by the hypervisor
+	 */
+	wake_up_process(task);
+
+unlock_out:
+	spin_unlock_irqrestore(&mshv.partitions.lock, flags);
+
+	/* Acknowledge message with hypervisor */
+	msg->header.message_type = HVMSG_NONE;
+	wrmsrl(HV_X64_MSR_EOM, 0);
+
+	add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
+}
+
 int mshv_synic_init(unsigned int cpu)
 {
 	union hv_synic_simp simp;
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index b8fece9fe80d..014352a11190 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h
@@ -29,6 +29,7 @@
 
 extern struct mshv mshv;
 
+void mshv_isr(void);
 int mshv_synic_init(unsigned int cpu);
 int mshv_synic_cleanup(unsigned int cpu);
 
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index 1b32cf7ad9f3..390ccf893dd1 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/io.h>
 #include <linux/cpuhotplug.h>
+#include <linux/random.h>
 #include <linux/mshv.h>
 #include <asm/mshyperv.h>
 
@@ -64,6 +65,141 @@ static struct miscdevice mshv_dev = {
 	.mode = 0600,
 };
 
+static long
+mshv_vp_ioctl_run_vp(struct mshv_vp *vp, void __user *ret_message)
+{
+	long ret;
+	u32 msg_type;
+	struct hv_register_assoc suspend_registers[2] = {
+		{ .name = HV_REGISTER_EXPLICIT_SUSPEND },
+		{ .name = HV_REGISTER_INTERCEPT_SUSPEND }
+	};
+	/* Pointers to values for convenience */
+	union hv_explicit_suspend_register *explicit_suspend =
+				&suspend_registers[0].value.explicit_suspend;
+	union hv_intercept_suspend_register *intercept_suspend =
+				&suspend_registers[1].value.intercept_suspend;
+
+	/* Check that the VP is suspended */
+	ret = hv_call_get_vp_registers(
+			vp->index,
+			vp->partition->id,
+			2,
+			suspend_registers);
+	if (ret)
+		return ret;
+
+	if (!explicit_suspend->suspended &&
+	    !intercept_suspend->suspended) {
+		pr_err("%s: vp not suspended!\n", __func__);
+		return -EBADFD;
+	}
+
+	/*
+	 * If intercept_suspend is set, we missed a message and need to
+	 * wait for mshv_isr to complete
+	 */
+	if (intercept_suspend->suspended) {
+		if (down_interruptible(&vp->run.sem))
+			return -EINTR;
+		if (copy_to_user(ret_message, vp->run.intercept_message,
+				 sizeof(struct hv_message)))
+			return -EFAULT;
+		intercept_suspend->suspended = 0;
+		explicit_suspend->suspended = 1;
+		ret = hv_call_set_vp_registers(
+				vp->index,
+				vp->partition->id,
+				2,
+				suspend_registers);
+		if (ret) {
+			pr_err("%s: failed to set suspend bits\n", __func__);
+			return ret;
+		}
+		return 0;
+	}
+
+	/*
+	 * At this point the semaphore ensures that mshv_isr is done,
+	 * and the mutex ensures that no other threads are touching this vp
+	 */
+	vp->run.task = current;
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	/* Now actually start the vp running */
+	explicit_suspend->suspended = 0;
+	intercept_suspend->suspended = 0;
+	ret = hv_call_set_vp_registers(
+			vp->index,
+			vp->partition->id,
+			2,
+			suspend_registers);
+	if (ret) {
+		pr_err("%s: failed to clear suspend bits\n", __func__);
+		set_current_state(TASK_RUNNING);
+		vp->run.task = NULL;
+		return ret;
+	}
+
+	schedule();
+
+	/* Explicitly suspend the vp to make sure it's stopped */
+	explicit_suspend->suspended = 1;
+	ret = hv_call_set_vp_registers(
+		vp->index,
+		vp->partition->id,
+		1,
+		&suspend_registers[0]);
+	if (ret) {
+		pr_err("%s: failed to set explicit suspend bit\n", __func__);
+		return -EBADFD;
+	}
+
+	/*
+	 * Check if woken up by a signal
+	 * Note that if the signal came after being woken by mshv_isr(),
+	 * we will still get the message correctly on re-entry
+	 */
+	if (signal_pending(current)) {
+		pr_debug("%s: woke up, received signal\n", __func__);
+		return -EINTR;
+	}
+
+	/*
+	 * No signal pending, so we were woken by hv_host_isr()
+	 * The isr can't be running now, and the intercept_suspend bit is set
+	 * We use it as a flag to tell if we missed a message due to a signal,
+	 * so we must clear it here and reset the semaphore
+	 */
+	intercept_suspend->suspended = 0;
+	ret = hv_call_set_vp_registers(
+		vp->index,
+		vp->partition->id,
+		1,
+		&suspend_registers[1]);
+	if (ret) {
+		pr_err("%s: failed to clear intercept suspend bit\n", __func__);
+		return -EBADFD;
+	}
+	if (down_trylock(&vp->run.sem)) {
+		pr_err("%s: semaphore in unexpected state\n", __func__);
+		return -EBADFD;
+	}
+
+	msg_type = vp->run.intercept_message->header.message_type;
+
+	if (msg_type == HVMSG_NONE) {
+		pr_err("%s: woke up, but no message\n", __func__);
+		return -ENOMSG;
+	}
+
+	if (copy_to_user(ret_message, vp->run.intercept_message,
+			 sizeof(struct hv_message)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long
 mshv_vp_ioctl_get_regs(struct mshv_vp *vp, void __user *user_args)
 {
@@ -110,6 +246,7 @@ mshv_vp_ioctl_set_regs(struct mshv_vp *vp, void __user *user_args)
 	struct mshv_vp_registers args;
 	struct hv_register_assoc *registers;
 	long ret;
+	int i;
 
 	if (copy_from_user(&args, user_args, sizeof(args)))
 		return -EFAULT;
@@ -129,6 +266,20 @@ mshv_vp_ioctl_set_regs(struct mshv_vp *vp, void __user *user_args)
 		goto free_return;
 	}
 
+	for (i = 0; i < args.count; i++) {
+		/*
+		 * Disallow setting suspend registers to ensure run vp state
+		 * is consistent
+		 */
+		if (registers[i].name == HV_REGISTER_EXPLICIT_SUSPEND ||
+		    registers[i].name == HV_REGISTER_INTERCEPT_SUSPEND) {
+			pr_err("%s: not allowed to set suspend registers\n",
+			       __func__);
+			ret = -EINVAL;
+			goto free_return;
+		}
+	}
+
 	ret = hv_call_set_vp_registers(vp->index, vp->partition->id,
 				       args.count, registers);
 
@@ -147,6 +298,9 @@ mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 		return -EINTR;
 
 	switch (ioctl) {
+	case MSHV_RUN_VP:
+		r = mshv_vp_ioctl_run_vp(vp, (void __user *)arg);
+		break;
 	case MSHV_GET_VP_REGISTERS:
 		r = mshv_vp_ioctl_get_regs(vp, (void __user *)arg);
 		break;
@@ -197,12 +351,20 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
 		return -ENOMEM;
 
 	mutex_init(&vp->mutex);
+	sema_init(&vp->run.sem, 0);
+
+	vp->run.intercept_message =
+		(struct hv_message *)get_zeroed_page(GFP_KERNEL);
+	if (!vp->run.intercept_message) {
+		ret = -ENOMEM;
+		goto free_vp;
+	}
 
 	vp->index = args.vp_index;
 	vp->partition = mshv_partition_get(partition);
 	if (!vp->partition) {
 		ret = -EBADF;
-		goto free_vp;
+		goto free_message;
 	}
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
@@ -240,6 +402,8 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
 	put_unused_fd(fd);
 put_partition:
 	mshv_partition_put(partition);
+free_message:
+	free_page((unsigned long)vp->run.intercept_message);
 free_vp:
 	kfree(vp);
 
@@ -456,6 +620,9 @@ destroy_partition(struct mshv_partition *partition)
 		mshv.partitions.array[i] = NULL;
 	}
 
+	if (!mshv.partitions.count)
+		hv_remove_mshv_irq();
+
 	spin_unlock_irqrestore(&mshv.partitions.lock, flags);
 
 	/*
@@ -474,6 +641,7 @@ destroy_partition(struct mshv_partition *partition)
 		vp = partition->vps.array[i];
 		if (!vp)
 			continue;
+		free_page((unsigned long)vp->run.intercept_message);
 		kfree(vp);
 	}
 
@@ -537,6 +705,9 @@ add_partition(struct mshv_partition *partition)
 	mshv.partitions.count++;
 	mshv.partitions.array[i] = partition;
 
+	if (mshv.partitions.count == 1)
+		hv_setup_mshv_irq(mshv_isr);
+
 out_unlock:
 	spin_unlock_irqrestore(&mshv.partitions.lock, flags);
 
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 672b08f79dae..36412d27ecc7 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -143,6 +143,9 @@ void hv_remove_vmbus_handler(void);
 void hv_setup_stimer0_handler(void (*handler)(void));
 void hv_remove_stimer0_handler(void);
 
+void hv_setup_mshv_irq(void (*handler)(void));
+void hv_remove_mshv_irq(void);
+
 void hv_setup_kexec_handler(void (*handler)(void));
 void hv_remove_kexec_handler(void);
 void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 7709aaa1e064..3933d80294f1 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -8,6 +8,8 @@
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/semaphore.h>
+#include <linux/sched.h>
 #include <uapi/linux/mshv.h>
 
 #define MSHV_MAX_PARTITIONS		128
@@ -18,6 +20,11 @@ struct mshv_vp {
 	u32 index;
 	struct mshv_partition *partition;
 	struct mutex mutex;
+	struct {
+		struct semaphore sem;
+		struct task_struct *task;
+		struct hv_message *intercept_message;
+	} run;
 };
 
 struct mshv_mem_region {
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index 7a4e0c340dd4..229abac9502f 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -55,5 +55,6 @@ struct mshv_vp_registers {
 /* vp device */
 #define MSHV_GET_VP_REGISTERS   _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers)
 #define MSHV_SET_VP_REGISTERS   _IOW(MSHV_IOCTL, 0x06, struct mshv_vp_registers)
+#define MSHV_RUN_VP		_IOR(MSHV_IOCTL, 0x07, struct hv_message)
 
 #endif
-- 
2.23.4


  parent reply	other threads:[~2021-09-28 18:31 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-28 18:30 [PATCH v3 00/19] Microsoft Hypervisor root partition ioctl interface Nuno Das Neves
2021-09-28 18:30 ` [PATCH v3 01/19] x86/hyperv: convert hyperv statuses to linux error codes Nuno Das Neves
2021-09-28 18:30 ` [PATCH v3 02/19] x86/hyperv: convert hyperv statuses to strings Nuno Das Neves
2021-09-28 18:30 ` [PATCH v3 03/19] drivers/hv: minimal mshv module (/dev/mshv/) Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 04/19] drivers/hv: check extension ioctl Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 05/19] drivers/hv: create partition ioctl Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 06/19] drivers/hv: create, initialize, finalize, delete partition hypercalls Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 07/19] drivers/hv: withdraw memory hypercall Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 08/19] drivers/hv: map and unmap guest memory Nuno Das Neves
2021-09-28 21:27   ` Olaf Hering
2021-09-30 14:17     ` Wei Liu
2021-10-04 17:08       ` Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 09/19] drivers/hv: create vcpu ioctl Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 10/19] drivers/hv: get and set vcpu registers ioctls Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 11/19] drivers/hv: set up synic pages for intercept messages Nuno Das Neves
2021-09-28 21:38   ` Olaf Hering
2021-09-28 18:31 ` Nuno Das Neves [this message]
2021-09-28 18:31 ` [PATCH v3 13/19] drivers/hv: install intercept ioctl Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 14/19] drivers/hv: assert interrupt ioctl Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 15/19] drivers/hv: get and set vp state ioctls Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 16/19] drivers/hv: mmap vp register page Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 17/19] drivers/hv: get and set partition property ioctls Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 18/19] drivers/hv: Add enlightenment bits to create partition ioctl Nuno Das Neves
2021-09-28 18:31 ` [PATCH v3 19/19] drivers/hv: Translate GVA to GPA Nuno Das Neves

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1632853875-20261-13-git-send-email-nunodasneves@linux.microsoft.com \
    --to=nunodasneves@linux.microsoft.com \
    --cc=anbelski@linux.microsoft.com \
    --cc=kys@microsoft.com \
    --cc=ligrassi@microsoft.com \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=sthemmin@microsoft.com \
    --cc=sunilmut@microsoft.com \
    --cc=viremana@linux.microsoft.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=vkuznets@redhat.com \
    --cc=wei.liu@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).