* [PATCH v3 1/3] Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3
2021-04-16 14:34 [PATCH v3 0/3] Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Andrea Parri (Microsoft)
@ 2021-04-16 14:34 ` Andrea Parri (Microsoft)
2021-04-16 15:07 ` Michael Kelley
2021-04-16 14:34 ` [PATCH v3 2/3] Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Andrea Parri (Microsoft)
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Andrea Parri (Microsoft) @ 2021-04-16 14:34 UTC (permalink / raw)
To: linux-kernel
Cc: kys, haiyangz, sthemmin, wei.liu, linux-hyperv, mikelley,
Andrea Parri (Microsoft)
Hyper-V has added VMBus protocol version 5.3. Allow Linux guests to
negotiate the new version on version of Hyper-V that support it.
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
---
drivers/hv/connection.c | 3 ++-
include/linux/hyperv.h | 2 ++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 350e8c5cafa8c..dc19d5ae4373c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -45,6 +45,7 @@ EXPORT_SYMBOL_GPL(vmbus_proto_version);
* Table of VMBus versions listed from newest to oldest.
*/
static __u32 vmbus_versions[] = {
+ VERSION_WIN10_V5_3,
VERSION_WIN10_V5_2,
VERSION_WIN10_V5_1,
VERSION_WIN10_V5,
@@ -60,7 +61,7 @@ static __u32 vmbus_versions[] = {
* Maximal VMBus protocol version guests can negotiate. Useful to cap the
* VMBus version for testing and debugging purpose.
*/
-static uint max_version = VERSION_WIN10_V5_2;
+static uint max_version = VERSION_WIN10_V5_3;
module_param(max_version, uint, S_IRUGO);
MODULE_PARM_DESC(max_version,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2c18c8e768efe..3ce36bbb398e9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -234,6 +234,7 @@ static inline u32 hv_get_avail_to_write_percent(
* 5 . 0 (Newer Windows 10)
* 5 . 1 (Windows 10 RS4)
* 5 . 2 (Windows Server 2019, RS5)
+ * 5 . 3 (Windows Server 2022)
*/
#define VERSION_WS2008 ((0 << 16) | (13))
@@ -245,6 +246,7 @@ static inline u32 hv_get_avail_to_write_percent(
#define VERSION_WIN10_V5 ((5 << 16) | (0))
#define VERSION_WIN10_V5_1 ((5 << 16) | (1))
#define VERSION_WIN10_V5_2 ((5 << 16) | (2))
+#define VERSION_WIN10_V5_3 ((5 << 16) | (3))
/* Make maximum size of pipe payload of 16K */
#define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384)
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 2/3] Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE
2021-04-16 14:34 [PATCH v3 0/3] Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Andrea Parri (Microsoft)
2021-04-16 14:34 ` [PATCH v3 1/3] Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3 Andrea Parri (Microsoft)
@ 2021-04-16 14:34 ` Andrea Parri (Microsoft)
2021-04-16 14:34 ` [PATCH v3 3/3] Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline Andrea Parri (Microsoft)
2021-04-18 13:04 ` [PATCH v3 0/3] Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Wei Liu
3 siblings, 0 replies; 7+ messages in thread
From: Andrea Parri (Microsoft) @ 2021-04-16 14:34 UTC (permalink / raw)
To: linux-kernel
Cc: kys, haiyangz, sthemmin, wei.liu, linux-hyperv, mikelley,
Andrea Parri (Microsoft)
Introduce the CHANNELMSG_MODIFYCHANNEL_RESPONSE message type, and code
to receive and process such a message.
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/hv/channel.c | 99 ++++++++++++++++++++++++++++++++-------
drivers/hv/channel_mgmt.c | 42 +++++++++++++++++
drivers/hv/hv_trace.h | 15 ++++++
drivers/hv/vmbus_drv.c | 4 +-
include/linux/hyperv.h | 11 ++++-
5 files changed, 152 insertions(+), 19 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index db30be8f9ccea..aa4ef75d8dee2 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -209,31 +209,96 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
}
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
+static int send_modifychannel_without_ack(struct vmbus_channel *channel, u32 target_vp)
+{
+ struct vmbus_channel_modifychannel msg;
+ int ret;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ msg.child_relid = channel->offermsg.child_relid;
+ msg.target_vp = target_vp;
+
+ ret = vmbus_post_msg(&msg, sizeof(msg), true);
+ trace_vmbus_send_modifychannel(&msg, ret);
+
+ return ret;
+}
+
+static int send_modifychannel_with_ack(struct vmbus_channel *channel, u32 target_vp)
+{
+ struct vmbus_channel_modifychannel *msg;
+ struct vmbus_channel_msginfo *info;
+ unsigned long flags;
+ int ret;
+
+ info = kzalloc(sizeof(struct vmbus_channel_msginfo) +
+ sizeof(struct vmbus_channel_modifychannel),
+ GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ init_completion(&info->waitevent);
+ info->waiting_channel = channel;
+
+ msg = (struct vmbus_channel_modifychannel *)info->msg;
+ msg->header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ msg->child_relid = channel->offermsg.child_relid;
+ msg->target_vp = target_vp;
+
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+ list_add_tail(&info->msglistentry, &vmbus_connection.chn_msg_list);
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+ ret = vmbus_post_msg(msg, sizeof(*msg), true);
+ trace_vmbus_send_modifychannel(msg, ret);
+ if (ret != 0) {
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+ list_del(&info->msglistentry);
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ goto free_info;
+ }
+
+ /*
+ * Release channel_mutex; otherwise, vmbus_onoffer_rescind() could block on
+ * the mutex and be unable to signal the completion.
+ *
+ * See the caller target_cpu_store() for information about the usage of the
+ * mutex.
+ */
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ wait_for_completion(&info->waitevent);
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+ list_del(&info->msglistentry);
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+ if (info->response.modify_response.status)
+ ret = -EAGAIN;
+
+free_info:
+ kfree(info);
+ return ret;
+}
+
/*
* Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
*
- * CHANNELMSG_MODIFYCHANNEL messages are aynchronous. Also, Hyper-V does not
- * ACK such messages. IOW we can't know when the host will stop interrupting
- * the "old" vCPU and start interrupting the "new" vCPU for the given channel.
+ * CHANNELMSG_MODIFYCHANNEL messages are aynchronous. When VMbus version 5.3
+ * or later is negotiated, Hyper-V always sends an ACK in response to such a
+ * message. For VMbus version 5.2 and earlier, it never sends an ACK. With-
+ * out an ACK, we can not know when the host will stop interrupting the "old"
+ * vCPU and start interrupting the "new" vCPU for the given channel.
*
* The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
* VERSION_WIN10_V4_1.
*/
-int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
+int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp)
{
- struct vmbus_channel_modifychannel conn_msg;
- int ret;
-
- memset(&conn_msg, 0, sizeof(conn_msg));
- conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
- conn_msg.child_relid = child_relid;
- conn_msg.target_vp = target_vp;
-
- ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
-
- trace_vmbus_send_modifychannel(&conn_msg, ret);
-
- return ret;
+ if (vmbus_proto_version >= VERSION_WIN10_V5_3)
+ return send_modifychannel_with_ack(channel, target_vp);
+ return send_modifychannel_without_ack(channel, target_vp);
}
EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f3cf4af01e102..4c9e45d1f462c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -1311,6 +1311,46 @@ static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}
+/*
+ * vmbus_onmodifychannel_response - Modify Channel response handler.
+ *
+ * This is invoked when we received a response to our channel modify request.
+ * Find the matching request, copy the response and signal the requesting thread.
+ */
+static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
+{
+ struct vmbus_channel_modifychannel_response *response;
+ struct vmbus_channel_msginfo *msginfo;
+ unsigned long flags;
+
+ response = (struct vmbus_channel_modifychannel_response *)hdr;
+
+ trace_vmbus_onmodifychannel_response(response);
+
+ /*
+ * Find the modify msg, copy the response and signal/unblock the wait event.
+ */
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+
+ list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
+ struct vmbus_channel_message_header *responseheader =
+ (struct vmbus_channel_message_header *)msginfo->msg;
+
+ if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
+ struct vmbus_channel_modifychannel *modifymsg;
+
+ modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
+ if (modifymsg->child_relid == response->child_relid) {
+ memcpy(&msginfo->response.modify_response, response,
+ sizeof(*response));
+ complete(&msginfo->waitevent);
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+}
+
/*
* vmbus_ongpadl_torndown - GPADL torndown handler.
*
@@ -1428,6 +1468,8 @@ channel_message_table[CHANNELMSG_COUNT] = {
{ CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
{ CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
{ CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
+ { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response,
+ sizeof(struct vmbus_channel_modifychannel_response)},
};
/*
diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
index 6063bb21bb137..c02a1719e92f2 100644
--- a/drivers/hv/hv_trace.h
+++ b/drivers/hv/hv_trace.h
@@ -103,6 +103,21 @@ TRACE_EVENT(vmbus_ongpadl_created,
)
);
+TRACE_EVENT(vmbus_onmodifychannel_response,
+ TP_PROTO(const struct vmbus_channel_modifychannel_response *response),
+ TP_ARGS(response),
+ TP_STRUCT__entry(
+ __field(u32, child_relid)
+ __field(u32, status)
+ ),
+ TP_fast_assign(__entry->child_relid = response->child_relid;
+ __entry->status = response->status;
+ ),
+ TP_printk("child_relid 0x%x, status %d",
+ __entry->child_relid, __entry->status
+ )
+ );
+
TRACE_EVENT(vmbus_ongpadl_torndown,
TP_PROTO(const struct vmbus_channel_gpadl_torndown *gpadltorndown),
TP_ARGS(gpadltorndown),
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 51c40d5e3c8ac..b12d6827b222b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1848,13 +1848,15 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (target_cpu == origin_cpu)
goto cpu_store_unlock;
- if (vmbus_send_modifychannel(channel->offermsg.child_relid,
+ if (vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu))) {
ret = -EIO;
goto cpu_store_unlock;
}
/*
+ * For version before VERSION_WIN10_V5_3, the following warning holds:
+ *
* Warning. At this point, there is *no* guarantee that the host will
* have successfully processed the vmbus_send_modifychannel() request.
* See the header comment of vmbus_send_modifychannel() for more info.
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3ce36bbb398e9..9c2373a1cb2d1 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -477,6 +477,7 @@ enum vmbus_channel_message_type {
CHANNELMSG_TL_CONNECT_REQUEST = 21,
CHANNELMSG_MODIFYCHANNEL = 22,
CHANNELMSG_TL_CONNECT_RESULT = 23,
+ CHANNELMSG_MODIFYCHANNEL_RESPONSE = 24,
CHANNELMSG_COUNT
};
@@ -590,6 +591,13 @@ struct vmbus_channel_open_result {
u32 status;
} __packed;
+/* Modify Channel Result parameters */
+struct vmbus_channel_modifychannel_response {
+ struct vmbus_channel_message_header header;
+ u32 child_relid;
+ u32 status;
+} __packed;
+
/* Close channel parameters; */
struct vmbus_channel_close_channel {
struct vmbus_channel_message_header header;
@@ -722,6 +730,7 @@ struct vmbus_channel_msginfo {
struct vmbus_channel_gpadl_torndown gpadl_torndown;
struct vmbus_channel_gpadl_created gpadl_created;
struct vmbus_channel_version_response version_response;
+ struct vmbus_channel_modifychannel_response modify_response;
} response;
u32 msgsize;
@@ -1596,7 +1605,7 @@ extern __u32 vmbus_proto_version;
int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
-int vmbus_send_modifychannel(u32 child_relid, u32 target_vp);
+int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);
/* Get the start of the ring buffer. */
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 3/3] Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline
2021-04-16 14:34 [PATCH v3 0/3] Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Andrea Parri (Microsoft)
2021-04-16 14:34 ` [PATCH v3 1/3] Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3 Andrea Parri (Microsoft)
2021-04-16 14:34 ` [PATCH v3 2/3] Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Andrea Parri (Microsoft)
@ 2021-04-16 14:34 ` Andrea Parri (Microsoft)
2021-04-16 15:11 ` Michael Kelley
2021-04-18 13:04 ` [PATCH v3 0/3] Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Wei Liu
3 siblings, 1 reply; 7+ messages in thread
From: Andrea Parri (Microsoft) @ 2021-04-16 14:34 UTC (permalink / raw)
To: linux-kernel
Cc: kys, haiyangz, sthemmin, wei.liu, linux-hyperv, mikelley,
Andrea Parri (Microsoft)
Check that enough time has passed such that the modify channel message
has been processed before taking a CPU offline.
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
---
drivers/hv/hv.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 53 insertions(+), 3 deletions(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 3e6ff83adff42..e0c522d143a37 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -15,6 +15,7 @@
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
+#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
@@ -292,12 +293,50 @@ void hv_synic_disable_regs(unsigned int cpu)
disable_percpu_irq(vmbus_irq);
}
+#define HV_MAX_TRIES 3
+/*
+ * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
+ * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
+ * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
+ *
+ * If a bit is set, that means there is a pending channel interrupt. The expectation is
+ * that the normal interrupt handling mechanism will find and process the channel interrupt
+ * "very soon", and in the process clear the bit.
+ */
+static bool hv_synic_event_pending(void)
+{
+ struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
+ union hv_synic_event_flags *event =
+ (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
+ unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
+ bool pending;
+ u32 relid;
+ int tries = 0;
+
+retry:
+ pending = false;
+ for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
+ /* Special case - VMBus channel protocol messages */
+ if (relid == 0)
+ continue;
+ pending = true;
+ break;
+ }
+ if (pending && tries++ < HV_MAX_TRIES) {
+ usleep_range(10000, 20000);
+ goto retry;
+ }
+ return pending;
+}
int hv_synic_cleanup(unsigned int cpu)
{
struct vmbus_channel *channel, *sc;
bool channel_found = false;
+ if (vmbus_connection.conn_state != CONNECTED)
+ goto always_cleanup;
+
/*
* Hyper-V does not provide a way to change the connect CPU once
* it is set; we must prevent the connect CPU from going offline
@@ -305,8 +344,7 @@ int hv_synic_cleanup(unsigned int cpu)
* path where the vmbus is already disconnected, the CPU must be
* allowed to shut down.
*/
- if (cpu == VMBUS_CONNECT_CPU &&
- vmbus_connection.conn_state == CONNECTED)
+ if (cpu == VMBUS_CONNECT_CPU)
return -EBUSY;
/*
@@ -333,9 +371,21 @@ int hv_synic_cleanup(unsigned int cpu)
}
mutex_unlock(&vmbus_connection.channel_mutex);
- if (channel_found && vmbus_connection.conn_state == CONNECTED)
+ if (channel_found)
+ return -EBUSY;
+
+ /*
+ * channel_found == false means that any channels that were previously
+ * assigned to the CPU have been reassigned elsewhere with a call of
+ * vmbus_send_modifychannel(). Scan the event flags page looking for
+ * bits that are set and waiting with a timeout for vmbus_chan_sched()
+ * to process such bits. If bits are still set after this operation
+ * and VMBus is connected, fail the CPU offlining operation.
+ */
+ if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
return -EBUSY;
+always_cleanup:
hv_stimer_legacy_cleanup(cpu);
hv_synic_disable_regs(cpu);
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread