linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 4/4] hv_utils: Add the support of hibernation
@ 2020-01-13  6:32 Dexuan Cui
  2020-01-22 18:27 ` Michael Kelley
  0 siblings, 1 reply; 4+ messages in thread
From: Dexuan Cui @ 2020-01-13  6:32 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	Sasha Levin, linux-hyperv, Michael Kelley, vkuznets,
	linux-kernel


Add util_pre_suspend() and util_pre_resume() for some hv_utils devices
(e.g. kvp/vss/fcopy), because they need special handling before
util_suspend() calls vmbus_close().

For kvp, all the possible pending work items should be cancelled.

For vss and fcopy, extra clean-up needs to be done, i.e. fake a THAW
message for hv_vss_daemon and fake a CANCEL_FCOPY message for
hv_fcopy_daemonemon, otherwise when the VM resums back, the daemons
can end up in an inconsistent state (i.e. the file systems are
frozen but will never be thawed; the file transmitted via fcopy
may not be complete). Note: there is an extra patch for the daemons:
"Tools: hv: Reopen the devices if read() or write() returns errors",
because the hv_utils driver can not guarantee the whole transaction
finishes completely once util_suspend() starts to run (at this time,
all the userspace processes are frozen).

util_probe() disables channel->callback_event to avoid the race with
the the channel callback.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/hv_fcopy.c     | 58 ++++++++++++++++++++++++++++++++++++-
 drivers/hv/hv_kvp.c       | 44 ++++++++++++++++++++++++++--
 drivers/hv/hv_snapshot.c  | 60 +++++++++++++++++++++++++++++++++++++--
 drivers/hv/hv_util.c      | 60 ++++++++++++++++++++++++++++++++++++++-
 drivers/hv/hyperv_vmbus.h |  6 ++++
 include/linux/hyperv.h    |  2 ++
 6 files changed, 224 insertions(+), 6 deletions(-)

diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 08fa4a5de644..d63853f16356 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -346,9 +346,65 @@ int hv_fcopy_init(struct hv_util_service *srv)
 	return 0;
 }
 
+static void hv_fcopy_cancel_work(void)
+{
+	cancel_delayed_work_sync(&fcopy_timeout_work);
+	cancel_work_sync(&fcopy_send_work);
+}
+
+int hv_fcopy_pre_suspend(void)
+{
+	struct vmbus_channel *channel = fcopy_transaction.recv_channel;
+	struct hv_fcopy_hdr *fcopy_msg;
+
+	tasklet_disable(&channel->callback_event);
+
+	/*
+	 * Fake a CANCEL_FCOPY message to the user space daemon in case the
+	 * daemon is in the middle of copying some file. It doesn't matter if
+	 * there is already a message pending to be delivered to the user
+	 * space: we force fcopy_transaction.state to be HVUTIL_READY, so the
+	 * user space daemon's write() will fail with -EINVAL (see
+	 * fcopy_on_msg()), and the daemon will reset the device by closing and
+	 * re-opening it.
+	 */
+	fcopy_msg = kzalloc(sizeof(*fcopy_msg), GFP_KERNEL);
+	if (!fcopy_msg)
+		goto err;
+
+	fcopy_msg->operation = CANCEL_FCOPY;
+
+	hv_fcopy_cancel_work();
+
+	/* We don't care about the return value. */
+	hvutil_transport_send(hvt, fcopy_msg, sizeof(*fcopy_msg), NULL);
+
+	kfree(fcopy_msg);
+
+	fcopy_transaction.state = HVUTIL_READY;
+
+	/* tasklet_enable() will be called in hv_fcopy_pre_resume(). */
+
+	return 0;
+err:
+	tasklet_enable(&channel->callback_event);
+	return -ENOMEM;
+}
+
+int hv_fcopy_pre_resume(void)
+{
+	struct vmbus_channel *channel = fcopy_transaction.recv_channel;
+
+	tasklet_enable(&channel->callback_event);
+
+	return 0;
+}
+
 void hv_fcopy_deinit(void)
 {
 	fcopy_transaction.state = HVUTIL_DEVICE_DYING;
-	cancel_delayed_work_sync(&fcopy_timeout_work);
+
+	hv_fcopy_cancel_work();
+
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index ae7c028dc5a8..ca03f68df5d0 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -758,11 +758,51 @@ hv_kvp_init(struct hv_util_service *srv)
 	return 0;
 }
 
-void hv_kvp_deinit(void)
+static void hv_kvp_cancel_work(void)
 {
-	kvp_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&kvp_host_handshake_work);
 	cancel_delayed_work_sync(&kvp_timeout_work);
 	cancel_work_sync(&kvp_sendkey_work);
+}
+
+int hv_kvp_pre_suspend(void)
+{
+	struct vmbus_channel *channel = kvp_transaction.recv_channel;
+
+	tasklet_disable(&channel->callback_event);
+
+	/*
+	 * If there is a pending transtion, it's unnecessary to tell the host
+	 * that the tranction will fail, becasue that is implied when
+	 * util_suspend() calls vmbus_close() later.
+	 */
+	hv_kvp_cancel_work();
+
+	/*
+	 * Forece the state to READY to handle the ICMSGTYPE_NEGOTIATE message
+	 * later. The user space daemon may go out of order and its write()
+	 * may get an EINVAL error: this doesn't matter since the daemon will
+	 * reset the device by closing and re-opening the device.
+	 */
+	kvp_transaction.state = HVUTIL_READY;
+
+	return 0;
+}
+
+int hv_kvp_pre_resume(void)
+{
+	struct vmbus_channel *channel = kvp_transaction.recv_channel;
+
+	tasklet_enable(&channel->callback_event);
+
+	return 0;
+}
+
+void hv_kvp_deinit(void)
+{
+	kvp_transaction.state = HVUTIL_DEVICE_DYING;
+
+	hv_kvp_cancel_work();
+
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 03b6454268b3..eb766ff8841b 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -229,6 +229,7 @@ static void vss_handle_request(struct work_struct *dummy)
 		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
 		vss_send_op();
 		return;
+
 	case VSS_OP_GET_DM_INFO:
 		vss_transaction.msg->dm_info.flags = 0;
 		break;
@@ -379,10 +380,65 @@ hv_vss_init(struct hv_util_service *srv)
 	return 0;
 }
 
-void hv_vss_deinit(void)
+static void hv_vss_cancel_work(void)
 {
-	vss_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&vss_timeout_work);
 	cancel_work_sync(&vss_handle_request_work);
+}
+
+int hv_vss_pre_suspend(void)
+{
+	struct vmbus_channel *channel = vss_transaction.recv_channel;
+	struct hv_vss_msg *vss_msg;
+
+	tasklet_disable(&channel->callback_event);
+
+	/*
+	 * Fake a THAW message for the user space daemon in case the daemon
+	 * has frozen the file systems. It doesn't matter if there is already
+	 * a message pending to be delivered to the user space: we force
+	 * vss_transaction.state to be HVUTIL_READY, so the user space daemon's
+	 * write() will fail with -EINVAL (see vss_on_msg()), and the daemon
+	 * will reset the device by closing and re-opening it.
+	 */
+	vss_msg = kzalloc(sizeof(*vss_msg), GFP_KERNEL);
+	if (!vss_msg)
+		goto err;
+
+	vss_msg->vss_hdr.operation = VSS_OP_THAW;
+
+	/* Cancel any possible pending work. */
+	hv_vss_cancel_work();
+
+	/* We don't care about the return value. */
+	hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
+
+	kfree(vss_msg);
+
+	vss_transaction.state = HVUTIL_READY;
+
+	/* tasklet_enable() will be called in hv_vss_pre_resume(). */
+
+	return 0;
+err:
+	tasklet_enable(&channel->callback_event);
+	return -ENOMEM;
+}
+
+int hv_vss_pre_resume(void)
+{
+	struct vmbus_channel *channel = vss_transaction.recv_channel;
+
+	tasklet_enable(&channel->callback_event);
+
+	return 0;
+}
+
+void hv_vss_deinit(void)
+{
+	vss_transaction.state = HVUTIL_DEVICE_DYING;
+
+	hv_vss_cancel_work();
+
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index d5216af62788..255faa3d657c 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -123,12 +123,14 @@ static struct hv_util_service util_shutdown = {
 };
 
 static int hv_timesync_init(struct hv_util_service *srv);
+static int hv_timesync_pre_suspend(void);
 static void hv_timesync_deinit(void);
 
 static void timesync_onchannelcallback(void *context);
 static struct hv_util_service util_timesynch = {
 	.util_cb = timesync_onchannelcallback,
 	.util_init = hv_timesync_init,
+	.util_pre_suspend = hv_timesync_pre_suspend,
 	.util_deinit = hv_timesync_deinit,
 };
 
@@ -140,18 +142,24 @@ static struct hv_util_service util_heartbeat = {
 static struct hv_util_service util_kvp = {
 	.util_cb = hv_kvp_onchannelcallback,
 	.util_init = hv_kvp_init,
+	.util_pre_suspend = hv_kvp_pre_suspend,
+	.util_pre_resume = hv_kvp_pre_resume,
 	.util_deinit = hv_kvp_deinit,
 };
 
 static struct hv_util_service util_vss = {
 	.util_cb = hv_vss_onchannelcallback,
 	.util_init = hv_vss_init,
+	.util_pre_suspend = hv_vss_pre_suspend,
+	.util_pre_resume = hv_vss_pre_resume,
 	.util_deinit = hv_vss_deinit,
 };
 
 static struct hv_util_service util_fcopy = {
 	.util_cb = hv_fcopy_onchannelcallback,
 	.util_init = hv_fcopy_init,
+	.util_pre_suspend = hv_fcopy_pre_suspend,
+	.util_pre_resume = hv_fcopy_pre_resume,
 	.util_deinit = hv_fcopy_deinit,
 };
 
@@ -512,6 +520,41 @@ static int util_remove(struct hv_device *dev)
 	return 0;
 }
 
+static int util_suspend(struct hv_device *dev)
+{
+	struct hv_util_service *srv = hv_get_drvdata(dev);
+	int ret = 0;
+
+	if (srv->util_pre_suspend) {
+		ret = srv->util_pre_suspend();
+
+		if (ret)
+			return ret;
+	}
+
+	vmbus_close(dev->channel);
+
+	return 0;
+}
+
+static int util_resume(struct hv_device *dev)
+{
+	struct hv_util_service *srv = hv_get_drvdata(dev);
+	int ret = 0;
+
+	if (srv->util_pre_resume) {
+		ret = srv->util_pre_resume();
+
+		if (ret)
+			return ret;
+	}
+
+	ret = vmbus_open(dev->channel, 4 * HV_HYP_PAGE_SIZE,
+			 4 * HV_HYP_PAGE_SIZE, NULL, 0, srv->util_cb,
+			 dev->channel);
+	return ret;
+}
+
 static const struct hv_vmbus_device_id id_table[] = {
 	/* Shutdown guid */
 	{ HV_SHUTDOWN_GUID,
@@ -548,6 +591,8 @@ static  struct hv_driver util_drv = {
 	.id_table = id_table,
 	.probe =  util_probe,
 	.remove =  util_remove,
+	.suspend = util_suspend,
+	.resume =  util_resume,
 	.driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
@@ -617,11 +662,24 @@ static int hv_timesync_init(struct hv_util_service *srv)
 	return 0;
 }
 
+static void hv_timesync_cancel_work(void)
+{
+	cancel_work_sync(&adj_time_work);
+}
+
+static int hv_timesync_pre_suspend(void)
+{
+	hv_timesync_cancel_work();
+
+	return 0;
+}
+
 static void hv_timesync_deinit(void)
 {
 	if (hv_ptp_clock)
 		ptp_clock_unregister(hv_ptp_clock);
-	cancel_work_sync(&adj_time_work);
+
+	hv_timesync_cancel_work();
 }
 
 static int __init init_hyperv_utils(void)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 20edcfd3b96c..f5fa3b3c9baf 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -352,14 +352,20 @@ void vmbus_on_msg_dpc(unsigned long data);
 
 int hv_kvp_init(struct hv_util_service *srv);
 void hv_kvp_deinit(void);
+int hv_kvp_pre_suspend(void);
+int hv_kvp_pre_resume(void);
 void hv_kvp_onchannelcallback(void *context);
 
 int hv_vss_init(struct hv_util_service *srv);
 void hv_vss_deinit(void);
+int hv_vss_pre_suspend(void);
+int hv_vss_pre_resume(void);
 void hv_vss_onchannelcallback(void *context);
 
 int hv_fcopy_init(struct hv_util_service *srv);
 void hv_fcopy_deinit(void);
+int hv_fcopy_pre_suspend(void);
+int hv_fcopy_pre_resume(void);
 void hv_fcopy_onchannelcallback(void *context);
 void vmbus_initiate_unload(bool crash);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 41c58011431e..692c89ccf5df 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1435,6 +1435,8 @@ struct hv_util_service {
 	void (*util_cb)(void *);
 	int (*util_init)(struct hv_util_service *);
 	void (*util_deinit)(void);
+	int (*util_pre_suspend)(void);
+	int (*util_pre_resume)(void);
 };
 
 struct vmbuspipe_hdr {
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH v2 4/4] hv_utils: Add the support of hibernation
  2020-01-13  6:32 [PATCH v2 4/4] hv_utils: Add the support of hibernation Dexuan Cui
@ 2020-01-22 18:27 ` Michael Kelley
  2020-01-23  8:11   ` Dexuan Cui
  0 siblings, 1 reply; 4+ messages in thread
From: Michael Kelley @ 2020-01-22 18:27 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, Sasha Levin, linux-hyperv, vkuznets, linux-kernel

From: Dexuan Cui <decui@microsoft.com> Sent: Sunday, January 12, 2020 10:32 PM
> 
> Add util_pre_suspend() and util_pre_resume() for some hv_utils devices
> (e.g. kvp/vss/fcopy), because they need special handling before
> util_suspend() calls vmbus_close().
> 
> For kvp, all the possible pending work items should be cancelled.
> 
> For vss and fcopy, extra clean-up needs to be done, i.e. fake a THAW
> message for hv_vss_daemon and fake a CANCEL_FCOPY message for
> hv_fcopy_daemonemon, otherwise when the VM resums back, the daemons
> can end up in an inconsistent state (i.e. the file systems are
> frozen but will never be thawed; the file transmitted via fcopy
> may not be complete). Note: there is an extra patch for the daemons:
> "Tools: hv: Reopen the devices if read() or write() returns errors",
> because the hv_utils driver can not guarantee the whole transaction
> finishes completely once util_suspend() starts to run (at this time,
> all the userspace processes are frozen).
> 
> util_probe() disables channel->callback_event to avoid the race with
> the the channel callback.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/hv_fcopy.c     | 58 ++++++++++++++++++++++++++++++++++++-
>  drivers/hv/hv_kvp.c       | 44 ++++++++++++++++++++++++++--
>  drivers/hv/hv_snapshot.c  | 60 +++++++++++++++++++++++++++++++++++++--
>  drivers/hv/hv_util.c      | 60 ++++++++++++++++++++++++++++++++++++++-
>  drivers/hv/hyperv_vmbus.h |  6 ++++
>  include/linux/hyperv.h    |  2 ++
>  6 files changed, 224 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
> index 08fa4a5de644..d63853f16356 100644
> --- a/drivers/hv/hv_fcopy.c
> +++ b/drivers/hv/hv_fcopy.c
> @@ -346,9 +346,65 @@ int hv_fcopy_init(struct hv_util_service *srv)
>  	return 0;
>  }
> 
> +static void hv_fcopy_cancel_work(void)
> +{
> +	cancel_delayed_work_sync(&fcopy_timeout_work);
> +	cancel_work_sync(&fcopy_send_work);
> +}
> +
> +int hv_fcopy_pre_suspend(void)
> +{
> +	struct vmbus_channel *channel = fcopy_transaction.recv_channel;
> +	struct hv_fcopy_hdr *fcopy_msg;
> +
> +	tasklet_disable(&channel->callback_event);
> +
> +	/*
> +	 * Fake a CANCEL_FCOPY message to the user space daemon in case the
> +	 * daemon is in the middle of copying some file. It doesn't matter if
> +	 * there is already a message pending to be delivered to the user
> +	 * space: we force fcopy_transaction.state to be HVUTIL_READY, so the
> +	 * user space daemon's write() will fail with -EINVAL (see
> +	 * fcopy_on_msg()), and the daemon will reset the device by closing and
> +	 * re-opening it.
> +	 */
> +	fcopy_msg = kzalloc(sizeof(*fcopy_msg), GFP_KERNEL);
> +	if (!fcopy_msg)
> +		goto err;
> +
> +	fcopy_msg->operation = CANCEL_FCOPY;
> +
> +	hv_fcopy_cancel_work();
> +
> +	/* We don't care about the return value. */
> +	hvutil_transport_send(hvt, fcopy_msg, sizeof(*fcopy_msg), NULL);
> +
> +	kfree(fcopy_msg);
> +
> +	fcopy_transaction.state = HVUTIL_READY;

Is the ordering correct here?  It seems like the fcopy daemon could receive
the cancel message and do the write before the state is forced to
HVUTIL_READY.

> +
> +	/* tasklet_enable() will be called in hv_fcopy_pre_resume(). */
> +
> +	return 0;
> +err:
> +	tasklet_enable(&channel->callback_event);

A nit, but if you did the memory allocation first, you could return -ENOMEM
immediately on error and avoid the err: label and re-enabling the tasklet.

> +	return -ENOMEM;
> +}
> +
> +int hv_fcopy_pre_resume(void)
> +{
> +	struct vmbus_channel *channel = fcopy_transaction.recv_channel;
> +
> +	tasklet_enable(&channel->callback_event);
> +
> +	return 0;
> +}
> +
>  void hv_fcopy_deinit(void)
>  {
>  	fcopy_transaction.state = HVUTIL_DEVICE_DYING;
> -	cancel_delayed_work_sync(&fcopy_timeout_work);
> +
> +	hv_fcopy_cancel_work();
> +
>  	hvutil_transport_destroy(hvt);
>  }
> diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
> index ae7c028dc5a8..ca03f68df5d0 100644
> --- a/drivers/hv/hv_kvp.c
> +++ b/drivers/hv/hv_kvp.c
> @@ -758,11 +758,51 @@ hv_kvp_init(struct hv_util_service *srv)
>  	return 0;
>  }
> 
> -void hv_kvp_deinit(void)
> +static void hv_kvp_cancel_work(void)
>  {
> -	kvp_transaction.state = HVUTIL_DEVICE_DYING;
>  	cancel_delayed_work_sync(&kvp_host_handshake_work);
>  	cancel_delayed_work_sync(&kvp_timeout_work);
>  	cancel_work_sync(&kvp_sendkey_work);
> +}
> +
> +int hv_kvp_pre_suspend(void)
> +{
> +	struct vmbus_channel *channel = kvp_transaction.recv_channel;
> +
> +	tasklet_disable(&channel->callback_event);
> +
> +	/*
> +	 * If there is a pending transtion, it's unnecessary to tell the host
> +	 * that the tranction will fail, becasue that is implied when
> +	 * util_suspend() calls vmbus_close() later.
> +	 */
> +	hv_kvp_cancel_work();
> +
> +	/*
> +	 * Forece the state to READY to handle the ICMSGTYPE_NEGOTIATE message
> +	 * later. The user space daemon may go out of order and its write()
> +	 * may get an EINVAL error: this doesn't matter since the daemon will
> +	 * reset the device by closing and re-opening the device.
> +	 */
> +	kvp_transaction.state = HVUTIL_READY;
> +
> +	return 0;
> +}
> +
> +int hv_kvp_pre_resume(void)
> +{
> +	struct vmbus_channel *channel = kvp_transaction.recv_channel;
> +
> +	tasklet_enable(&channel->callback_event);
> +
> +	return 0;
> +}
> +
> +void hv_kvp_deinit(void)
> +{
> +	kvp_transaction.state = HVUTIL_DEVICE_DYING;
> +
> +	hv_kvp_cancel_work();
> +
>  	hvutil_transport_destroy(hvt);
>  }
> diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
> index 03b6454268b3..eb766ff8841b 100644
> --- a/drivers/hv/hv_snapshot.c
> +++ b/drivers/hv/hv_snapshot.c
> @@ -229,6 +229,7 @@ static void vss_handle_request(struct work_struct *dummy)
>  		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
>  		vss_send_op();
>  		return;
> +

Gratuitous blank line added?

>  	case VSS_OP_GET_DM_INFO:
>  		vss_transaction.msg->dm_info.flags = 0;
>  		break;
> @@ -379,10 +380,65 @@ hv_vss_init(struct hv_util_service *srv)
>  	return 0;
>  }
> 
> -void hv_vss_deinit(void)
> +static void hv_vss_cancel_work(void)
>  {
> -	vss_transaction.state = HVUTIL_DEVICE_DYING;
>  	cancel_delayed_work_sync(&vss_timeout_work);
>  	cancel_work_sync(&vss_handle_request_work);
> +}
> +
> +int hv_vss_pre_suspend(void)
> +{
> +	struct vmbus_channel *channel = vss_transaction.recv_channel;
> +	struct hv_vss_msg *vss_msg;
> +
> +	tasklet_disable(&channel->callback_event);
> +
> +	/*
> +	 * Fake a THAW message for the user space daemon in case the daemon
> +	 * has frozen the file systems. It doesn't matter if there is already
> +	 * a message pending to be delivered to the user space: we force
> +	 * vss_transaction.state to be HVUTIL_READY, so the user space daemon's
> +	 * write() will fail with -EINVAL (see vss_on_msg()), and the daemon
> +	 * will reset the device by closing and re-opening it.
> +	 */
> +	vss_msg = kzalloc(sizeof(*vss_msg), GFP_KERNEL);
> +	if (!vss_msg)
> +		goto err;
> +
> +	vss_msg->vss_hdr.operation = VSS_OP_THAW;
> +
> +	/* Cancel any possible pending work. */
> +	hv_vss_cancel_work();
> +
> +	/* We don't care about the return value. */
> +	hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
> +
> +	kfree(vss_msg);
> +
> +	vss_transaction.state = HVUTIL_READY;

Same comment about the ordering.

> +
> +	/* tasklet_enable() will be called in hv_vss_pre_resume(). */
> +
> +	return 0;
> +err:
> +	tasklet_enable(&channel->callback_event);
> +	return -ENOMEM;

Same comment about simplifying the error handling applies here.

> +}
> +
> +int hv_vss_pre_resume(void)
> +{
> +	struct vmbus_channel *channel = vss_transaction.recv_channel;
> +
> +	tasklet_enable(&channel->callback_event);
> +
> +	return 0;
> +}
> +
> +void hv_vss_deinit(void)
> +{
> +	vss_transaction.state = HVUTIL_DEVICE_DYING;
> +
> +	hv_vss_cancel_work();
> +
>  	hvutil_transport_destroy(hvt);
>  }
> diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
> index d5216af62788..255faa3d657c 100644
> --- a/drivers/hv/hv_util.c
> +++ b/drivers/hv/hv_util.c
> @@ -123,12 +123,14 @@ static struct hv_util_service util_shutdown = {
>  };
> 
>  static int hv_timesync_init(struct hv_util_service *srv);
> +static int hv_timesync_pre_suspend(void);
>  static void hv_timesync_deinit(void);
> 
>  static void timesync_onchannelcallback(void *context);
>  static struct hv_util_service util_timesynch = {
>  	.util_cb = timesync_onchannelcallback,
>  	.util_init = hv_timesync_init,
> +	.util_pre_suspend = hv_timesync_pre_suspend,
>  	.util_deinit = hv_timesync_deinit,
>  };
> 
> @@ -140,18 +142,24 @@ static struct hv_util_service util_heartbeat = {
>  static struct hv_util_service util_kvp = {
>  	.util_cb = hv_kvp_onchannelcallback,
>  	.util_init = hv_kvp_init,
> +	.util_pre_suspend = hv_kvp_pre_suspend,
> +	.util_pre_resume = hv_kvp_pre_resume,
>  	.util_deinit = hv_kvp_deinit,
>  };
> 
>  static struct hv_util_service util_vss = {
>  	.util_cb = hv_vss_onchannelcallback,
>  	.util_init = hv_vss_init,
> +	.util_pre_suspend = hv_vss_pre_suspend,
> +	.util_pre_resume = hv_vss_pre_resume,
>  	.util_deinit = hv_vss_deinit,
>  };
> 
>  static struct hv_util_service util_fcopy = {
>  	.util_cb = hv_fcopy_onchannelcallback,
>  	.util_init = hv_fcopy_init,
> +	.util_pre_suspend = hv_fcopy_pre_suspend,
> +	.util_pre_resume = hv_fcopy_pre_resume,
>  	.util_deinit = hv_fcopy_deinit,
>  };
> 
> @@ -512,6 +520,41 @@ static int util_remove(struct hv_device *dev)
>  	return 0;
>  }
> 
> +static int util_suspend(struct hv_device *dev)
> +{
> +	struct hv_util_service *srv = hv_get_drvdata(dev);
> +	int ret = 0;
> +
> +	if (srv->util_pre_suspend) {
> +		ret = srv->util_pre_suspend();
> +

Unneeded blank line?

> +		if (ret)
> +			return ret;
> +	}
> +
> +	vmbus_close(dev->channel);
> +
> +	return 0;
> +}
> +
> +static int util_resume(struct hv_device *dev)
> +{
> +	struct hv_util_service *srv = hv_get_drvdata(dev);
> +	int ret = 0;
> +
> +	if (srv->util_pre_resume) {
> +		ret = srv->util_pre_resume();
> +

Unneeded blank line?

> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = vmbus_open(dev->channel, 4 * HV_HYP_PAGE_SIZE,
> +			 4 * HV_HYP_PAGE_SIZE, NULL, 0, srv->util_cb,
> +			 dev->channel);
> +	return ret;
> +}
> +
>  static const struct hv_vmbus_device_id id_table[] = {
>  	/* Shutdown guid */
>  	{ HV_SHUTDOWN_GUID,
> @@ -548,6 +591,8 @@ static  struct hv_driver util_drv = {
>  	.id_table = id_table,
>  	.probe =  util_probe,
>  	.remove =  util_remove,
> +	.suspend = util_suspend,
> +	.resume =  util_resume,
>  	.driver = {
>  		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
>  	},
> @@ -617,11 +662,24 @@ static int hv_timesync_init(struct hv_util_service *srv)
>  	return 0;
>  }
> 
> +static void hv_timesync_cancel_work(void)
> +{
> +	cancel_work_sync(&adj_time_work);
> +}
> +
> +static int hv_timesync_pre_suspend(void)
> +{
> +	hv_timesync_cancel_work();
> +
> +	return 0;
> +}
> +
>  static void hv_timesync_deinit(void)
>  {
>  	if (hv_ptp_clock)
>  		ptp_clock_unregister(hv_ptp_clock);
> -	cancel_work_sync(&adj_time_work);
> +
> +	hv_timesync_cancel_work();
>  }
> 
>  static int __init init_hyperv_utils(void)
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index 20edcfd3b96c..f5fa3b3c9baf 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -352,14 +352,20 @@ void vmbus_on_msg_dpc(unsigned long data);
> 
>  int hv_kvp_init(struct hv_util_service *srv);
>  void hv_kvp_deinit(void);
> +int hv_kvp_pre_suspend(void);
> +int hv_kvp_pre_resume(void);
>  void hv_kvp_onchannelcallback(void *context);
> 
>  int hv_vss_init(struct hv_util_service *srv);
>  void hv_vss_deinit(void);
> +int hv_vss_pre_suspend(void);
> +int hv_vss_pre_resume(void);
>  void hv_vss_onchannelcallback(void *context);
> 
>  int hv_fcopy_init(struct hv_util_service *srv);
>  void hv_fcopy_deinit(void);
> +int hv_fcopy_pre_suspend(void);
> +int hv_fcopy_pre_resume(void);
>  void hv_fcopy_onchannelcallback(void *context);
>  void vmbus_initiate_unload(bool crash);
> 
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 41c58011431e..692c89ccf5df 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1435,6 +1435,8 @@ struct hv_util_service {
>  	void (*util_cb)(void *);
>  	int (*util_init)(struct hv_util_service *);
>  	void (*util_deinit)(void);
> +	int (*util_pre_suspend)(void);
> +	int (*util_pre_resume)(void);
>  };
> 
>  struct vmbuspipe_hdr {
> --
> 2.19.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v2 4/4] hv_utils: Add the support of hibernation
  2020-01-22 18:27 ` Michael Kelley
@ 2020-01-23  8:11   ` Dexuan Cui
  2020-01-23 15:31     ` Michael Kelley
  0 siblings, 1 reply; 4+ messages in thread
From: Dexuan Cui @ 2020-01-23  8:11 UTC (permalink / raw)
  To: Michael Kelley, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, Sasha Levin, linux-hyperv, vkuznets, linux-kernel

> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Wednesday, January 22, 2020 10:28 AM
> ...
> > +int hv_fcopy_pre_suspend(void)
> > +{
> > +	struct vmbus_channel *channel = fcopy_transaction.recv_channel;
> > +	struct hv_fcopy_hdr *fcopy_msg;
> > +
> > +	tasklet_disable(&channel->callback_event);
> > + ...
> > +	fcopy_msg = kzalloc(sizeof(*fcopy_msg), GFP_KERNEL);
> > +	if (!fcopy_msg)
> > +		goto err;
> > +
> > +	fcopy_msg->operation = CANCEL_FCOPY;
> > +
> > +	hv_fcopy_cancel_work();
> > +
> > +	/* We don't care about the return value. */
> > +	hvutil_transport_send(hvt, fcopy_msg, sizeof(*fcopy_msg), NULL);
> > +
> > +	kfree(fcopy_msg);
> > +
> > +	fcopy_transaction.state = HVUTIL_READY;
> 
> Is the ordering correct here?  

This is intentional. I'll add a comment (please see the below).

> It seems like the fcopy daemon could receive
> the cancel message and do the write before the state is forced to
> HVUTIL_READY.

This can not happen, because when we're here from util_suspend(), all the
userspace processes have been frozen (please refer to another mail from me
https://lkml.org/lkml/2020/1/13/1021). The userspace is thawed only after
util_resume() and the other part of the resume procedure finish.

When we're here in util_suspend(), we can be in any of the below states:

#1: hv_utils has not queued any message to the userspace daemon.
Now hv_fcopy_pre_suspend() queues a message to the daemon, and forces
the state to HVUTIL_READY; the daemon should read the message without 
any error; later when the daemon calls write(), the write() returns -1 because 
fcopy_transaction.state != HVUTIL_USERSPACE_REQ and fcopy_on_msg() 
returns -EINVAL; the daemon responds to the write() error by closing and 
re-opening the dev file, which triggers a reset in the hv_utils driver: see
hvt_op_release() -> hvt_reset() -> fcopy_on_reset(), and later the daemon
registers itself to the hv_utils driver, and everything comes back to normal.

#2: hv_utils has queued a message to the userspace daemon.
Now hv_fcopy_pre_suspend() fails to queue an extra message to the 
daemon, but still forces the state to HVUTIL_READY.

#2.1 the userspace has not read the message.
The daemon reads the queued message and later the write() fails, so the
daemon closes and re-opens the dev file.

#2.2 the userspace has read the message, but has not called write() yet.
The write() fails, so the daemon closes and re-opens the dev file.

#2.3 the userspace has read the message, and has called write().
This is actualy the same as #1.

So, IMO the patch should be handling the scenarios correctly.
 
> > +
> > +	/* tasklet_enable() will be called in hv_fcopy_pre_resume(). */
> > +
> > +	return 0;
> > +err:
> > +	tasklet_enable(&channel->callback_event);
> 
> A nit, but if you did the memory allocation first, you could return -ENOMEM
> immediately on error and avoid the err: label and re-enabling the tasklet.

Good idea! I'll fix this.

> > +	return -ENOMEM;
> > +}
> > ...
> > --- a/drivers/hv/hv_snapshot.c
> > +++ b/drivers/hv/hv_snapshot.c
> > @@ -229,6 +229,7 @@ static void vss_handle_request(struct work_struct
> *dummy)
> >  		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
> >  		vss_send_op();
> >  		return;
> > +
> 
> Gratuitous blank line added?

Will remove it. I probably tried to make the "return;" more noticeable. 
 
> >  	case VSS_OP_GET_DM_INFO:
> >  		vss_transaction.msg->dm_info.flags = 0;
> >  		break;
> > ...
> > +int hv_vss_pre_suspend(void)
> > +{
> > ...
> > +	/* We don't care about the return value. */
> > +	hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
> > +
> > +	kfree(vss_msg);
> > +
> > +	vss_transaction.state = HVUTIL_READY;
> 
> Same comment about the ordering.

I'll add a comment for this. I may add a long comment in util_suspend()
and add a short comment here.

> > +
> > +	/* tasklet_enable() will be called in hv_vss_pre_resume(). */
> > +
> > +	return 0;
> > +err:
> > +	tasklet_enable(&channel->callback_event);
> > +	return -ENOMEM;
> 
> Same comment about simplifying the error handling applies here.

Will fix this.
 
> > +static int util_suspend(struct hv_device *dev)
> > +{
> > +	struct hv_util_service *srv = hv_get_drvdata(dev);
> > +	int ret = 0;
> > +
> > +	if (srv->util_pre_suspend) {
> > +		ret = srv->util_pre_suspend();
> > +
> 
> Unneeded blank line?

Will remove this.

> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	vmbus_close(dev->channel);
> > +
> > +	return 0;
> > +}
> > +
> > +static int util_resume(struct hv_device *dev)
> > +{
> > +	struct hv_util_service *srv = hv_get_drvdata(dev);
> > +	int ret = 0;
> > +
> > +	if (srv->util_pre_resume) {
> > +		ret = srv->util_pre_resume();
> > +
> 
> Unneeded blank line?

Will remove this.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v2 4/4] hv_utils: Add the support of hibernation
  2020-01-23  8:11   ` Dexuan Cui
@ 2020-01-23 15:31     ` Michael Kelley
  0 siblings, 0 replies; 4+ messages in thread
From: Michael Kelley @ 2020-01-23 15:31 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, Sasha Levin, linux-hyperv, vkuznets, linux-kernel

From: Dexuan Cui <decui@microsoft.com> Sent: Thursday, January 23, 2020 12:12 AM
> 
> > From: Michael Kelley <mikelley@microsoft.com>
> > Sent: Wednesday, January 22, 2020 10:28 AM
> > ...
> > > +int hv_fcopy_pre_suspend(void)
> > > +{
> > > +	struct vmbus_channel *channel = fcopy_transaction.recv_channel;
> > > +	struct hv_fcopy_hdr *fcopy_msg;
> > > +
> > > +	tasklet_disable(&channel->callback_event);
> > > + ...
> > > +	fcopy_msg = kzalloc(sizeof(*fcopy_msg), GFP_KERNEL);
> > > +	if (!fcopy_msg)
> > > +		goto err;
> > > +
> > > +	fcopy_msg->operation = CANCEL_FCOPY;
> > > +
> > > +	hv_fcopy_cancel_work();
> > > +
> > > +	/* We don't care about the return value. */
> > > +	hvutil_transport_send(hvt, fcopy_msg, sizeof(*fcopy_msg), NULL);
> > > +
> > > +	kfree(fcopy_msg);
> > > +
> > > +	fcopy_transaction.state = HVUTIL_READY;
> >
> > Is the ordering correct here?
> 
> This is intentional. I'll add a comment (please see the below).
> 
> > It seems like the fcopy daemon could receive
> > the cancel message and do the write before the state is forced to
> > HVUTIL_READY.
> 
> This can not happen, because when we're here from util_suspend(), all the
> userspace processes have been frozen (please refer to another mail from me
> https://lkml.org/lkml/2020/1/13/1021). The userspace is thawed only after
> util_resume() and the other part of the resume procedure finish.

Oh, right.  That makes sense now.

> 
> When we're here in util_suspend(), we can be in any of the below states:
> 
> #1: hv_utils has not queued any message to the userspace daemon.
> Now hv_fcopy_pre_suspend() queues a message to the daemon, and forces
> the state to HVUTIL_READY; the daemon should read the message without
> any error; later when the daemon calls write(), the write() returns -1 because
> fcopy_transaction.state != HVUTIL_USERSPACE_REQ and fcopy_on_msg()
> returns -EINVAL; the daemon responds to the write() error by closing and
> re-opening the dev file, which triggers a reset in the hv_utils driver: see
> hvt_op_release() -> hvt_reset() -> fcopy_on_reset(), and later the daemon
> registers itself to the hv_utils driver, and everything comes back to normal.
> 
> #2: hv_utils has queued a message to the userspace daemon.
> Now hv_fcopy_pre_suspend() fails to queue an extra message to the
> daemon, but still forces the state to HVUTIL_READY.
> 
> #2.1 the userspace has not read the message.
> The daemon reads the queued message and later the write() fails, so the
> daemon closes and re-opens the dev file.
> 
> #2.2 the userspace has read the message, but has not called write() yet.
> The write() fails, so the daemon closes and re-opens the dev file.
> 
> #2.3 the userspace has read the message, and has called write().
> This is actualy the same as #1.
> 
> So, IMO the patch should be handling the scenarios correctly.
> 
> > > +
> > > +	/* tasklet_enable() will be called in hv_fcopy_pre_resume(). */
> > > +
> > > +	return 0;
> > > +err:
> > > +	tasklet_enable(&channel->callback_event);
> >
> > A nit, but if you did the memory allocation first, you could return -ENOMEM
> > immediately on error and avoid the err: label and re-enabling the tasklet.
> 
> Good idea! I'll fix this.
> 
> > > +	return -ENOMEM;
> > > +}
> > > ...
> > > --- a/drivers/hv/hv_snapshot.c
> > > +++ b/drivers/hv/hv_snapshot.c
> > > @@ -229,6 +229,7 @@ static void vss_handle_request(struct work_struct
> > *dummy)
> > >  		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
> > >  		vss_send_op();
> > >  		return;
> > > +
> >
> > Gratuitous blank line added?
> 
> Will remove it. I probably tried to make the "return;" more noticeable.
> 
> > >  	case VSS_OP_GET_DM_INFO:
> > >  		vss_transaction.msg->dm_info.flags = 0;
> > >  		break;
> > > ...
> > > +int hv_vss_pre_suspend(void)
> > > +{
> > > ...
> > > +	/* We don't care about the return value. */
> > > +	hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
> > > +
> > > +	kfree(vss_msg);
> > > +
> > > +	vss_transaction.state = HVUTIL_READY;
> >
> > Same comment about the ordering.
> 
> I'll add a comment for this. I may add a long comment in util_suspend()
> and add a short comment here.
> 
> > > +
> > > +	/* tasklet_enable() will be called in hv_vss_pre_resume(). */
> > > +
> > > +	return 0;
> > > +err:
> > > +	tasklet_enable(&channel->callback_event);
> > > +	return -ENOMEM;
> >
> > Same comment about simplifying the error handling applies here.
> 
> Will fix this.
> 
> > > +static int util_suspend(struct hv_device *dev)
> > > +{
> > > +	struct hv_util_service *srv = hv_get_drvdata(dev);
> > > +	int ret = 0;
> > > +
> > > +	if (srv->util_pre_suspend) {
> > > +		ret = srv->util_pre_suspend();
> > > +
> >
> > Unneeded blank line?
> 
> Will remove this.
> 
> > > +		if (ret)
> > > +			return ret;
> > > +	}
> > > +
> > > +	vmbus_close(dev->channel);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int util_resume(struct hv_device *dev)
> > > +{
> > > +	struct hv_util_service *srv = hv_get_drvdata(dev);
> > > +	int ret = 0;
> > > +
> > > +	if (srv->util_pre_resume) {
> > > +		ret = srv->util_pre_resume();
> > > +
> >
> > Unneeded blank line?
> 
> Will remove this.
> 
> Thanks,
> -- Dexuan

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-01-23 15:33 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-13  6:32 [PATCH v2 4/4] hv_utils: Add the support of hibernation Dexuan Cui
2020-01-22 18:27 ` Michael Kelley
2020-01-23  8:11   ` Dexuan Cui
2020-01-23 15:31     ` Michael Kelley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).