linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Enhance hv_utils to support hibernation
@ 2019-09-11 23:38 Dexuan Cui
  2019-09-11 23:38 ` [PATCH 1/3] hv_utils: Add the support of hibernation Dexuan Cui
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Dexuan Cui @ 2019-09-11 23:38 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley
  Cc: Dexuan Cui

This patch is basically a pure Hyper-V specific change and it has a
build dependency on the commit 271b2224d42f ("Drivers: hv: vmbus: Implement
suspend/resume for VSC drivers for hibernation"), which is on Sasha Levin's
Hyper-V tree's hyperv-next branch:
https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git/log/?h=hyperv-next

I request this patch should go through Sasha's tree rather than the
char-misc tree.

Dexuan Cui (3):
  hv_utils: Add the support of hibernation
  hv_utils: Support host-initiated hibernation request
  hv_utils: Support host-initiated restart request

 drivers/hv/hv_fcopy.c     |   9 +++-
 drivers/hv/hv_kvp.c       |  11 +++-
 drivers/hv/hv_snapshot.c  |  11 +++-
 drivers/hv/hv_util.c      | 124 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/hv/hyperv_vmbus.h |   3 ++
 include/linux/hyperv.h    |   1 +
 6 files changed, 152 insertions(+), 7 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-11 23:38 [PATCH 0/3] Enhance hv_utils to support hibernation Dexuan Cui
@ 2019-09-11 23:38 ` Dexuan Cui
  2019-09-12 16:36   ` Vitaly Kuznetsov
  2019-09-11 23:38 ` [PATCH 2/3] hv_utils: Support host-initiated hibernation request Dexuan Cui
  2019-09-11 23:39 ` [PATCH 3/3] hv_utils: Support host-initiated restart request Dexuan Cui
  2 siblings, 1 reply; 12+ messages in thread
From: Dexuan Cui @ 2019-09-11 23:38 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley
  Cc: Dexuan Cui

On hibernation, Linux can not guarantee the host side utils operations
still succeed without any issue, so let's simply cancel the work items.
The host is supposed to retry the operations, if necessary.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/hv_fcopy.c     |  9 ++++++++-
 drivers/hv/hv_kvp.c       | 11 +++++++++--
 drivers/hv/hv_snapshot.c  | 11 +++++++++--
 drivers/hv/hv_util.c      | 37 ++++++++++++++++++++++++++++++++++++-
 drivers/hv/hyperv_vmbus.h |  3 +++
 include/linux/hyperv.h    |  1 +
 6 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 7e30ae0..f44df3d 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -345,9 +345,16 @@ int hv_fcopy_init(struct hv_util_service *srv)
 	return 0;
 }
 
+void hv_fcopy_cancel_work(void)
+{
+	cancel_delayed_work_sync(&fcopy_timeout_work);
+}
+
 void hv_fcopy_deinit(void)
 {
 	fcopy_transaction.state = HVUTIL_DEVICE_DYING;
-	cancel_delayed_work_sync(&fcopy_timeout_work);
+
+	hv_fcopy_cancel_work();
+
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 5054d11..064c384 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -757,11 +757,18 @@ static void kvp_on_reset(void)
 	return 0;
 }
 
-void hv_kvp_deinit(void)
+void hv_kvp_cancel_work(void)
 {
-	kvp_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&kvp_host_handshake_work);
 	cancel_delayed_work_sync(&kvp_timeout_work);
 	cancel_work_sync(&kvp_sendkey_work);
+}
+
+void hv_kvp_deinit(void)
+{
+	kvp_transaction.state = HVUTIL_DEVICE_DYING;
+
+	hv_kvp_cancel_work();
+
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 20ba95b..0eb718a 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -378,10 +378,17 @@ static void vss_on_reset(void)
 	return 0;
 }
 
-void hv_vss_deinit(void)
+void hv_vss_cancel_work(void)
 {
-	vss_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&vss_timeout_work);
 	cancel_work_sync(&vss_handle_request_work);
+}
+
+void hv_vss_deinit(void)
+{
+	vss_transaction.state = HVUTIL_DEVICE_DYING;
+
+	hv_vss_cancel_work();
+
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index e32681e..039c752 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -81,12 +81,14 @@
 };
 
 static int hv_timesync_init(struct hv_util_service *srv);
+static void hv_timesync_cancel_work(void);
 static void hv_timesync_deinit(void);
 
 static void timesync_onchannelcallback(void *context);
 static struct hv_util_service util_timesynch = {
 	.util_cb = timesync_onchannelcallback,
 	.util_init = hv_timesync_init,
+	.util_cancel_work = hv_timesync_cancel_work,
 	.util_deinit = hv_timesync_deinit,
 };
 
@@ -98,18 +100,21 @@
 static struct hv_util_service util_kvp = {
 	.util_cb = hv_kvp_onchannelcallback,
 	.util_init = hv_kvp_init,
+	.util_cancel_work = hv_kvp_cancel_work,
 	.util_deinit = hv_kvp_deinit,
 };
 
 static struct hv_util_service util_vss = {
 	.util_cb = hv_vss_onchannelcallback,
 	.util_init = hv_vss_init,
+	.util_cancel_work = hv_vss_cancel_work,
 	.util_deinit = hv_vss_deinit,
 };
 
 static struct hv_util_service util_fcopy = {
 	.util_cb = hv_fcopy_onchannelcallback,
 	.util_init = hv_fcopy_init,
+	.util_cancel_work = hv_fcopy_cancel_work,
 	.util_deinit = hv_fcopy_deinit,
 };
 
@@ -440,6 +445,28 @@ static int util_remove(struct hv_device *dev)
 	return 0;
 }
 
+static int util_suspend(struct hv_device *dev)
+{
+	struct hv_util_service *srv = hv_get_drvdata(dev);
+
+	if (srv->util_cancel_work)
+		srv->util_cancel_work();
+
+	vmbus_close(dev->channel);
+
+	return 0;
+}
+
+static int util_resume(struct hv_device *dev)
+{
+	struct hv_util_service *srv = hv_get_drvdata(dev);
+	int ret;
+
+	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
+			 NULL, 0, srv->util_cb, dev->channel);
+	return ret;
+}
+
 static const struct hv_vmbus_device_id id_table[] = {
 	/* Shutdown guid */
 	{ HV_SHUTDOWN_GUID,
@@ -476,6 +503,8 @@ static int util_remove(struct hv_device *dev)
 	.id_table = id_table,
 	.probe =  util_probe,
 	.remove =  util_remove,
+	.suspend = util_suspend,
+	.resume =  util_resume,
 	.driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
@@ -545,11 +574,17 @@ static int hv_timesync_init(struct hv_util_service *srv)
 	return 0;
 }
 
+static void hv_timesync_cancel_work(void)
+{
+	cancel_work_sync(&adj_time_work);
+}
+
 static void hv_timesync_deinit(void)
 {
 	if (hv_ptp_clock)
 		ptp_clock_unregister(hv_ptp_clock);
-	cancel_work_sync(&adj_time_work);
+
+	hv_timesync_cancel_work();
 }
 
 static int __init init_hyperv_utils(void)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index f7a5f56..dc280fa 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -353,14 +353,17 @@ int vmbus_add_channel_kobj(struct hv_device *device_obj,
 void vmbus_on_msg_dpc(unsigned long data);
 
 int hv_kvp_init(struct hv_util_service *srv);
+void hv_kvp_cancel_work(void);
 void hv_kvp_deinit(void);
 void hv_kvp_onchannelcallback(void *context);
 
 int hv_vss_init(struct hv_util_service *srv);
+void hv_vss_cancel_work(void);
 void hv_vss_deinit(void);
 void hv_vss_onchannelcallback(void *context);
 
 int hv_fcopy_init(struct hv_util_service *srv);
+void hv_fcopy_cancel_work(void);
 void hv_fcopy_deinit(void);
 void hv_fcopy_onchannelcallback(void *context);
 void vmbus_initiate_unload(bool crash);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a3aa9e9..b4e2768 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1412,6 +1412,7 @@ struct hv_util_service {
 	void (*util_cb)(void *);
 	int (*util_init)(struct hv_util_service *);
 	void (*util_deinit)(void);
+	void (*util_cancel_work)(void);
 };
 
 struct vmbuspipe_hdr {
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] hv_utils: Support host-initiated hibernation request
  2019-09-11 23:38 [PATCH 0/3] Enhance hv_utils to support hibernation Dexuan Cui
  2019-09-11 23:38 ` [PATCH 1/3] hv_utils: Add the support of hibernation Dexuan Cui
@ 2019-09-11 23:38 ` Dexuan Cui
  2019-09-12 16:26   ` Vitaly Kuznetsov
  2019-09-11 23:39 ` [PATCH 3/3] hv_utils: Support host-initiated restart request Dexuan Cui
  2 siblings, 1 reply; 12+ messages in thread
From: Dexuan Cui @ 2019-09-11 23:38 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley
  Cc: Dexuan Cui

Update the Shutdown IC version to 3.2, which is required for the host to
send the hibernation request.

The user is expected to create the program "/sbin/hyperv-hibernate", which
is called on the host-initiated hibernation request.

The program can be a script like

 test@localhost:~$ cat /sbin/hyperv-hibernate
 #!/bin/bash
 echo disk > /sys/power/state

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/hv_util.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 039c752..9e98c5d 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -24,6 +24,8 @@
 
 #define SD_MAJOR	3
 #define SD_MINOR	0
+#define SD_MINOR_2	2
+#define SD_VERSION_3_2	(SD_MAJOR << 16 | SD_MINOR_2)
 #define SD_VERSION	(SD_MAJOR << 16 | SD_MINOR)
 
 #define SD_MAJOR_1	1
@@ -50,8 +52,9 @@
 static int ts_srv_version;
 static int hb_srv_version;
 
-#define SD_VER_COUNT 2
+#define SD_VER_COUNT 3
 static const int sd_versions[] = {
+	SD_VERSION_3_2,
 	SD_VERSION,
 	SD_VERSION_1
 };
@@ -75,9 +78,30 @@
 	UTIL_WS2K8_FW_VERSION
 };
 
+static bool execute_hibernate;
+static int hv_shutdown_init(struct hv_util_service *srv)
+{
+#if 0
+	/*
+	 * The patch to implement hv_is_hibernation_supported() is going
+	 * through the tip tree. For now, let's hardcode execute_hibernate
+	 * to true -- this doesn't break anything since hibernation for
+	 * Linux VM on Hyper-V never worked before. We'll remove the
+	 * conditional compilation as soon as hv_is_hibernation_supported()
+	 * is available in the mainline tree.
+	 */
+	execute_hibernate = hv_is_hibernation_supported();
+#else
+	execute_hibernate = true;
+#endif
+
+	return 0;
+}
+
 static void shutdown_onchannelcallback(void *context);
 static struct hv_util_service util_shutdown = {
 	.util_cb = shutdown_onchannelcallback,
+	.util_init = hv_shutdown_init,
 };
 
 static int hv_timesync_init(struct hv_util_service *srv);
@@ -123,11 +147,38 @@ static void perform_shutdown(struct work_struct *dummy)
 	orderly_poweroff(true);
 }
 
+static void perform_hibernation(struct work_struct *dummy)
+{
+	/*
+	 * The user is expected to create the program, which can be a simple
+	 * script containing two lines:
+	 * #!/bin/bash
+	 * echo disk > /sys/power/state
+	 */
+	static char hibernate_cmd[PATH_MAX] = "/sbin/hyperv-hibernate";
+
+	static char *envp[] = {
+		NULL,
+	};
+
+	static char *argv[] = {
+		hibernate_cmd,
+		NULL,
+	};
+
+	call_usermodehelper(hibernate_cmd, argv, envp, UMH_NO_WAIT);
+}
+
 /*
  * Perform the shutdown operation in a thread context.
  */
 static DECLARE_WORK(shutdown_work, perform_shutdown);
 
+/*
+ * Perform the hibernation operation in a thread context.
+ */
+static DECLARE_WORK(hibernate_work, perform_hibernation);
+
 static void shutdown_onchannelcallback(void *context)
 {
 	struct vmbus_channel *channel = context;
@@ -171,6 +222,19 @@ static void shutdown_onchannelcallback(void *context)
 				pr_info("Shutdown request received -"
 					    " graceful shutdown initiated\n");
 				break;
+			case 4:
+			case 5:
+				pr_info("Hibernation request received -"
+					    " hibernation %sinitiated\n",
+					execute_hibernate ? "" : "not ");
+
+				if (execute_hibernate) {
+					icmsghdrp->status = HV_S_OK;
+					schedule_work(&hibernate_work);
+				} else {
+					icmsghdrp->status = HV_E_FAIL;
+				}
+				break;
 			default:
 				icmsghdrp->status = HV_E_FAIL;
 				execute_shutdown = false;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] hv_utils: Support host-initiated restart request
  2019-09-11 23:38 [PATCH 0/3] Enhance hv_utils to support hibernation Dexuan Cui
  2019-09-11 23:38 ` [PATCH 1/3] hv_utils: Add the support of hibernation Dexuan Cui
  2019-09-11 23:38 ` [PATCH 2/3] hv_utils: Support host-initiated hibernation request Dexuan Cui
@ 2019-09-11 23:39 ` Dexuan Cui
  2 siblings, 0 replies; 12+ messages in thread
From: Dexuan Cui @ 2019-09-11 23:39 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley
  Cc: Dexuan Cui

To test the code, we should run this command on the host:

Restart-VM $vm -Type Reboot

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/hv_util.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 9e98c5d..6d642f5 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -24,7 +24,9 @@
 
 #define SD_MAJOR	3
 #define SD_MINOR	0
+#define SD_MINOR_1	1
 #define SD_MINOR_2	2
+#define SD_VERSION_3_1	(SD_MAJOR << 16 | SD_MINOR_1)
 #define SD_VERSION_3_2	(SD_MAJOR << 16 | SD_MINOR_2)
 #define SD_VERSION	(SD_MAJOR << 16 | SD_MINOR)
 
@@ -52,9 +54,10 @@
 static int ts_srv_version;
 static int hb_srv_version;
 
-#define SD_VER_COUNT 3
+#define SD_VER_COUNT 4
 static const int sd_versions[] = {
 	SD_VERSION_3_2,
+	SD_VERSION_3_1,
 	SD_VERSION,
 	SD_VERSION_1
 };
@@ -147,6 +150,11 @@ static void perform_shutdown(struct work_struct *dummy)
 	orderly_poweroff(true);
 }
 
+static void perform_restart(struct work_struct *dummy)
+{
+	orderly_reboot();
+}
+
 static void perform_hibernation(struct work_struct *dummy)
 {
 	/*
@@ -175,6 +183,11 @@ static void perform_hibernation(struct work_struct *dummy)
 static DECLARE_WORK(shutdown_work, perform_shutdown);
 
 /*
+ * Perform the restart operation in a thread context.
+ */
+static DECLARE_WORK(restart_work, perform_restart);
+
+/*
  * Perform the hibernation operation in a thread context.
  */
 static DECLARE_WORK(hibernate_work, perform_hibernation);
@@ -222,6 +235,14 @@ static void shutdown_onchannelcallback(void *context)
 				pr_info("Shutdown request received -"
 					    " graceful shutdown initiated\n");
 				break;
+			case 2:
+			case 3:
+				pr_info("Restart request received -"
+					    " graceful restart initiated\n");
+				icmsghdrp->status = HV_S_OK;
+
+				schedule_work(&restart_work);
+				break;
 			case 4:
 			case 5:
 				pr_info("Hibernation request received -"
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] hv_utils: Support host-initiated hibernation request
  2019-09-11 23:38 ` [PATCH 2/3] hv_utils: Support host-initiated hibernation request Dexuan Cui
@ 2019-09-12 16:26   ` Vitaly Kuznetsov
  2019-09-13 16:42     ` Dexuan Cui
  0 siblings, 1 reply; 12+ messages in thread
From: Vitaly Kuznetsov @ 2019-09-12 16:26 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

Dexuan Cui <decui@microsoft.com> writes:

> +static void perform_hibernation(struct work_struct *dummy)
> +{
> +	/*
> +	 * The user is expected to create the program, which can be a simple
> +	 * script containing two lines:
> +	 * #!/bin/bash
> +	 * echo disk > /sys/power/state

'systemctl hibernate' is what people do nowadays :-)

> +	 */
> +	static char hibernate_cmd[PATH_MAX] = "/sbin/hyperv-hibernate";
> +

Let's not do that (I remember when we were triggering network restart
from netvsc and it was a lot of pain).

Receiving hybernation request from the host is similar to pushing power
button on your desktop: an ACPI event is going to be generated and your
userspace will somehow react to it. I see two options:
1) We try to hook up some existing userspace (udev?)
2) We write a new hyperv-daemon handling the request (with a config file
instead of hardcoding please).

-- 
Vitaly

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-11 23:38 ` [PATCH 1/3] hv_utils: Add the support of hibernation Dexuan Cui
@ 2019-09-12 16:36   ` Vitaly Kuznetsov
  2019-09-13 19:15     ` Dexuan Cui
  0 siblings, 1 reply; 12+ messages in thread
From: Vitaly Kuznetsov @ 2019-09-12 16:36 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

Dexuan Cui <decui@microsoft.com> writes:

> On hibernation, Linux can not guarantee the host side utils operations
> still succeed without any issue, so let's simply cancel the work items.
> The host is supposed to retry the operations, if necessary.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/hv_fcopy.c     |  9 ++++++++-
>  drivers/hv/hv_kvp.c       | 11 +++++++++--
>  drivers/hv/hv_snapshot.c  | 11 +++++++++--
>  drivers/hv/hv_util.c      | 37 ++++++++++++++++++++++++++++++++++++-
>  drivers/hv/hyperv_vmbus.h |  3 +++
>  include/linux/hyperv.h    |  1 +
>  6 files changed, 66 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
> index 7e30ae0..f44df3d 100644
> --- a/drivers/hv/hv_fcopy.c
> +++ b/drivers/hv/hv_fcopy.c
> @@ -345,9 +345,16 @@ int hv_fcopy_init(struct hv_util_service *srv)
>  	return 0;
>  }
>  
> +void hv_fcopy_cancel_work(void)
> +{
> +	cancel_delayed_work_sync(&fcopy_timeout_work);
> +}
> +
>  void hv_fcopy_deinit(void)
>  {
>  	fcopy_transaction.state = HVUTIL_DEVICE_DYING;
> -	cancel_delayed_work_sync(&fcopy_timeout_work);
> +
> +	hv_fcopy_cancel_work();
> +
>  	hvutil_transport_destroy(hvt);
>  }
> diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
> index 5054d11..064c384 100644
> --- a/drivers/hv/hv_kvp.c
> +++ b/drivers/hv/hv_kvp.c
> @@ -757,11 +757,18 @@ static void kvp_on_reset(void)
>  	return 0;
>  }
>  
> -void hv_kvp_deinit(void)
> +void hv_kvp_cancel_work(void)
>  {
> -	kvp_transaction.state = HVUTIL_DEVICE_DYING;
>  	cancel_delayed_work_sync(&kvp_host_handshake_work);
>  	cancel_delayed_work_sync(&kvp_timeout_work);
>  	cancel_work_sync(&kvp_sendkey_work);
> +}
> +
> +void hv_kvp_deinit(void)
> +{
> +	kvp_transaction.state = HVUTIL_DEVICE_DYING;
> +
> +	hv_kvp_cancel_work();
> +
>  	hvutil_transport_destroy(hvt);
>  }
> diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
> index 20ba95b..0eb718a 100644
> --- a/drivers/hv/hv_snapshot.c
> +++ b/drivers/hv/hv_snapshot.c
> @@ -378,10 +378,17 @@ static void vss_on_reset(void)
>  	return 0;
>  }
>  
> -void hv_vss_deinit(void)
> +void hv_vss_cancel_work(void)
>  {
> -	vss_transaction.state = HVUTIL_DEVICE_DYING;
>  	cancel_delayed_work_sync(&vss_timeout_work);
>  	cancel_work_sync(&vss_handle_request_work);
> +}
> +
> +void hv_vss_deinit(void)
> +{
> +	vss_transaction.state = HVUTIL_DEVICE_DYING;
> +
> +	hv_vss_cancel_work();
> +
>  	hvutil_transport_destroy(hvt);
>  }
> diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
> index e32681e..039c752 100644
> --- a/drivers/hv/hv_util.c
> +++ b/drivers/hv/hv_util.c
> @@ -81,12 +81,14 @@
>  };
>  
>  static int hv_timesync_init(struct hv_util_service *srv);
> +static void hv_timesync_cancel_work(void);
>  static void hv_timesync_deinit(void);
>  
>  static void timesync_onchannelcallback(void *context);
>  static struct hv_util_service util_timesynch = {
>  	.util_cb = timesync_onchannelcallback,
>  	.util_init = hv_timesync_init,
> +	.util_cancel_work = hv_timesync_cancel_work,
>  	.util_deinit = hv_timesync_deinit,
>  };
>  
> @@ -98,18 +100,21 @@
>  static struct hv_util_service util_kvp = {
>  	.util_cb = hv_kvp_onchannelcallback,
>  	.util_init = hv_kvp_init,
> +	.util_cancel_work = hv_kvp_cancel_work,
>  	.util_deinit = hv_kvp_deinit,
>  };
>  
>  static struct hv_util_service util_vss = {
>  	.util_cb = hv_vss_onchannelcallback,
>  	.util_init = hv_vss_init,
> +	.util_cancel_work = hv_vss_cancel_work,
>  	.util_deinit = hv_vss_deinit,
>  };
>  
>  static struct hv_util_service util_fcopy = {
>  	.util_cb = hv_fcopy_onchannelcallback,
>  	.util_init = hv_fcopy_init,
> +	.util_cancel_work = hv_fcopy_cancel_work,
>  	.util_deinit = hv_fcopy_deinit,
>  };
>  
> @@ -440,6 +445,28 @@ static int util_remove(struct hv_device *dev)
>  	return 0;
>  }
>  
> +static int util_suspend(struct hv_device *dev)
> +{
> +	struct hv_util_service *srv = hv_get_drvdata(dev);
> +
> +	if (srv->util_cancel_work)
> +		srv->util_cancel_work();
> +
> +	vmbus_close(dev->channel);

And what happens if you receive a real reply from userspace after you
close the channel? You've only cancelled timeout work so the driver
will not try to reply by itself but this doesn't mean we won't try to
write to the channel on legitimate replies from daemons.

I think you need to block all userspace requests (hang in kernel until
util_resume()).

While I'm not sure we can't get away without it but I'd suggest we add a
new HVUTIL_SUSPENDED state to the hvutil state machine.

> +
> +	return 0;
> +}
> +
> +static int util_resume(struct hv_device *dev)
> +{
> +	struct hv_util_service *srv = hv_get_drvdata(dev);
> +	int ret;
> +
> +	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
> +			 NULL, 0, srv->util_cb, dev->channel);
> +	return ret;
> +}
> +
>  static const struct hv_vmbus_device_id id_table[] = {
>  	/* Shutdown guid */
>  	{ HV_SHUTDOWN_GUID,
> @@ -476,6 +503,8 @@ static int util_remove(struct hv_device *dev)
>  	.id_table = id_table,
>  	.probe =  util_probe,
>  	.remove =  util_remove,
> +	.suspend = util_suspend,
> +	.resume =  util_resume,
>  	.driver = {
>  		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
>  	},
> @@ -545,11 +574,17 @@ static int hv_timesync_init(struct hv_util_service *srv)
>  	return 0;
>  }
>  
> +static void hv_timesync_cancel_work(void)
> +{
> +	cancel_work_sync(&adj_time_work);
> +}
> +
>  static void hv_timesync_deinit(void)
>  {
>  	if (hv_ptp_clock)
>  		ptp_clock_unregister(hv_ptp_clock);
> -	cancel_work_sync(&adj_time_work);
> +
> +	hv_timesync_cancel_work();
>  }
>  
>  static int __init init_hyperv_utils(void)
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index f7a5f56..dc280fa 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -353,14 +353,17 @@ int vmbus_add_channel_kobj(struct hv_device *device_obj,
>  void vmbus_on_msg_dpc(unsigned long data);
>  
>  int hv_kvp_init(struct hv_util_service *srv);
> +void hv_kvp_cancel_work(void);
>  void hv_kvp_deinit(void);
>  void hv_kvp_onchannelcallback(void *context);
>  
>  int hv_vss_init(struct hv_util_service *srv);
> +void hv_vss_cancel_work(void);
>  void hv_vss_deinit(void);
>  void hv_vss_onchannelcallback(void *context);
>  
>  int hv_fcopy_init(struct hv_util_service *srv);
> +void hv_fcopy_cancel_work(void);
>  void hv_fcopy_deinit(void);
>  void hv_fcopy_onchannelcallback(void *context);
>  void vmbus_initiate_unload(bool crash);
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index a3aa9e9..b4e2768 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1412,6 +1412,7 @@ struct hv_util_service {
>  	void (*util_cb)(void *);
>  	int (*util_init)(struct hv_util_service *);
>  	void (*util_deinit)(void);
> +	void (*util_cancel_work)(void);
>  };
>  
>  struct vmbuspipe_hdr {

-- 
Vitaly

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 2/3] hv_utils: Support host-initiated hibernation request
  2019-09-12 16:26   ` Vitaly Kuznetsov
@ 2019-09-13 16:42     ` Dexuan Cui
  0 siblings, 0 replies; 12+ messages in thread
From: Dexuan Cui @ 2019-09-13 16:42 UTC (permalink / raw)
  To: vkuznets
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Thursday, September 12, 2019 9:27 AM
> 
> Dexuan Cui <decui@microsoft.com> writes:
> 
> > +static void perform_hibernation(struct work_struct *dummy)
> > +{
> > +	/*
> > +	 * The user is expected to create the program, which can be a simple
> > +	 * script containing two lines:
> > +	 * #!/bin/bash
> > +	 * echo disk > /sys/power/state
> 
> 'systemctl hibernate' is what people do nowadays :-)

Thanks for sharing this command! 
 
> > +	 */
> > +	static char hibernate_cmd[PATH_MAX] = "/sbin/hyperv-hibernate";
> > +
> 
> Let's not do that (I remember when we were triggering network restart
> from netvsc and it was a lot of pain).
> 
> Receiving hybernation request from the host is similar to pushing power
> button on your desktop: an ACPI event is going to be generated and your
> userspace will somehow react to it. I see two options:
> 1) We try to hook up some existing userspace (udev?)
> 2) We write a new hyperv-daemon handling the request (with a config file
> instead of hardcoding please).
> 
> Vitaly

Thanks for the suggestions! 
I prefer the udev method, e.g. something like this:

	char *uevent_env[2] = { "EVENT=hibernate", NULL };
	kobject_uevent_env(&ctx->dev->device.kobj, KOBJ_CHANGE, uevent_env);

Then the user is expected to create the below udev rule file, which is applied
upon the host-initiated hibernation request:

root@localhost:~# cat /usr/lib/udev/rules.d/40-vm-hibernation.rules
SUBSYSTEM=="vmbus", ACTION=="change", DRIVER=="hv_utils", ENV{EVENT}=="hibernate", RUN+="/usr/bin/systemctl hibernate"

The full patch is here:
https://github.com/dcui/linux/commit/0d92b53f48a8dca92bbd3493ea9c5bd098c99623

I'll post it as v2.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-12 16:36   ` Vitaly Kuznetsov
@ 2019-09-13 19:15     ` Dexuan Cui
  2019-09-16  8:45       ` Vitaly Kuznetsov
  0 siblings, 1 reply; 12+ messages in thread
From: Dexuan Cui @ 2019-09-13 19:15 UTC (permalink / raw)
  To: vkuznets
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Thursday, September 12, 2019 9:37 AM

> > +static int util_suspend(struct hv_device *dev)
> > +{
> > +	struct hv_util_service *srv = hv_get_drvdata(dev);
> > +
> > +	if (srv->util_cancel_work)
> > +		srv->util_cancel_work();
> > +
> > +	vmbus_close(dev->channel);
> 
> And what happens if you receive a real reply from userspace after you
> close the channel? You've only cancelled timeout work so the driver
> will not try to reply by itself but this doesn't mean we won't try to
> write to the channel on legitimate replies from daemons.
> 
> I think you need to block all userspace requests (hang in kernel until
> util_resume()).
> 
> While I'm not sure we can't get away without it but I'd suggest we add a
> new HVUTIL_SUSPENDED state to the hvutil state machine.
> Vitaly

When we reach util_suspend(), all the userspace processes have been
frozen: see kernel/power/hibernate.c: hibernate() -> freeze_processes(),
so here we can not receive a reply from the userspace daemon.

However, it looks there is indeed some tricky corner cases we need to deal
with: in util_resume(), before we call vmbus_open(), all the userspace
processes are still frozen, and the userspace daemon (e.g. hv_kvp_daemon) 
can be in any of these states:

1. the driver has not buffered any message for the daemon. This is good.

2. the driver has buffered a message for the daemon, and
kvp_transaction.state is HVUTIL_USERSPACE_REQ. Later, the kvp daemon
writes the response to the driver, and in kvp_on_msg() 
kvp_transaction.state is moved to HVUTIL_USERSPACE_RECV, but since
cancel_delayed_work_sync(&kvp_timeout_work) is false (the work has
been cancelled by util_suspend()), the driver reports nothing to the host,
which is good as the VM has undergone a hibernation event and IMO the
response may be stale and I believe the host is not expecting this 
response from the VM at all (the host side application that requested the
KVP must have failed or timed out), but the bad thing is: the "state"
remains in HVUTIL_USERSPACE_RECV, preventing
hv_kvp_onchannelcallback() from reading from the channel ringbuffer.

I suggest we work around this race condition by the below patch:

--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -250,8 +250,8 @@ static int kvp_on_msg(void *msg, int len)
         */
        if (cancel_delayed_work_sync(&kvp_timeout_work)) {
                kvp_respond_to_host(message, error);
-               hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
        }
+       hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);

        return 0;
 }

How do you like this?

I can imagine there is still a small chance that the state machine can run
out of order, and the kvp daemon may exit due to the return values of
read()/write() being -EINVAL, but the chance should be small enough in
practice, and IMO the issue even exists in the current code when
hibernation is not involved, e.g. kvp_timeout_func() and kvp_on_msg() 
can run concurrently; if kvp_on_msg() runs slowly due to some reason 
(e.g. the kvp daemon is stopped as I'm gdb'ing it), kvp_timeout_func()
fires and moves the state to HVUTIL_READY; later kvp_on_msg() starts
to run and returns -EINVAL, and the kvp daemon will exit().

IMHO here it looks extremely difficult to make things flawless (if that's
even possible), so it's necessary to ask the daemons to auto-restart once
they exit() unexpectedly. This can be achieved by configuring systemd
properly for the kvp/vss/fcopy services. 

I'm not sure introducing a HVUTIL_SUSPENDED state would solve all
of the corner cases, but I'm sure that would further complicate the
current code, at least to me. :-)

Please share your thoughts. I believe you know all of these much
better than me, as you're the author of the state machine. :-)

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-13 19:15     ` Dexuan Cui
@ 2019-09-16  8:45       ` Vitaly Kuznetsov
  2019-09-19  6:34         ` Dexuan Cui
  0 siblings, 1 reply; 12+ messages in thread
From: Vitaly Kuznetsov @ 2019-09-16  8:45 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

Dexuan Cui <decui@microsoft.com> writes:

>> From: Vitaly Kuznetsov <vkuznets@redhat.com>
>> Sent: Thursday, September 12, 2019 9:37 AM
>
>> > +static int util_suspend(struct hv_device *dev)
>> > +{
>> > +	struct hv_util_service *srv = hv_get_drvdata(dev);
>> > +
>> > +	if (srv->util_cancel_work)
>> > +		srv->util_cancel_work();
>> > +
>> > +	vmbus_close(dev->channel);
>> 
>> And what happens if you receive a real reply from userspace after you
>> close the channel? You've only cancelled timeout work so the driver
>> will not try to reply by itself but this doesn't mean we won't try to
>> write to the channel on legitimate replies from daemons.
>> 
>> I think you need to block all userspace requests (hang in kernel until
>> util_resume()).
>> 
>> While I'm not sure we can't get away without it but I'd suggest we add a
>> new HVUTIL_SUSPENDED state to the hvutil state machine.
>> Vitaly
>
> When we reach util_suspend(), all the userspace processes have been
> frozen: see kernel/power/hibernate.c: hibernate() -> freeze_processes(),
> so here we can not receive a reply from the userspace daemon.
>

Let's add a WARN() or something then as if this ever happens this is
going to be realy tricky to catch.

> However, it looks there is indeed some tricky corner cases we need to deal
> with: in util_resume(), before we call vmbus_open(), all the userspace
> processes are still frozen, and the userspace daemon (e.g. hv_kvp_daemon) 
> can be in any of these states:
>
> 1. the driver has not buffered any message for the daemon. This is good.
>
> 2. the driver has buffered a message for the daemon, and
> kvp_transaction.state is HVUTIL_USERSPACE_REQ. Later, the kvp daemon
> writes the response to the driver, and in kvp_on_msg() 
> kvp_transaction.state is moved to HVUTIL_USERSPACE_RECV, but since
> cancel_delayed_work_sync(&kvp_timeout_work) is false (the work has
> been cancelled by util_suspend()), the driver reports nothing to the host,
> which is good as the VM has undergone a hibernation event and IMO the
> response may be stale and I believe the host is not expecting this 
> response from the VM at all (the host side application that requested the
> KVP must have failed or timed out), but the bad thing is: the "state"
> remains in HVUTIL_USERSPACE_RECV, preventing
> hv_kvp_onchannelcallback() from reading from the channel ringbuffer.
>
> I suggest we work around this race condition by the below patch:
>
> --- a/drivers/hv/hv_kvp.c
> +++ b/drivers/hv/hv_kvp.c
> @@ -250,8 +250,8 @@ static int kvp_on_msg(void *msg, int len)
>          */
>         if (cancel_delayed_work_sync(&kvp_timeout_work)) {
>                 kvp_respond_to_host(message, error);
> -               hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
>         }
> +       hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
>
>         return 0;
>  }
>
> How do you like this?
>

Is it safe to call hv_poll_channel() simultaneously on several CPUs? It
seems it is as we're doing 

smp_call_function_single(channel->target_cpu, cb, channel, true);

(I'm asking because if it's not, than doing what you suggest will open
the following window: timeout function kvp_timeout_func() is already
running but the daemon is trying to reply at the same moment).

> I can imagine there is still a small chance that the state machine can run
> out of order, and the kvp daemon may exit due to the return values of
> read()/write() being -EINVAL, but the chance should be small enough in
> practice, and IMO the issue even exists in the current code when
> hibernation is not involved, e.g. kvp_timeout_func() and kvp_on_msg() 
> can run concurrently; if kvp_on_msg() runs slowly due to some reason 
> (e.g. the kvp daemon is stopped as I'm gdb'ing it), kvp_timeout_func()
> fires and moves the state to HVUTIL_READY; later kvp_on_msg() starts
> to run and returns -EINVAL, and the kvp daemon will exit().
>
> IMHO here it looks extremely difficult to make things flawless (if that's
> even possible), so it's necessary to ask the daemons to auto-restart once
> they exit() unexpectedly. This can be achieved by configuring systemd
> properly for the kvp/vss/fcopy services. 

I think we can also teach daemons to ignore -EINVAL or switch to
something like -EAGAIN in non-fatal cases.
 
>
> I'm not sure introducing a HVUTIL_SUSPENDED state would solve all
> of the corner cases, but I'm sure that would further complicate the
> current code, at least to me. :-)
>

Maybe, if we don't need it than we don't. Basically, I see the only
advantage in having such state: it makes our tricks to support
hibernation more visible in the code.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-16  8:45       ` Vitaly Kuznetsov
@ 2019-09-19  6:34         ` Dexuan Cui
  2019-09-19 10:27           ` Vitaly Kuznetsov
  0 siblings, 1 reply; 12+ messages in thread
From: Dexuan Cui @ 2019-09-19  6:34 UTC (permalink / raw)
  To: vkuznets
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Monday, September 16, 2019 1:46 AM
> Dexuan Cui <decui@microsoft.com> writes:
> 
> >> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> >> Sent: Thursday, September 12, 2019 9:37 AM
> >
> >> > +static int util_suspend(struct hv_device *dev)
> >> > +{
> >> > +	struct hv_util_service *srv = hv_get_drvdata(dev);
> >> > +
> >> > +	if (srv->util_cancel_work)
> >> > +		srv->util_cancel_work();
> >> > +
> >> > +	vmbus_close(dev->channel);
> >>
> >> And what happens if you receive a real reply from userspace after you
> >> close the channel? You've only cancelled timeout work so the driver
> >> will not try to reply by itself but this doesn't mean we won't try to
> >> write to the channel on legitimate replies from daemons.
> >>
> >> I think you need to block all userspace requests (hang in kernel until
> >> util_resume()).
> >>
> >> While I'm not sure we can't get away without it but I'd suggest we add a
> >> new HVUTIL_SUSPENDED state to the hvutil state machine.
> >> Vitaly
> >
> > When we reach util_suspend(), all the userspace processes have been
> > frozen: see kernel/power/hibernate.c: hibernate() -> freeze_processes(),
> > so here we can not receive a reply from the userspace daemon.
> >
> 
> Let's add a WARN() or something then as if this ever happens this is
> going to be realy tricky to catch.

Looking at the path hibernate() -> freeze_processes() -> 
try_to_freeze_tasks(true) -> freeze_task() -> fake_signal_wake_up(), I'm
sure when try_to_freeze_tasks(true) returns 0, all the user-space processes
must be frozen in do_signal() -> get_signal() -> try_to_freeze() -> ... -> 
__refrigerator().

hibernate () -> hibernation_snapshot () -> dpm_suspend() -> ... -> 
util_suspend() only runs after hibernate() -> freeze_processes(), so I'm
pretty sure we're safe here.

> > However, it looks there is indeed some tricky corner cases we need to deal
> > with: in util_resume(), before we call vmbus_open(), all the userspace
> > processes are still frozen, and the userspace daemon (e.g. hv_kvp_daemon)
> > can be in any of these states:
> >
> > 1. the driver has not buffered any message for the daemon. This is good.
> >
> > 2. the driver has buffered a message for the daemon, and
> > kvp_transaction.state is HVUTIL_USERSPACE_REQ. Later, the kvp daemon
> > writes the response to the driver, and in kvp_on_msg()
> > kvp_transaction.state is moved to HVUTIL_USERSPACE_RECV, but since
> > cancel_delayed_work_sync(&kvp_timeout_work) is false (the work has
> > been cancelled by util_suspend()), the driver reports nothing to the host,
> > which is good as the VM has undergone a hibernation event and IMO the
> > response may be stale and I believe the host is not expecting this
> > response from the VM at all (the host side application that requested the
> > KVP must have failed or timed out), but the bad thing is: the "state"
> > remains in HVUTIL_USERSPACE_RECV, preventing
> > hv_kvp_onchannelcallback() from reading from the channel ringbuffer.
> >
> > I suggest we work around this race condition by the below patch:
> >
> > --- a/drivers/hv/hv_kvp.c
> > +++ b/drivers/hv/hv_kvp.c
> > @@ -250,8 +250,8 @@ static int kvp_on_msg(void *msg, int len)
> >          */
> >         if (cancel_delayed_work_sync(&kvp_timeout_work)) {
> >                 kvp_respond_to_host(message, error);
> > -               hv_poll_channel(kvp_transaction.recv_channel,
> kvp_poll_wrapper);
> >         }
> > +       hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
> >
> >         return 0;
> >  }
> >
> > How do you like this?
> >
> 
> Is it safe to call hv_poll_channel() simultaneously on several CPUs? It
> seems it is as we're doing
> 
> smp_call_function_single(channel->target_cpu, cb, channel, true);

Looks safe to me. The code has been there for years. :-)
 
> (I'm asking because if it's not, than doing what you suggest will open
> the following window: timeout function kvp_timeout_func() is already
> running but the daemon is trying to reply at the same moment).
> 
> > I can imagine there is still a small chance that the state machine can run
> > out of order, and the kvp daemon may exit due to the return values of
> > read()/write() being -EINVAL, but the chance should be small enough in
> > practice, and IMO the issue even exists in the current code when
> > hibernation is not involved, e.g. kvp_timeout_func() and kvp_on_msg()
> > can run concurrently; if kvp_on_msg() runs slowly due to some reason
> > (e.g. the kvp daemon is stopped as I'm gdb'ing it), kvp_timeout_func()
> > fires and moves the state to HVUTIL_READY; later kvp_on_msg() starts
> > to run and returns -EINVAL, and the kvp daemon will exit().
> >
> > IMHO here it looks extremely difficult to make things flawless (if that's
> > even possible), so it's necessary to ask the daemons to auto-restart once
> > they exit() unexpectedly. This can be achieved by configuring systemd
> > properly for the kvp/vss/fcopy services.
> 
> I think we can also teach daemons to ignore -EINVAL or switch to
> something like -EAGAIN in non-fatal cases.

Maybe the driver should return 0 rather than -EINVAL for the kvp daemon
in some scenarios, since kvp is never 100% reliable.

> > I'm not sure introducing a HVUTIL_SUSPENDED state would solve all
> > of the corner cases, but I'm sure that would further complicate the
> > current code, at least to me. :-)
> >
> 
> Maybe, if we don't need it than we don't. Basically, I see the only
> advantage in having such state: it makes our tricks to support
> hibernation more visible in the code.
> Vitaly

Let me think about this. 

BTW, for vss, maybe the VM should not hibernate if there is a backup 
ongoing? -- if the file system is frozen by hv_vss_daemon, and the VM
hibernates, then when the VM resumes back, it's almost always true that 
the VM won't receive the host's VSS_OP_THAW request, and the VM will
end up in an unusable state.

Thanks,
-- Dexuan


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-19  6:34         ` Dexuan Cui
@ 2019-09-19 10:27           ` Vitaly Kuznetsov
  2019-09-21  7:26             ` Dexuan Cui
  0 siblings, 1 reply; 12+ messages in thread
From: Vitaly Kuznetsov @ 2019-09-19 10:27 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

Dexuan Cui <decui@microsoft.com> writes:

> BTW, for vss, maybe the VM should not hibernate if there is a backup 
> ongoing? -- if the file system is frozen by hv_vss_daemon, and the VM
> hibernates, then when the VM resumes back, it's almost always true that 
> the VM won't receive the host's VSS_OP_THAW request, and the VM will
> end up in an unusable state.

Makes sense. Or, alternatively, can we postpone hibernation until after
VSS_OP_THAW?

-- 
Vitaly


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 1/3] hv_utils: Add the support of hibernation
  2019-09-19 10:27           ` Vitaly Kuznetsov
@ 2019-09-21  7:26             ` Dexuan Cui
  0 siblings, 0 replies; 12+ messages in thread
From: Dexuan Cui @ 2019-09-21  7:26 UTC (permalink / raw)
  To: vkuznets
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	linux-hyperv, linux-kernel, Michael Kelley

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Thursday, September 19, 2019 3:28 AM
> 
> Dexuan Cui <decui@microsoft.com> writes:
> 
> > BTW, for vss, maybe the VM should not hibernate if there is a backup
> > ongoing? -- if the file system is frozen by hv_vss_daemon, and the VM
> > hibernates, then when the VM resumes back, it's almost always true that
> > the VM won't receive the host's VSS_OP_THAW request, and the VM will
> > end up in an unusable state.
> 
> Makes sense. Or, alternatively, can we postpone hibernation until after
> VSS_OP_THAW?
> 
> Vitaly

It looks we should not postpone that, because:
1. When we're in util_suspend(), all the user space processes have been
frozen, so even if the VM receives the VSS_OP_THAW message form the host,
there is no chance for the hv_vss_daemon to handle it. 

2. Between the window the host sends the VSS_OP_FREEZE message and the
VSS_OP_THAW mesasge, util_suspend() may jump in and close the channel,
and then the host will not send a VSS_OP_THAW.

3. The host doesn't guarantee how soon it sends the VSS_OP_THAW message,
though in practice IIRC the host *usually* sends the message soon. The
hibernation process has a watchdog of 120s set by dpm_watchdog_set(): if
dpm_suspend() (which calls util_probe()) can not finish in 120 seconds, the
hibernation will be aborted.

3 may not look like a strong reason, but generally speaking I'd like to avoid
an indeterminate dependency.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-09-21  7:27 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-11 23:38 [PATCH 0/3] Enhance hv_utils to support hibernation Dexuan Cui
2019-09-11 23:38 ` [PATCH 1/3] hv_utils: Add the support of hibernation Dexuan Cui
2019-09-12 16:36   ` Vitaly Kuznetsov
2019-09-13 19:15     ` Dexuan Cui
2019-09-16  8:45       ` Vitaly Kuznetsov
2019-09-19  6:34         ` Dexuan Cui
2019-09-19 10:27           ` Vitaly Kuznetsov
2019-09-21  7:26             ` Dexuan Cui
2019-09-11 23:38 ` [PATCH 2/3] hv_utils: Support host-initiated hibernation request Dexuan Cui
2019-09-12 16:26   ` Vitaly Kuznetsov
2019-09-13 16:42     ` Dexuan Cui
2019-09-11 23:39 ` [PATCH 3/3] hv_utils: Support host-initiated restart request Dexuan Cui

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).