linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/4] Tools: hv: Reopen the devices if read() or write() returns errors
@ 2020-01-13  6:30 Dexuan Cui
  2020-01-22 16:53 ` Michael Kelley
  0 siblings, 1 reply; 3+ messages in thread
From: Dexuan Cui @ 2020-01-13  6:30 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	Sasha Levin, linux-hyperv, Michael Kelley, vkuznets,
	linux-kernel


The state machine in the hv_utils driver can run out of order in some
corner cases, e.g. if the kvp daemon doesn't call write() fast enough
due to some reason, kvp_timeout_func() can run first and move the state
to HVUTIL_READY; next, when kvp_on_msg() is called it returns -EINVAL
since kvp_transaction.state is smaller than HVUTIL_USERSPACE_REQ; later,
the daemon's write() gets an error -EINVAL, and the daemon will exit().

We can reproduce the issue by sending a SIGSTOP signal to the daemon, wait
for 1 minute, and send a SIGCONT signal to the daemon: the daemon will
exit() quickly.

We can fix the issue by forcing a reset of the device (which means the
daemon can close() and open() the device again) and doing extra necessary
clean-up.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 tools/hv/hv_fcopy_daemon.c | 19 +++++++++++++++----
 tools/hv/hv_kvp_daemon.c   | 25 ++++++++++++++-----------
 tools/hv/hv_vss_daemon.c   | 25 +++++++++++++++++++------
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index aea2d91ab364..a78a5292589b 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -21,7 +21,7 @@
 #include <fcntl.h>
 #include <getopt.h>
 
-static int target_fd;
+static int target_fd = -1;
 static char target_fname[PATH_MAX];
 static unsigned long long filesize;
 
@@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg)
 
 	error = 0;
 done:
+	if (error)
+		memset(target_fname, 0, sizeof(target_fname));
 	return error;
 }
 
@@ -111,12 +113,16 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg)
 static int hv_copy_finished(void)
 {
 	close(target_fd);
+	target_fd = -1;
+	memset(target_fname, 0, sizeof(target_fname));
 	return 0;
 }
 static int hv_copy_cancel(void)
 {
 	close(target_fd);
+	target_fd = -1;
 	unlink(target_fname);
+	memset(target_fname, 0, sizeof(target_fname));
 	return 0;
 
 }
@@ -141,7 +147,7 @@ int main(int argc, char *argv[])
 		struct hv_do_fcopy copy;
 		__u32 kernel_modver;
 	} buffer = { };
-	int in_handshake = 1;
+	int in_handshake;
 
 	static struct option long_options[] = {
 		{"help",	no_argument,	   0,  'h' },
@@ -170,6 +176,9 @@ int main(int argc, char *argv[])
 	openlog("HV_FCOPY", 0, LOG_USER);
 	syslog(LOG_INFO, "starting; pid is:%d", getpid());
 
+reopen_fcopy_fd:
+	hv_copy_cancel();
+	in_handshake = 1;
 	fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
 
 	if (fcopy_fd < 0) {
@@ -196,7 +205,8 @@ int main(int argc, char *argv[])
 		len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
 		if (len < 0) {
 			syslog(LOG_ERR, "pread failed: %s", strerror(errno));
-			exit(EXIT_FAILURE);
+			close(fcopy_fd);
+			goto reopen_fcopy_fd;
 		}
 
 		if (in_handshake) {
@@ -233,7 +243,8 @@ int main(int argc, char *argv[])
 
 		if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
 			syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
-			exit(EXIT_FAILURE);
+			close(fcopy_fd);
+			goto reopen_fcopy_fd;
 		}
 	}
 }
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index e9ef4ca6a655..3282d48c4487 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,7 +76,7 @@ enum {
 	DNS
 };
 
-static int in_hand_shake = 1;
+static int in_hand_shake;
 
 static char *os_name = "";
 static char *os_major = "";
@@ -1400,14 +1400,6 @@ int main(int argc, char *argv[])
 	openlog("KVP", 0, LOG_USER);
 	syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
 
-	kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
-
-	if (kvp_fd < 0) {
-		syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
-			errno, strerror(errno));
-		exit(EXIT_FAILURE);
-	}
-
 	/*
 	 * Retrieve OS release information.
 	 */
@@ -1423,6 +1415,16 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
+reopen_kvp_fd:
+	in_hand_shake = 1;
+	kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
+
+	if (kvp_fd < 0) {
+		syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
+		       errno, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
 	/*
 	 * Register ourselves with the kernel.
 	 */
@@ -1458,7 +1460,7 @@ int main(int argc, char *argv[])
 			       errno, strerror(errno));
 
 			close(kvp_fd);
-			return EXIT_FAILURE;
+			goto reopen_kvp_fd;
 		}
 
 		/*
@@ -1623,7 +1625,8 @@ int main(int argc, char *argv[])
 		if (len != sizeof(struct hv_kvp_msg)) {
 			syslog(LOG_ERR, "write failed; error: %d %s", errno,
 			       strerror(errno));
-			exit(EXIT_FAILURE);
+			close(kvp_fd);
+			goto reopen_kvp_fd;
 		}
 	}
 
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 92902a88f671..e70fed66a5ae 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -28,6 +28,8 @@
 #include <stdbool.h>
 #include <dirent.h>
 
+static bool fs_frozen;
+
 /* Don't use syslog() in the function since that can cause write to disk */
 static int vss_do_freeze(char *dir, unsigned int cmd)
 {
@@ -155,8 +157,11 @@ static int vss_operate(int operation)
 			continue;
 		}
 		error |= vss_do_freeze(ent->mnt_dir, cmd);
-		if (error && operation == VSS_OP_FREEZE)
-			goto err;
+		if (operation == VSS_OP_FREEZE) {
+			if (error)
+				goto err;
+			fs_frozen = true;
+		}
 	}
 
 	endmntent(mounts);
@@ -167,6 +172,9 @@ static int vss_operate(int operation)
 			goto err;
 	}
 
+	if (operation == VSS_OP_THAW && !error)
+		fs_frozen = false;
+
 	goto out;
 err:
 	save_errno = errno;
@@ -175,6 +183,7 @@ static int vss_operate(int operation)
 		endmntent(mounts);
 	}
 	vss_operate(VSS_OP_THAW);
+	fs_frozen = false;
 	/* Call syslog after we thaw all filesystems */
 	if (ent)
 		syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
@@ -202,7 +211,7 @@ int main(int argc, char *argv[])
 	int	op;
 	struct hv_vss_msg vss_msg[1];
 	int daemonize = 1, long_index = 0, opt;
-	int in_handshake = 1;
+	int in_handshake;
 	__u32 kernel_modver;
 
 	static struct option long_options[] = {
@@ -232,6 +241,10 @@ int main(int argc, char *argv[])
 	openlog("Hyper-V VSS", 0, LOG_USER);
 	syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
 
+reopen_vss_fd:
+	if (fs_frozen)
+		vss_operate(VSS_OP_THAW);
+	in_handshake = 1;
 	vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
 	if (vss_fd < 0) {
 		syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
@@ -285,7 +298,7 @@ int main(int argc, char *argv[])
 			syslog(LOG_ERR, "read failed; error:%d %s",
 			       errno, strerror(errno));
 			close(vss_fd);
-			return EXIT_FAILURE;
+			goto reopen_vss_fd;
 		}
 
 		op = vss_msg->vss_hdr.operation;
@@ -318,8 +331,8 @@ int main(int argc, char *argv[])
 			syslog(LOG_ERR, "write failed; error: %d %s", errno,
 			       strerror(errno));
 
-			if (op == VSS_OP_FREEZE)
-				vss_operate(VSS_OP_THAW);
+			close(vss_fd);
+			goto reopen_vss_fd;
 		}
 	}
 
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-01-23  8:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-13  6:30 [PATCH v2 1/4] Tools: hv: Reopen the devices if read() or write() returns errors Dexuan Cui
2020-01-22 16:53 ` Michael Kelley
2020-01-23  8:10   ` Dexuan Cui

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).