All of lore.kernel.org
 help / color / mirror / Atom feed
From: Julien Grall <julien@xen.org>
To: xen-devel@lists.xenproject.org
Cc: raphning@amazon.co.uk, doebel@amazon.de,
	Julien Grall <jgrall@amazon.com>,
	Ian Jackson <iwj@xenproject.org>, Wei Liu <wl@xen.org>,
	Juergen Gross <jgross@suse.com>, Julien Grall <julien@xen.org>
Subject: [PATCH 10/10] tools/xenstored: Delay new transaction while Live-Update is pending
Date: Wed, 16 Jun 2021 15:43:24 +0100	[thread overview]
Message-ID: <20210616144324.31652-11-julien@xen.org> (raw)
In-Reply-To: <20210616144324.31652-1-julien@xen.org>

From: Julien Grall <jgrall@amazon.com>

At the moment, Live-Update will, by default, not proceed if there are
in-flight transactions. It is possible force it by passing -F but this
will break any connection with in-flight transactions.

There are PV drivers out that may never terminate some transaction. On
host running such guest, we would need to use -F. Unfortunately, this
also risks to break well-behaving guests (and even dom0) because
Live-Update will happen as soon as the timeout is hit.

Ideally, we would want to preserve transactions but this requires
some work and a lot of testing to be able to use it in production.

As a stop gap, we want to limit the damage of -F. This patch will delay
any transactions that are started after Live-Update has been requested.

If the request cannot be delayed, the connection will be stalled to
avoid loosing requests.

If the connection has already a pending transaction before Live-Update,
then new transaction will not be delayed. This is to avoid the connection
to stall.

With this stop gap in place, domains with long running transactions will
still break when using -F, but other domains which starts a transaction
in the middle of Live-Update will continue to work.

Signed-off-by: Julien Grall <jgrall@amazon.com>
---
 tools/xenstore/xenstored_control.c | 10 ++++++
 tools/xenstore/xenstored_control.h |  2 ++
 tools/xenstore/xenstored_core.c    | 49 +++++++++++++++++++++++++++++-
 tools/xenstore/xenstored_core.h    |  3 ++
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/tools/xenstore/xenstored_control.c b/tools/xenstore/xenstored_control.c
index 1c24d4869eab..a045f102a420 100644
--- a/tools/xenstore/xenstored_control.c
+++ b/tools/xenstore/xenstored_control.c
@@ -131,6 +131,11 @@ unsigned int lu_write_response(FILE *fp)
 	return sizeof(msg) + msg.len;
 }
 
+bool lu_is_pending(void)
+{
+	return lu_status != NULL;
+}
+
 #else
 struct connection *lu_get_connection(void)
 {
@@ -142,6 +147,11 @@ unsigned int lu_write_response(FILE *fp)
 	/* Unsupported */
 	return 0;
 }
+
+bool lu_is_pending(void)
+{
+	return false;
+}
 #endif
 
 struct cmd_s {
diff --git a/tools/xenstore/xenstored_control.h b/tools/xenstore/xenstored_control.h
index 27d7f19e4b7f..98b6fbcea2b1 100644
--- a/tools/xenstore/xenstored_control.h
+++ b/tools/xenstore/xenstored_control.h
@@ -23,3 +23,5 @@ struct connection *lu_get_connection(void);
 
 /* Write the "OK" response for the live-update command */
 unsigned int lu_write_response(FILE *fp);
+
+bool lu_is_pending(void);
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 9eca58682b51..10b53af76ac5 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -338,7 +338,20 @@ static int destroy_conn(void *_conn)
 
 static bool conn_can_read(struct connection *conn)
 {
-	return conn->funcs->can_read(conn) && !conn->is_ignored;
+	if (!conn->funcs->can_read(conn))
+		return false;
+
+	if (conn->is_ignored)
+		return false;
+
+	/*
+	 * For stalled connection, we want to process the pending
+	 * command as soon as live-update has aborted.
+	 */
+	if (conn->is_stalled)
+		return !lu_is_pending();
+
+	return true;
 }
 
 static bool conn_can_write(struct connection *conn)
@@ -417,6 +430,12 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *ptimeout)
 			if (!list_empty(&conn->out_list))
 				events |= POLLOUT;
 			conn->pollfd_idx = set_fd(conn->fd, events);
+			/*
+			 * For stalled connection, we want to process the
+			 * pending command as soon as live-update has aborted.
+			 */
+			if (conn->is_stalled && !lu_is_pending())
+				*ptimeout = 0;
 		}
 	}
 }
@@ -1524,6 +1543,9 @@ static bool process_delayed_message(struct delayed_request *req)
 	struct connection *conn = req->data;
 	struct buffered_data *saved_in = conn->in;
 
+	if (lu_is_pending())
+		return false;
+
 	/*
 	 * Part of process_message() expects conn->in to contains the
 	 * processed response. So save the current conn->in and restore it
@@ -1543,6 +1565,30 @@ static void consider_message(struct connection *conn)
 			sockmsg_string(conn->in->hdr.msg.type),
 			conn->in->hdr.msg.len, conn);
 
+	conn->is_stalled = false;
+	/*
+	 * Currently, Live-Update is not supported if there is active
+	 * transactions. In order to reduce the number of retry, delay
+	 * any new request to start a transaction if Live-Update is pending
+	 * and there are no transactions in-flight.
+	 *
+	 * If we can't delay the request, then mark the connection as
+	 * stalled. This will ignore new requests until Live-Update happened
+	 * or it was aborted.
+	 */
+	if (lu_is_pending() && conn->transaction_started == 0 &&
+	    conn->in->hdr.msg.type == XS_TRANSACTION_START) {
+		trace("Delaying transaction start for connection %p req_id %u\n",
+		      conn, conn->in->hdr.msg.req_id);
+
+		if (delay_request(conn, conn->in, process_delayed_message,
+				  conn, false) != 0) {
+			trace("Stalling connection %p\n", conn);
+			conn->is_stalled = true;
+		}
+		return;
+	}
+
 	process_message(conn, conn->in);
 
 	assert(conn->in == NULL);
@@ -1629,6 +1675,7 @@ struct connection *new_connection(const struct interface_funcs *funcs)
 	new->pollfd_idx = -1;
 	new->funcs = funcs;
 	new->is_ignored = false;
+	new->is_stalled = false;
 	new->transaction_started = 0;
 	INIT_LIST_HEAD(&new->out_list);
 	INIT_LIST_HEAD(&new->watches);
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index dac517156993..258f6ff38279 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -110,6 +110,9 @@ struct connection
 	/* Is this connection ignored? */
 	bool is_ignored;
 
+	/* Is the connection stalled? */
+	bool is_stalled;
+
 	/* Buffered incoming data. */
 	struct buffered_data *in;
 
-- 
2.17.1



  parent reply	other threads:[~2021-06-16 14:53 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-16 14:43 [PATCH 00/10] tools/xenstored: Bug fixes + Improve Live-Update Julien Grall
2021-06-16 14:43 ` [PATCH 01/10] MAINTAINERS: Add myself as reviewers for tools/xenstore Julien Grall
2021-06-16 15:15   ` Juergen Gross
2021-06-16 14:43 ` [PATCH 02/10] tools/xenstored: Introduce lu_get_connection() and use it Julien Grall
2021-06-21  8:21   ` Luca Fancellu
2021-06-24  7:21   ` Juergen Gross
2021-06-16 14:43 ` [PATCH 03/10] tools/xenstore: Don't assume conn->in points to the LU request Julien Grall
2021-06-21  8:55   ` Luca Fancellu
2021-06-24  8:06     ` Julien Grall
2021-06-24  7:32   ` Juergen Gross
2021-06-24  7:34     ` Juergen Gross
2021-06-24  7:56       ` Luca Fancellu
2021-06-24  8:05         ` Juergen Gross
2021-06-16 14:43 ` [PATCH 04/10] tools/xenstored: Limit the number of requests a connection can delay Julien Grall
2021-06-21  9:02   ` Luca Fancellu
2021-06-24  7:35   ` Juergen Gross
2021-06-16 14:43 ` [PATCH 05/10] tools/xenstored: xenstored_core.h should include fcntl.h Julien Grall
2021-06-21  9:03   ` Luca Fancellu
2021-06-24  7:36   ` Juergen Gross
2021-06-16 14:43 ` [PATCH 06/10] tools/xenstored: Introduce a wrapper for conn->funcs->can_{read, write} Julien Grall
2021-06-21  9:10   ` Luca Fancellu
2021-06-24  7:39   ` Juergen Gross
2021-06-16 14:43 ` [PATCH 07/10] tools/xenstored: delay_request: don't assume conn->in == in Julien Grall
2021-06-21  9:12   ` Luca Fancellu
2021-06-24  7:44   ` Juergen Gross
2021-06-24  7:58     ` Julien Grall
2021-06-16 14:43 ` [PATCH 08/10] tools/xenstored: Extend restore code to handle multiple input buffer Julien Grall
2021-06-21  9:21   ` Luca Fancellu
2021-06-24  8:30   ` Juergen Gross
2021-06-24  8:42     ` Julien Grall
2021-06-24  9:20       ` Juergen Gross
2021-06-16 14:43 ` [PATCH 09/10] tools/xenstored: Dump delayed requests Julien Grall
2021-06-21  9:27   ` Luca Fancellu
2021-06-24  8:41   ` Juergen Gross
2021-06-24 10:28     ` Julien Grall
2021-06-24 10:45       ` Juergen Gross
2021-06-24 10:46         ` Julien Grall
2021-06-24 11:02           ` Juergen Gross
2021-06-24 11:17             ` Julien Grall
2021-06-16 14:43 ` Julien Grall [this message]
2021-06-21  9:30   ` [PATCH 10/10] tools/xenstored: Delay new transaction while Live-Update is pending Luca Fancellu
2021-06-24  9:23   ` Juergen Gross
2021-06-24 10:43 ` [PATCH 00/10] tools/xenstored: Bug fixes + Improve Live-Update Julien Grall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210616144324.31652-11-julien@xen.org \
    --to=julien@xen.org \
    --cc=doebel@amazon.de \
    --cc=iwj@xenproject.org \
    --cc=jgrall@amazon.com \
    --cc=jgross@suse.com \
    --cc=raphning@amazon.co.uk \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    --subject='Re: [PATCH 10/10] tools/xenstored: Delay new transaction while Live-Update is pending' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.