All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Require that xenstored writes to a domain complete in a single chunk
@ 2007-02-26 16:24 David Edmondson
  2007-02-26 17:20 ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: David Edmondson @ 2007-02-26 16:24 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 566 bytes --]

If xenstored is part-way through writing a header+payload into the
buffer shared with a guest domain when the guest domain decides to
suspend, the buffer is corrupted, as xenstored doesn't know that it
has a partial write to complete when the domain revives.  The domain
is expecting proper completion of the partial header+payload and is
disappointed.

The attached patch modifies xenstored such that it checks for
sufficient space for header+payload before making any changes to the
shared buffer.

It is against 3.0.4-1, but the code in unstable looks the same.


[-- Attachment #2: xenstored-complete-writes --]
[-- Type: text/plain, Size: 4725 bytes --]

Require that writes to a domain complete in a single chunk (i.e
both header and payload are written at the same time) to avoid leaving
the ring in an inconsistent state across suspend/resume.

Signed-off-by: David Edmondson <dme@sun.com>

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -264,10 +264,17 @@ static bool write_messages(struct connec
 {
 	int ret;
 	struct buffered_data *out;
+	unsigned int space_required = 0;
+	bool r = false;
 
 	out = list_top(&conn->out_list, struct buffered_data, list);
 	if (out == NULL)
 		return true;
+
+	if (out->inhdr)
+		space_required += sizeof (out->hdr);
+	space_required += out->hdr.msg.len;
+	space_required -= out->used;
 
 	if (out->inhdr) {
 		if (verbose)
@@ -277,36 +284,59 @@ static bool write_messages(struct connec
 				out->buffer, conn);
 		ret = conn->write(conn, out->hdr.raw + out->used,
 				  sizeof(out->hdr) - out->used);
-		if (ret < 0)
-			return false;
-
+		if (ret < 0) {
+			r = false;
+			goto done;
+		}
+
+		space_required -= ret;
 		out->used += ret;
-		if (out->used < sizeof(out->hdr))
-			return true;
+		if (out->used < sizeof(out->hdr)) {
+			r = true;
+			goto done;
+		}
 
 		out->inhdr = false;
 		out->used = 0;
 
 		/* Second write might block if non-zero. */
-		if (out->hdr.msg.len && !conn->domain)
-			return true;
+		if ((out->hdr.msg.len != 0) &&
+		    (conn->domain == NULL)) {
+			r = true;
+			goto done;
+		}
 	}
 
 	ret = conn->write(conn, out->buffer + out->used,
 			  out->hdr.msg.len - out->used);
-	if (ret < 0)
-		return false;
-
+	if (ret < 0) {
+		r = false;
+		goto done;
+	}
+
+	space_required -= ret;
 	out->used += ret;
-	if (out->used != out->hdr.msg.len)
-		return true;
-
+	if (out->used != out->hdr.msg.len) {
+		r = true;
+		goto done;
+	}
+
+	r = true;
 	trace_io(conn, "OUT", out);
 
 	list_del(&out->list);
 	talloc_free(out);
 
-	return true;
+done:
+	if ((conn->domain != NULL) && (space_required != 0))
+		/*
+		 * Not all of the data was consumed.  This can leave
+		 * the shared ring pointers in an inconsistent state
+		 * across suspend/resume.
+		 */
+		eprintf("partial write to shared page of domain");
+
+	return r;
 }
 
 static int destroy_conn(void *_conn)
@@ -1997,7 +2027,7 @@ int main(int argc, char *argv[])
 				goto more;
 			}
 
-			if (domain_can_write(i) && !list_empty(&i->out_list)) {
+			if (domain_can_write(i)) {
 				handle_output(i);
 				goto more;
 			}
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -109,23 +109,29 @@ static int writechn(struct connection *c
 	void *dest;
 	struct xenstore_domain_interface *intf = conn->domain->interface;
 	XENSTORE_RING_IDX cons, prod;
-
-	/* Must read indexes once, and before anything else, and verified. */
-	cons = intf->rsp_cons;
-	prod = intf->rsp_prod;
-	mb();
-	if (!check_indexes(cons, prod)) {
-		errno = EIO;
-		return -1;
-	}
-
-	dest = get_output_chunk(cons, prod, intf->rsp, &avail);
-	if (avail < len)
-		len = avail;
-
-	memcpy(dest, data, len);
-	mb();
-	intf->rsp_prod += len;
+	unsigned int remain = len;
+
+	while (remain > 0) {
+		/* Must read indexes once, and before anything else,
+		 * and verified. */
+		cons = intf->rsp_cons;
+		prod = intf->rsp_prod;
+		mb();
+		if (!check_indexes(cons, prod)) {
+			errno = EIO;
+			return -1;
+		}
+
+		dest = get_output_chunk(cons, prod, intf->rsp, &avail);
+		if (avail > remain)
+			avail = remain;
+		memcpy(dest, data, avail);
+		data += avail;
+		remain -= avail;
+
+		mb();
+		intf->rsp_prod += avail;
+	}
 
 	xc_evtchn_notify(xce_handle, conn->domain->port);
 
@@ -236,7 +242,30 @@ bool domain_can_write(struct connection 
 bool domain_can_write(struct connection *conn)
 {
 	struct xenstore_domain_interface *intf = conn->domain->interface;
-	return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
+	struct buffered_data *out;
+	unsigned int space_required = 0, space_available;
+
+	out = list_top(&conn->out_list, struct buffered_data, list);
+	if (out == NULL)
+		return false;
+
+	if (out->inhdr)
+		space_required += sizeof (out->hdr);
+	space_required += out->hdr.msg.len;
+	space_required -= out->used;
+
+	if (space_required > XENSTORE_RING_SIZE) {
+		eprintf("attempt to write %d bytes "
+			"to domain %d will never succeed",
+			space_required, conn->domain->domid);
+		talloc_free(conn);
+		return false;
+	}
+
+	space_available = XENSTORE_RING_SIZE -
+		(intf->rsp_prod - intf->rsp_cons);
+
+	return (space_available >= space_required);
 }
 
 static char *talloc_domain_path(void *context, unsigned int domid)

[-- Attachment #3: Type: text/plain, Size: 64 bytes --]


dme.
-- 
David Edmondson, Sun Microsystems, http://www.dme.org

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Require that xenstored writes to a domain complete in a single chunk
  2007-02-26 16:24 [PATCH] Require that xenstored writes to a domain complete in a single chunk David Edmondson
@ 2007-02-26 17:20 ` Keir Fraser
  2007-02-26 17:48   ` David Edmondson
  0 siblings, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2007-02-26 17:20 UTC (permalink / raw)
  To: David Edmondson, xen-devel

On 26/2/07 16:24, "David Edmondson" <dme@sun.com> wrote:

> If xenstored is part-way through writing a header+payload into the
> buffer shared with a guest domain when the guest domain decides to
> suspend, the buffer is corrupted, as xenstored doesn't know that it
> has a partial write to complete when the domain revives.  The domain
> is expecting proper completion of the partial header+payload and is
> disappointed.
> 
> The attached patch modifies xenstored such that it checks for
> sufficient space for header+payload before making any changes to the
> shared buffer.
> 
> It is against 3.0.4-1, but the code in unstable looks the same.

This seems dubious. There's no reason we might not have payloads bigger than
the ring size (which is only 1kB).

The right fix would be in the guest, which should already be stopping any
transactions or commands across save/restore. Does this problem occur when
xenstored sends an asynchronous watch-fired message? Probably the
packet-reading thread should be interrupted and put to sleep before
suspending.

For older guest compatibility perhaps we can take a variant of your patch
that only waits for enough space is the entire message fits in the ring in
one go. This would be 'best-effort' at compatibility while not precluding
use of larger messages in general.

 -- Keir

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Require that xenstored writes to a domain complete in a single chunk
  2007-02-26 17:20 ` Keir Fraser
@ 2007-02-26 17:48   ` David Edmondson
  2007-02-26 18:14     ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: David Edmondson @ 2007-02-26 17:48 UTC (permalink / raw)
  To: xen-devel

* keir@xensource.com [2007-02-26 17:20:34]
> This seems dubious. There's no reason we might not have payloads
> bigger than the ring size (which is only 1kB).

Agreed.

> The right fix would be in the guest, which should already be
> stopping any transactions or commands across save/restore. Does this
> problem occur when xenstored sends an asynchronous watch-fired
> message?

All of the cases I examined (a few dozen) were for watch events.

> Probably the packet-reading thread should be interrupted
> and put to sleep before suspending.

I'll look at this.

> For older guest compatibility perhaps we can take a variant of your
> patch that only waits for enough space is the entire message fits in
> the ring in one go. This would be 'best-effort' at compatibility
> while not precluding use of larger messages in general.

Is the implication that you think that this problem could occur with a
Linux guest (I've never seen it, though have tested much less)?

dme.
-- 
David Edmondson, Sun Microsystems, http://www.dme.org

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Re: [PATCH] Require that xenstored writes to a domain complete in a single chunk
  2007-02-26 17:48   ` David Edmondson
@ 2007-02-26 18:14     ` Keir Fraser
  2007-02-26 19:09       ` John Levon
  0 siblings, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2007-02-26 18:14 UTC (permalink / raw)
  To: David Edmondson, xen-devel

On 26/2/07 17:48, "David Edmondson" <dme@sun.com> wrote:

>> For older guest compatibility perhaps we can take a variant of your
>> patch that only waits for enough space is the entire message fits in
>> the ring in one go. This would be 'best-effort' at compatibility
>> while not precluding use of larger messages in general.
> 
> Is the implication that you think that this problem could occur with a
> Linux guest (I've never seen it, though have tested much less)?

The Linux suspend thread does not sync with the xenbus reader thread at all.
I'm not sure why we've never seen any problems on Linux, but I guess it's
rare that a message cannot be sent all in one go. Especially a watch event,
as those are usually fairly short.

Oh..... Wait a minute! On Linux we explicitly tear down watches before
suspend. Or at least we used to, before a patch of a few weeks ago (c/s
13519, Jan 19th 2007). This would save us because no watches registered ->
no watches fire. Do you not have anything similar to this in your xenbus
code (presumably you took the dual-licensed files as a basis for the Solaris
implementation)?

So Linux now needs fixing too, but the bug window here has been only a few
weeks and no supported kernel releases include the bug. This being the case
we should probably just fix this issue in current guest kernels.

 -- Keir

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Re: [PATCH] Require that xenstored writes to a domain complete in a single chunk
  2007-02-26 18:14     ` Keir Fraser
@ 2007-02-26 19:09       ` John Levon
  2007-02-26 23:12         ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: John Levon @ 2007-02-26 19:09 UTC (permalink / raw)
  To: Keir Fraser; +Cc: David Edmondson, xen-devel

On Mon, Feb 26, 2007 at 06:14:05PM +0000, Keir Fraser wrote:

> The Linux suspend thread does not sync with the xenbus reader thread at all.
> I'm not sure why we've never seen any problems on Linux, but I guess it's
> rare that a message cannot be sent all in one go. Especially a watch event,
> as those are usually fairly short.

Presumably we have to make sure we've waited for all threads waiting for
a response to have woken up from the sleep-until-reply too, though; I'm
not sure we do that.

regards
john

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Re: [PATCH] Require that xenstored writes to a domain complete in a single chunk
  2007-02-26 19:09       ` John Levon
@ 2007-02-26 23:12         ` Keir Fraser
  0 siblings, 0 replies; 6+ messages in thread
From: Keir Fraser @ 2007-02-26 23:12 UTC (permalink / raw)
  To: John Levon, Keir Fraser; +Cc: David Edmondson, xen-devel

On 26/2/07 19:09, "John Levon" <levon@movementarian.org> wrote:

>> The Linux suspend thread does not sync with the xenbus reader thread at all.
>> I'm not sure why we've never seen any problems on Linux, but I guess it's
>> rare that a message cannot be sent all in one go. Especially a watch event,
>> as those are usually fairly short.
> 
> Presumably we have to make sure we've waited for all threads waiting for
> a response to have woken up from the sleep-until-reply too, though; I'm
> not sure we do that.

Request-response pairs are serialised on a mutex. This mutex is locked out
across save/restore, which means that save/restore cannot proceed until any
outstanding response has been fully received. There's nothing even halfway
tricky going on here. :-)

 -- Keir

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2007-02-26 23:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-26 16:24 [PATCH] Require that xenstored writes to a domain complete in a single chunk David Edmondson
2007-02-26 17:20 ` Keir Fraser
2007-02-26 17:48   ` David Edmondson
2007-02-26 18:14     ` Keir Fraser
2007-02-26 19:09       ` John Levon
2007-02-26 23:12         ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.