[PATCH] Require that xenstored writes to a domain complete in a single chunk

* [PATCH] Require that xenstored writes to a domain complete in a single chunk
@ 2007-02-26 16:24 David Edmondson
  2007-02-26 17:20 ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: David Edmondson @ 2007-02-26 16:24 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 566 bytes --]

If xenstored is part-way through writing a header+payload into the
buffer shared with a guest domain when the guest domain decides to
suspend, the buffer is corrupted, as xenstored doesn't know that it
has a partial write to complete when the domain revives.  The domain
is expecting proper completion of the partial header+payload and is
disappointed.

The attached patch modifies xenstored such that it checks for
sufficient space for header+payload before making any changes to the
shared buffer.

It is against 3.0.4-1, but the code in unstable looks the same.


[-- Attachment #2: xenstored-complete-writes --]
[-- Type: text/plain, Size: 4725 bytes --]

Require that writes to a domain complete in a single chunk (i.e
both header and payload are written at the same time) to avoid leaving
the ring in an inconsistent state across suspend/resume.

Signed-off-by: David Edmondson <dme@sun.com>

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -264,10 +264,17 @@ static bool write_messages(struct connec
 {
 	int ret;
 	struct buffered_data *out;
+	unsigned int space_required = 0;
+	bool r = false;
 
 	out = list_top(&conn->out_list, struct buffered_data, list);
 	if (out == NULL)
 		return true;
+
+	if (out->inhdr)
+		space_required += sizeof (out->hdr);
+	space_required += out->hdr.msg.len;
+	space_required -= out->used;
 
 	if (out->inhdr) {
 		if (verbose)
@@ -277,36 +284,59 @@ static bool write_messages(struct connec
 				out->buffer, conn);
 		ret = conn->write(conn, out->hdr.raw + out->used,
 				  sizeof(out->hdr) - out->used);
-		if (ret < 0)
-			return false;
-
+		if (ret < 0) {
+			r = false;
+			goto done;
+		}
+
+		space_required -= ret;
 		out->used += ret;
-		if (out->used < sizeof(out->hdr))
-			return true;
+		if (out->used < sizeof(out->hdr)) {
+			r = true;
+			goto done;
+		}
 
 		out->inhdr = false;
 		out->used = 0;
 
 		/* Second write might block if non-zero. */
-		if (out->hdr.msg.len && !conn->domain)
-			return true;
+		if ((out->hdr.msg.len != 0) &&
+		    (conn->domain == NULL)) {
+			r = true;
+			goto done;
+		}
 	}
 
 	ret = conn->write(conn, out->buffer + out->used,
 			  out->hdr.msg.len - out->used);
-	if (ret < 0)
-		return false;
-
+	if (ret < 0) {
+		r = false;
+		goto done;
+	}
+
+	space_required -= ret;
 	out->used += ret;
-	if (out->used != out->hdr.msg.len)
-		return true;
-
+	if (out->used != out->hdr.msg.len) {
+		r = true;
+		goto done;
+	}
+
+	r = true;
 	trace_io(conn, "OUT", out);
 
 	list_del(&out->list);
 	talloc_free(out);
 
-	return true;
+done:
+	if ((conn->domain != NULL) && (space_required != 0))
+		/*
+		 * Not all of the data was consumed.  This can leave
+		 * the shared ring pointers in an inconsistent state
+		 * across suspend/resume.
+		 */
+		eprintf("partial write to shared page of domain");
+
+	return r;
 }
 
 static int destroy_conn(void *_conn)
@@ -1997,7 +2027,7 @@ int main(int argc, char *argv[])
 				goto more;
 			}
 
-			if (domain_can_write(i) && !list_empty(&i->out_list)) {
+			if (domain_can_write(i)) {
 				handle_output(i);
 				goto more;
 			}
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -109,23 +109,29 @@ static int writechn(struct connection *c
 	void *dest;
 	struct xenstore_domain_interface *intf = conn->domain->interface;
 	XENSTORE_RING_IDX cons, prod;
-
-	/* Must read indexes once, and before anything else, and verified. */
-	cons = intf->rsp_cons;
-	prod = intf->rsp_prod;
-	mb();
-	if (!check_indexes(cons, prod)) {
-		errno = EIO;
-		return -1;
-	}
-
-	dest = get_output_chunk(cons, prod, intf->rsp, &avail);
-	if (avail < len)
-		len = avail;
-
-	memcpy(dest, data, len);
-	mb();
-	intf->rsp_prod += len;
+	unsigned int remain = len;
+
+	while (remain > 0) {
+		/* Must read indexes once, and before anything else,
+		 * and verified. */
+		cons = intf->rsp_cons;
+		prod = intf->rsp_prod;
+		mb();
+		if (!check_indexes(cons, prod)) {
+			errno = EIO;
+			return -1;
+		}
+
+		dest = get_output_chunk(cons, prod, intf->rsp, &avail);
+		if (avail > remain)
+			avail = remain;
+		memcpy(dest, data, avail);
+		data += avail;
+		remain -= avail;
+
+		mb();
+		intf->rsp_prod += avail;
+	}
 
 	xc_evtchn_notify(xce_handle, conn->domain->port);
 
@@ -236,7 +242,30 @@ bool domain_can_write(struct connection 
 bool domain_can_write(struct connection *conn)
 {
 	struct xenstore_domain_interface *intf = conn->domain->interface;
-	return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
+	struct buffered_data *out;
+	unsigned int space_required = 0, space_available;
+
+	out = list_top(&conn->out_list, struct buffered_data, list);
+	if (out == NULL)
+		return false;
+
+	if (out->inhdr)
+		space_required += sizeof (out->hdr);
+	space_required += out->hdr.msg.len;
+	space_required -= out->used;
+
+	if (space_required > XENSTORE_RING_SIZE) {
+		eprintf("attempt to write %d bytes "
+			"to domain %d will never succeed",
+			space_required, conn->domain->domid);
+		talloc_free(conn);
+		return false;
+	}
+
+	space_available = XENSTORE_RING_SIZE -
+		(intf->rsp_prod - intf->rsp_cons);
+
+	return (space_available >= space_required);
 }
 
 static char *talloc_domain_path(void *context, unsigned int domid)

[-- Attachment #3: Type: text/plain, Size: 64 bytes --]


dme.
-- 
David Edmondson, Sun Microsystems, http://www.dme.org

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread