xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Jonathan Davies <jonathan.davies@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Jonathan Davies <jonathan.davies@citrix.com>,
	Jon Ludlam <jonathan.ludlam@citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Euan Harris <euan.harris@citrix.com>,
	Dave Scott <dave@recoil.org>
Subject: [PATCH 6/7] oxenstored: replay transaction upon conflict
Date: Thu, 17 Mar 2016 17:51:14 +0000	[thread overview]
Message-ID: <1458237075-13777-7-git-send-email-jonathan.davies@citrix.com> (raw)
In-Reply-To: <1458237075-13777-1-git-send-email-jonathan.davies@citrix.com>

The existing transaction merge algorithm keeps track of the least upper bound
(longest common prefix) of all the nodes which have been read and written, and
will re-combine two stores which have disjoint upper bounds. This works well for
small transactions but causes unnecessary conflicts for ones that span a large
subtree, such as the following ones used by the xapi toolstack:

 * VM start: creates /vm/... /vss/... /local/domain/...
   The least upper bound of this transaction is / and so all
   these transactions conflict with everything.

 * Device hotplug: creates /local/domain/0/... /local/domain/n/...
   The least upper bound of this transaction is /local/domain so
   all these transactions conflict with each other.

If the existing merge algorithm cannot merge and commit, we attempt
a /replay/ of the failed transaction against the new store.

When we replay the requests we check whether the response sent to the client is
the same as during the first attempt at the transaction. If the responses are
all the same then the transaction replay can be committed. If any differ then
the transaction replay must be aborted and the client must retry.

This algorithm uses the intuition that the transactions made by the toolstack
are designed to be for separate domains, and should fundamentally not conflict
in the sense that they don't read or write any shared keys. By replaying the
transaction on the server side we do what the client would have to do anyway,
only we can do it quickly without allowing any other requests to interfere.

Performing 300 parallel simulated VM starts and shutdowns without this code:

300 parallel starts and shutdowns: 268.92

Performing 300 parallel simulated VM starts and shutdowns with this code:

300 parallel starts and shutdowns: 3.80

Signed-off-by: Dave Scott <dave@recoil.org>
Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jon Ludlam <jonathan.ludlam@citrix.com>
Reviewed-by: Euan Harris <euan.harris@citrix.com>
---
 tools/ocaml/xenstored/connection.ml |    5 ++++-
 tools/ocaml/xenstored/process.ml    |   33 +++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
index 0a2c481..b18336f 100644
--- a/tools/ocaml/xenstored/connection.ml
+++ b/tools/ocaml/xenstored/connection.ml
@@ -233,7 +233,10 @@ let end_transaction con tid commit =
 	let trans = Hashtbl.find con.transactions tid in
 	Hashtbl.remove con.transactions tid;
 	Logging.end_transaction ~tid ~con:(get_domstr con);
-	if commit then Transaction.commit ~con:(get_domstr con) trans else true
+	match commit with
+	| None -> true
+	| Some transaction_replay_f ->
+		Transaction.commit ~con:(get_domstr con) trans || transaction_replay_f con trans
 
 let get_transaction con tid =
 	Hashtbl.find con.transactions tid
diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
index 39ae71b..6d1f551 100644
--- a/tools/ocaml/xenstored/process.ml
+++ b/tools/ocaml/xenstored/process.ml
@@ -281,6 +281,38 @@ let input_handle_error ~cons ~doms ~fct ~con ~t ~req =
 	| (Failure "int_of_string")    -> reply_error "EINVAL"
 	| Define.Unknown_operation     -> reply_error "ENOSYS"
 
+(* Replay a stored transaction against a fresh store and check that every
+   response matches the one originally sent: if so, commit the replay.
+   Otherwise return false so that the caller can tell the client to retry. *)
+let transaction_replay c t doms cons =
+	match t.Transaction.ty with
+	| Transaction.No ->
+		error "attempted to replay a non-full transaction";
+		false
+	| Transaction.Full(id, oldroot, cstore) ->
+		let tid = Connection.start_transaction c cstore in
+		let new_t = Transaction.make tid cstore in
+		let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in
+		let perform_exn (request, response) =
+			let fct = function_of_type_simple_op request.Packet.ty in
+			let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:new_t ~req:request in
+			if not(Packet.response_equal response response') then raise Transaction_again in
+		finally
+		(fun () ->
+			try
+				Logging.start_transaction ~con ~tid;
+				List.iter perform_exn (Transaction.get_operations t);
+				Logging.end_transaction ~con ~tid;
+
+				Transaction.commit ~con new_t
+			with e ->
+				info "transaction_replay %d caught: %s" tid (Printexc.to_string e);
+				false
+			)
+		(fun () ->
+			Connection.end_transaction c tid None
+		)
+
 let do_watch con t domains cons data =
 	let (node, token) = 
 		match (split None '\000' data) with
@@ -313,6 +345,7 @@ let do_transaction_end con t domains cons data =
 		| _        -> raise Invalid_Cmd_Args
 		in
 	let success =
+		let commit = if commit then Some (fun con trans -> transaction_replay con trans domains cons) else None in
 		Connection.end_transaction con (Transaction.get_id t) commit in
 	if not success then
 		raise Transaction_again;
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  parent reply	other threads:[~2016-03-17 17:51 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-17 17:51 [PATCH 0/7] oxenstored: improve transaction conflict handling Jonathan Davies
2016-03-17 17:51 ` [PATCH 1/7] oxenstored: refactor putting response on wire Jonathan Davies
2016-03-17 17:51 ` [PATCH 2/7] oxenstored: remove some unused parameters Jonathan Davies
2016-03-17 17:51 ` [PATCH 3/7] oxenstored: refactor request processing Jonathan Davies
2016-03-24 22:22   ` Boris Ostrovsky
2016-03-24 22:49     ` Andrew Cooper
2016-03-24 23:57       ` Boris Ostrovsky
2016-03-29  9:08         ` Jonathan Davies
2016-03-29 12:45           ` Boris Ostrovsky
2016-03-29 16:38           ` Wei Liu
2016-03-29 19:41             ` David Scott
2016-03-30 15:46               ` Jonathan Davies
2016-03-30 15:53                 ` Wei Liu
2016-03-17 17:51 ` [PATCH 4/7] oxenstored: keep track of each transaction's operations Jonathan Davies
2016-03-17 17:51 ` [PATCH 5/7] oxenstored: move functions that process simple operations Jonathan Davies
2016-03-17 17:51 ` Jonathan Davies [this message]
2016-03-17 17:51 ` [PATCH 7/7] oxenstored: log request and response during transaction replay Jonathan Davies
2016-03-18 14:33 ` [PATCH 0/7] oxenstored: improve transaction conflict handling Konrad Rzeszutek Wilk
2016-03-18 16:21   ` Jonathan Davies
2016-03-18 16:36   ` Wei Liu
2016-03-19 11:30     ` David Scott

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1458237075-13777-7-git-send-email-jonathan.davies@citrix.com \
    --to=jonathan.davies@citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dave@recoil.org \
    --cc=euan.harris@citrix.com \
    --cc=jonathan.ludlam@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).