All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Edwin Török" <edvin.torok@citrix.com>
To: <xen-devel@lists.xenproject.org>
Cc: "Edwin Török" <edvin.torok@citrix.com>,
	"Christian Lindig" <christian.lindig@citrix.com>,
	"David Scott" <dave@recoil.org>,
	"Ian Jackson" <iwj@xenproject.org>, "Wei Liu" <wl@xen.org>,
	"Juergen Gross" <jgross@suse.com>,
	"Pau Ruiz Safont" <pau.safont@citrix.com>
Subject: [PATCH v2 4/8] tools/ocaml/xenstored: only quit on SIGTERM when a reload is possible
Date: Fri, 15 Jan 2021 22:28:46 +0000	[thread overview]
Message-ID: <023574503750d06132e3ca260848c364ff439001.1610748224.git.edvin.torok@citrix.com> (raw)
In-Reply-To: <cover.1610748224.git.edvin.torok@citrix.com>

Currently when oxenstored receives SIGTERM it dumps its state and quits.
It is possible to then restart it if --restart is given, however that is
not always safe:

* domains could have active transactions, and after a restart they would
either reuse transaction IDs of already open transactions, or get an
error back that the transaction doesn't exist

* there could be pending data to send to a VM still in oxenstored's
  queue which would be lost

* there could be pending input to be processed from a VM in oxenstored's
  queue which would be lost

Prevent shutting down oxenstored via SIGTERM in the above situations.
Also ignore domains marked as bad because oxenstored would never talk
to them again.

Signed-off-by: Edwin Török <edvin.torok@citrix.com>
Reviewed-by: Pau Ruiz Safont <pau.safont@citrix.com>
Reviewed-by: Christian Lindig <christian.lindig@citrix.com>

---
Changed since V1:
* post publicly now that the XSA is out
---
 tools/ocaml/xenstored/connection.ml  | 35 ++++++++++++++++++++++++++++
 tools/ocaml/xenstored/connections.ml |  8 +++++++
 tools/ocaml/xenstored/xenstored.ml   | 13 +++++++++--
 tools/xenstore/xenstored_core.c      |  7 +++++-
 4 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
index fa0d3c4d92..bd02060cd0 100644
--- a/tools/ocaml/xenstored/connection.ml
+++ b/tools/ocaml/xenstored/connection.ml
@@ -290,6 +290,41 @@ let has_new_output con = Xenbus.Xb.has_new_output con.xb
 let peek_output con = Xenbus.Xb.peek_output con.xb
 let do_output con = Xenbus.Xb.output con.xb
 
+let is_bad con = match con.dom with None -> false | Some dom -> Domain.is_bad_domain dom
+
+(* oxenstored currently only dumps limited information about its state.
+   A live update is only possible if any of the state that is not dumped would be empty.
+   Compared to https://xenbits.xen.org/docs/unstable/designs/xenstore-migration.html:
+     * GLOBAL_DATA: not strictly needed, systemd is giving the socket FDs to us
+     * CONNECTION_DATA: PARTIAL
+       * for domains: PARTIAL, see Connection.dump -> Domain.dump, only if data and tdomid is empty
+       * for sockets (Dom0 toolstack): NO
+     * WATCH_DATA: OK, see Connection.dump
+     * TRANSACTION_DATA: NO
+     * NODE_DATA: OK (except for transactions), see Store.dump_fct and DB.to_channel
+
+   Also xenstored will never talk to a Domain once it is marked as bad,
+   so treat it as idle for live-update.
+
+   Restrictions below can be relaxed once xenstored learns to dump more
+   of its live state in a safe way *)
+let has_extra_connection_data con =
+	let has_in = has_input con in
+	let has_out = has_output con in
+	let has_socket = con.dom = None in
+	let has_nondefault_perms = make_perm con.dom <> con.perm in
+	has_in || has_out
+	|| has_socket (* dom0 sockets not dumped yet *)
+	|| has_nondefault_perms (* set_target not dumped yet *)
+
+let has_transaction_data con =
+	let n = number_of_transactions con in
+	dbg "%s: number of transactions = %d" (get_domstr con) n;
+	n > 0
+
+let prevents_live_update con = not (is_bad con)
+	&& (has_extra_connection_data con || has_transaction_data con)
+
 let has_more_work con =
 	has_more_input con || not (has_old_output con) && has_new_output con
 
diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml
index 6ee3552ec2..82988f7e8d 100644
--- a/tools/ocaml/xenstored/connections.ml
+++ b/tools/ocaml/xenstored/connections.ml
@@ -194,3 +194,11 @@ let debug cons =
 	let anonymous = Hashtbl.fold (fun _ con accu -> Connection.debug con :: accu) cons.anonymous [] in
 	let domains = Hashtbl.fold (fun _ con accu -> Connection.debug con :: accu) cons.domains [] in
 	String.concat "" (domains @ anonymous)
+
+let filter ~f cons =
+	let fold _ v acc = if f v then v :: acc else acc in
+	[]
+	|> Hashtbl.fold fold cons.anonymous
+	|> Hashtbl.fold fold cons.domains
+
+let prevents_quit cons = filter ~f:Connection.prevents_live_update cons
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index 39d6d767e4..6b5381962b 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -20,6 +20,7 @@ open Parse_arg
 open Stdext
 
 let error fmt = Logging.error "xenstored" fmt
+let warn fmt = Logging.warn "xenstored" fmt
 let debug fmt = Logging.debug "xenstored" fmt
 let info fmt = Logging.info "xenstored" fmt
 
@@ -312,7 +313,9 @@ let _ =
 	);
 
 	Sys.set_signal Sys.sighup (Sys.Signal_handle sighup_handler);
-	Sys.set_signal Sys.sigterm (Sys.Signal_handle (fun _ -> quit := true));
+	Sys.set_signal Sys.sigterm (Sys.Signal_handle (fun _ ->
+		 info "Received SIGTERM";
+		 quit := true));
 	Sys.set_signal Sys.sigusr1 (Sys.Signal_handle (fun _ -> sigusr1_handler store));
 	Sys.set_signal Sys.sigpipe Sys.Signal_ignore;
 
@@ -424,6 +427,12 @@ let _ =
 		);
 		let elapsed = Unix.gettimeofday () -. now in
 		debug "periodic_ops took %F seconds." elapsed;
+		if !quit then
+		(match Connections.prevents_quit cons with
+		| [] -> ()
+		| domains ->
+		    List.iter (fun con -> warn "%s prevents live update" (Connection.get_domstr con)) domains
+		);
 		delay_next_frequent_ops_by elapsed
 	in
 
@@ -475,7 +484,7 @@ let _ =
 		in
 
 	Systemd.sd_notify_ready ();
-	while not !quit
+	while not (!quit && Connections.prevents_quit cons = [])
 	do
 		try
 			main_loop ()
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 50986f8b29..b9495365c4 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -1970,6 +1970,7 @@ static struct option options[] = {
 	{ "internal-db", 0, NULL, 'I' },
 	{ "verbose", 0, NULL, 'V' },
 	{ "watch-nb", 1, NULL, 'W' },
+	{ "live-update", 0, NULL, 'U' },
 	{ NULL, 0, NULL, 0 } };
 
 extern void dump_conn(struct connection *conn); 
@@ -1984,11 +1985,12 @@ int main(int argc, char *argv[])
 	bool dofork = true;
 	bool outputpid = false;
 	bool no_domain_init = false;
+	bool live_update = false;
 	const char *pidfile = NULL;
 	int timeout;
 
 
-	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:", options,
+	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:U", options,
 				  NULL)) != -1) {
 		switch (opt) {
 		case 'D':
@@ -2046,6 +2048,9 @@ int main(int argc, char *argv[])
 		case 'p':
 			priv_domid = strtol(optarg, NULL, 10);
 			break;
+		case 'U':
+			live_update = true;
+			break;
 		}
 	}
 	if (optind != argc)
-- 
2.29.2



  parent reply	other threads:[~2021-01-15 22:30 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-15 22:28 [PATCH v2 0/2] oxenstored build enhancements Edwin Török
2021-01-15 22:29 ` [PATCH v1 0/4] tools/ocaml/xenstored: bugfixes Edwin Török
2021-01-15 22:28 ` [PATCH v1 0/5] tools/ocaml/xenstored: structured fuzz testing Edwin Török
2021-01-15 22:28 ` [PATCH v4 0/4] tools/ocaml/xenstored: optimizations Edwin Török
2021-01-15 22:28 ` [PATCH v2 0/2] tools/ocaml/libs/xc: domid control Edwin Török
2021-01-15 22:28 ` [PATCH v2 0/8] tools/ocaml/xenstored: implement live update Edwin Török
2021-01-15 22:28 ` [PATCH v2 1/2] automation/: add Ubuntu:focal container Edwin Török
2021-01-15 22:28 ` [PATCH v2 2/2] Makefile: add build-tools-oxenstored Edwin Török
2021-01-15 22:28 ` [PATCH v2 1/8] tools/xenstore: add live update command to xenstore-control Edwin Török
2021-01-18  7:50   ` Jürgen Groß
2021-01-18  9:40     ` Edwin Torok
2021-01-15 22:28 ` [PATCH v2 2/8] Add workaround for xenstore-control flood issues Edwin Török
2021-01-15 22:28 ` [PATCH v2 3/8] docs/designs/xenstore-migration.md: clarify that deletes are recursive Edwin Török
2021-01-22 13:04   ` Jürgen Groß
2021-01-22 14:44     ` Edwin Torok
2021-01-15 22:28 ` Edwin Török [this message]
2021-01-18  7:51   ` [PATCH v2 4/8] tools/ocaml/xenstored: only quit on SIGTERM when a reload is possible Jürgen Groß
2021-01-18  9:28     ` Edwin Torok
2021-01-15 22:28 ` [PATCH v2 5/8] tools/ocaml/xenstored: Automatically resume when possible Edwin Török
2021-01-15 22:28 ` [PATCH v2 6/8] tools/ocaml/xenstored: add cooperative live-update command Edwin Török
2021-01-15 22:28 ` [PATCH v2 7/8] tools/ocaml/xenstored: start live update process Edwin Török
2021-01-15 22:28 ` [PATCH v2 8/8] tools/ocaml/xenstored: Implement live update for socket connections Edwin Török
2021-01-15 22:28 ` [PATCH v2 1/2] tools/ocaml/xenstored: trim txhistory on xenbus reconnect Edwin Török
2021-01-15 22:28 ` [PATCH v2 2/2] tools/ocaml/libs/xc: backward compatible domid control at domain creation time Edwin Török
2021-01-15 22:28 ` [PATCH v4 1/4] tools/ocaml/xenstored: replace hand rolled GC with weak GC references Edwin Török
2021-01-15 22:28 ` [PATCH v4 2/4] tools/ocaml/xenstored: backport find_opt/update from 4.06 Edwin Török
2021-01-15 22:28 ` [PATCH v4 3/4] tools/ocaml/xenstored: use more efficient node trees Edwin Török
2021-01-15 22:28 ` [PATCH v4 4/4] tools/ocaml/xenstored: use more efficient tries Edwin Török
2021-01-15 22:29 ` [PATCH v1 1/5] tools/ocaml: add unit test skeleton with Dune build system Edwin Török
2021-01-15 22:29 ` [PATCH v1 2/5] tools/ocaml/xenstored: implement the live migration binary format Edwin Török
2021-01-15 22:29 ` [PATCH v1 3/5] tools/ocaml/xenstored: add binary dump format support Edwin Török
2021-01-15 22:29 ` [PATCH v1 4/5] tools/ocaml/xenstored: add support for binary format Edwin Török
2021-01-15 22:29 ` [PATCH v1 5/5] Add structured fuzzing unit test Edwin Török
2021-01-15 22:29 ` [PATCH v1 1/4] tools/ocaml/libs/xb: do not crash after xenbus is unmapped Edwin Török
2021-01-15 22:29 ` [PATCH v1 2/4] tools/ocaml/xenstored: fix quota calculation for mkdir EEXIST Edwin Török
2021-01-15 22:29 ` [PATCH v1 3/4] tools/ocaml/xenstored: reject invalid watch paths early Edwin Török
2021-01-15 22:29 ` [PATCH v1 4/4] tools/ocaml/xenstored: mkdir conflicts were sometimes missed Edwin Török
2021-01-21 11:15 ` [PATCH v4 0/4] tools/ocaml/xenstored: optimizations Christian Lindig
2021-01-21 11:16 ` [PATCH v2 0/2] tools/ocaml/libs/xc: domid control Christian Lindig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=023574503750d06132e3ca260848c364ff439001.1610748224.git.edvin.torok@citrix.com \
    --to=edvin.torok@citrix.com \
    --cc=christian.lindig@citrix.com \
    --cc=dave@recoil.org \
    --cc=iwj@xenproject.org \
    --cc=jgross@suse.com \
    --cc=pau.safont@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.