xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: "Edwin Török" <edvin.torok@citrix.com>
To: <xen-devel@lists.xenproject.org>
Cc: "Edwin Török" <edvin.torok@citrix.com>,
	"Christian Lindig" <christian.lindig@citrix.com>,
	"David Scott" <dave@recoil.org>,
	"Ian Jackson" <iwj@xenproject.org>, "Wei Liu" <wl@xen.org>
Subject: [PATCH v2 04/17] tools/ocaml/xenstored: implement the live migration binary format
Date: Tue, 11 May 2021 19:05:17 +0100	[thread overview]
Message-ID: <1203d68f34f55b675e64df228c7d45405e1304a8.1620755942.git.edvin.torok@citrix.com> (raw)
In-Reply-To: <cover.1620755942.git.edvin.torok@citrix.com>

This is implemented by C xenstored as live update dump format.
oxenstored already has its own (text-based) dump format, but for
compatibility implement one compatible with C xenstored.

This will also be useful in the future for non-cooperative guest live migration.

docs/designs/xenstore-migration.md documents the format

For now this always dumps integers in big endian order, because even old
versions of OCaml have support for that.
The binary format supports both little and big endian orders, so this
should be compatible.

To dump in little endian or native endian order we would
require OCaml 4.08+.

Signed-off-by: Edwin Török <edvin.torok@citrix.com>
---
 tools/ocaml/xenstored/disk.ml | 318 ++++++++++++++++++++++++++++++++++
 1 file changed, 318 insertions(+)

diff --git a/tools/ocaml/xenstored/disk.ml b/tools/ocaml/xenstored/disk.ml
index 4739967b61..595fdab54a 100644
--- a/tools/ocaml/xenstored/disk.ml
+++ b/tools/ocaml/xenstored/disk.ml
@@ -155,3 +155,321 @@ let write store =
 		Unix.rename tfile xs_daemon_database
 	with exc ->
 		error "caught exn %s" (Printexc.to_string exc)
+
+	module BinaryOut = struct
+		let version = 0x1
+		let endian = 1
+		let padding = String.make 7 '\x00'
+
+		let write_header ch =
+			(* for testing endian order *)
+			output_binary_int ch 0x78656e73;
+			output_binary_int ch 0x746f7265;
+			output_binary_int ch version;
+			output_binary_int ch endian;
+			ch
+
+		let w8 = output_char
+		let w16 ch i =
+			assert (i >= 0 && i lsr 16 = 0);
+			output_byte ch (i lsr 8);
+			output_byte ch i
+
+		let w32 ch v =
+			assert (v >= 0 && v <= 0xFFFF_FFFF);
+			output_binary_int ch v
+
+		let pos = pos_out
+		let wpad ch =
+			let padto = 8 in
+			let padby = (padto - pos ch mod padto) mod padto in
+			if padby > 0 then
+				output_substring ch padding 0 padby
+
+		let wstring = output_string
+	end
+
+	module BinaryIn = struct
+		type t = in_channel
+
+		let read_header t =
+			let h = Bytes.make 8 '\x00' in
+			really_input t h 0 (Bytes.length h);
+			let ver = input_binary_int t in
+			let endian = input_binary_int t in
+			if Bytes.to_string h <> "xenstore" then
+				failwith "Header doesn't begin with 'xenstore'";
+			if ver <> BinaryOut.version then
+				failwith "Incompatible version";
+			if endian <> BinaryOut.endian then
+				failwith "Incompatible endianness"
+
+		let r8 = input_char
+
+		let r16 t = 
+			let r0 = input_byte t in
+			let r1 = input_byte t  in
+			(r0 lsl 8) lor r1
+
+		let r32 t =
+			(* read unsigned 32-bit int *)
+			let r = input_binary_int t land 0xFFFF_FFFF in
+			assert (r >= 0);
+			r
+
+		let rstring = really_input_string
+
+		let rpad t =
+			let padto = 8 in
+			let padby = (padto - pos_in t mod padto) mod padto in
+			if padby > 0 then
+				ignore (really_input_string t padby)
+	end
+
+module FD : sig
+     type t = Unix.file_descr
+     val of_int: int -> t
+     val to_int : t -> int
+end = struct
+    type t = Unix.file_descr
+    (* This is like Obj.magic but just for these types,
+       and relies on Unix.file_descr = int *)
+    external to_int : t -> int = "%identity"
+    external of_int : int -> t = "%identity"
+end
+
+module LiveRecord = struct
+	(* See docs/designs/xenstore-migration.md for binary format *)
+	module Type : sig
+		type t = private int
+		val end_ : t
+		val global_data : t
+		val connection_data : t
+		val watch_data : t
+		val transaction_data : t
+		val node_data: t
+	end = struct
+		type t = int
+		let end_ = 0x0
+		let global_data = 0x01
+		let connection_data = 0x02
+		let watch_data = 0x03
+		let transaction_data = 0x04
+		let node_data = 0x05
+	end
+
+	module I = BinaryIn
+	module O = BinaryOut
+
+	let write_expect msg expected actual =
+		if expected <> actual then
+			let m = Printf.sprintf "expected %d <> %d: %s" expected actual msg in
+			invalid_arg m
+
+	let write_record t (typ: Type.t) len f =
+		assert (O.pos t mod 8 = 0);
+		O.w32 t (typ :> int);
+		O.w32 t len;
+		let p0 = O.pos t in
+		f t;
+		let p1 = O.pos t in
+		write_expect "position and length" len (p1-p0);
+		O.wpad t
+
+	let write_end t =
+		write_record t Type.end_ 0 ignore
+
+	let read_expect t msg expected actual =
+		if expected <> actual then
+			let pos = pos_in t in
+			let m = Printf.sprintf "expected %d <> %d at ~%d: %s" expected actual pos msg in
+			invalid_arg m
+
+	let read_end t ~len f =
+		read_expect t "end" 0 len;
+		f ()
+
+	let write_global_data t ~rw_sock =
+		write_record t Type.global_data 8 @@ fun b ->
+		O.w32 b (FD.to_int rw_sock);
+		O.w32 b (-1)
+
+	let read_global_data t ~len f =
+		read_expect t "global_data" 8 len;
+		let rw_sock = FD.of_int (I.r32 t) in
+		let _ = FD.of_int (I.r32 t) in
+		f ~rw_sock
+
+	let conn_shared_ring = 0x0
+	let conn_socket = 0x1
+	let domid_invalid = 0x7FF4
+
+	(* oxenstored doesn't support readonly sockets yet *)
+	let flags_connection_readonly = 0x1l
+
+	type dom = { id: int; target: int; remote_port: int }
+	type conn = Socket of Unix.file_descr | Domain of dom
+
+	let write_connection_data t ~conid ~conn xb_pktin xb_partialout xb_pktout =
+		let in_data_len = Buffer.length xb_pktin in
+		let out_resp_len = String.length xb_partialout in
+		let out_data_len = Buffer.length xb_pktout in
+		let data_len = in_data_len + out_data_len in
+
+		write_record t Type.connection_data (32 + data_len) @@ fun b ->
+		assert (conid > 0);
+		O.w32 b conid;
+		O.w32 b (match conn with
+		| Socket _ -> conn_socket
+		| Domain _ -> conn_shared_ring
+		);
+		let flags = 0x0 in
+		O.w32 b flags;
+
+		(match conn with
+		| Socket fd ->
+			O.w32 b (FD.to_int fd);
+			O.w32 b 0 (* pad *)
+		| Domain dom ->
+			O.w16 b dom.id;
+			O.w16 b dom.target;
+			O.w32 b dom.remote_port
+			);
+
+		O.w32 b in_data_len;
+		O.w32 b out_resp_len;
+		O.w32 b out_data_len;
+		Buffer.output_buffer b xb_pktin;
+		O.wstring b xb_partialout;
+		Buffer.output_buffer b xb_pktout
+
+	let read_connection_data t ~len f =
+		let conid = I.r32 t in
+		assert (conid > 0);
+		let kind = I.r32 t in
+		let flags = I.r32 t in
+		read_expect t "flags" 0 flags;
+		let conn = (match kind with
+		| x when x = conn_socket ->
+			let fd = FD.of_int (I.r32 t) in
+			I.r32 t |> ignore;
+			Socket fd
+		| x when x = conn_shared_ring ->
+			let id = I.r16 t in
+			let target = I.r16 t in
+			let remote_port = I.r32 t in
+			Domain {id; target; remote_port }
+		| x ->
+			invalid_arg (Printf.sprintf "Unknown connection kind %x" x)
+		) in
+		let in_data_len = I.r32 t in
+		let out_resp_len = I.r32 t in
+		let out_data_len = I.r32 t in
+		let in_data = really_input_string t in_data_len in
+		let out_data = really_input_string t out_data_len in
+		f ~conid ~conn ~in_data ~out_data ~out_resp_len
+
+
+	let write_watch_data t ~conid ~wpath ~token =
+		let wpath_len = String.length wpath in
+		let token_len = String.length token in
+
+		write_record t Type.watch_data (12+wpath_len+token_len) @@ fun b ->
+		O.w32 b conid;
+		O.w32 b (String.length wpath);
+		O.w32 b (String.length token);
+		O.wstring b wpath;
+		O.wstring b token
+
+	let read_watch_data t ~len f =
+		let conid = I.r32 t in
+		let wpathlen = I.r32 t in
+		let tokenlen = I.r32 t in
+		let wpath = I.rstring t wpathlen in
+		let token = I.rstring t tokenlen in
+		f ~conid ~wpath ~token
+
+	let write_transaction_data t ~conid ~txid =
+		write_record t Type.transaction_data 8 @@ fun b ->
+		O.w32 b conid;
+		O.w32 b txid
+
+	let read_transaction_data t ~len f =
+		read_expect t "transaction" 8 len;
+		let conid = I.r32 t in
+		let txid = I.r32 t in
+		f ~conid ~txid
+
+	type access = R | W | RW | Del
+
+	let write_node_data t ~txidaccess ~path ~value ~perms =
+		let path_len = String.length path in
+		let value_len = String.length value in
+		let perms = Perms.Node.acls perms in
+		let len = 24 + (List.length perms)*4 + path_len + value_len in
+
+		write_record t Type.node_data len @@ fun b ->
+		O.w32 b (match txidaccess with None -> 0 | Some (conid, _, _) -> conid);
+		O.w32 b (match txidaccess with None -> 0 | Some (_, txid, _) -> txid);
+		O.w32 b path_len;
+		O.w32 b value_len;
+		O.w32 b (match txidaccess with
+		| None -> 0x0
+		| Some (_, _, Del) -> 0x0
+		| Some (_, _, R) -> 0x1
+		| Some (_, _, W) -> 0x2
+		| Some (_, _, RW) -> 0x3
+		);
+		O.w32 b (List.length perms);
+		List.iter (fun (domid, permty) ->
+			O.w8 b (Perms.char_of_permty permty);
+			O.w8 b '\x00';
+			O.w16 b domid;
+		) perms;
+		O.wstring b path;
+		O.wstring b value
+
+	let read_node_data t ~len f =
+		let conid = I.r32 t in
+		let txid = I.r32 t in
+		let path_len = I.r32 t in
+		let value_len = I.r32 t in
+		let txaccess = match conid, I.r32 t with
+		| 0, _ -> None
+		| _, 0 -> Some (conid, txid, Del)
+		| _, 1 -> Some (conid, txid, R)
+		| _, 2 -> Some (conid, txid, W)
+		| _, 3 -> Some (conid, txid, RW)
+		| _ -> invalid_arg "invalid access flag"
+		in
+		let a = Array.init (I.r32 t) (fun _ ->
+					let perm = Perms.permty_of_char (I.r8 t) in
+					I.r8 t |> ignore;
+					let domid = I.r16 t in
+					domid, perm
+		) in
+		let perms = match Array.to_list a with
+		| [] -> invalid_arg "Permission list cannot be empty";
+		| (owner, other) :: acls ->
+			Perms.Node.create owner other acls
+		in
+		let path = I.rstring t path_len in
+		let value = I.rstring t value_len in
+		f ~txaccess ~perms ~path ~value
+
+	let read_record t ~on_end ~on_global_data ~on_connection_data ~on_watch_data ~on_transaction_data ~on_node_data =
+		I.rpad t; (* if we fail to process a record (e.g. callback raises, ensure we resume at right place *)
+		let typ = I.r32 t in
+		let len = I.r32 t in
+		let p0 = pos_in t in
+		(match typ with
+		| x when x = (Type.end_ :> int) -> read_end t ~len on_end
+		| x when x = (Type.global_data :> int) -> read_global_data t ~len on_global_data
+		| x when x = (Type.connection_data :> int) -> read_connection_data t ~len on_connection_data
+		| x when x = (Type.watch_data :> int) -> read_watch_data t ~len on_watch_data
+		| x when x = (Type.transaction_data :> int) -> read_transaction_data t ~len on_transaction_data
+		| x when x = (Type.node_data :> int) -> read_node_data t ~len on_node_data
+		| x -> failwith (Printf.sprintf "Unknown record type: %x" x));
+		let p1 = pos_in t in
+		read_expect t "record length" len (p1-p0)
+end
-- 
2.25.1



  parent reply	other threads:[~2021-05-11 18:07 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-11 18:05 [PATCH v2 00/17] live update and gnttab patches Edwin Török
2021-05-11 18:05 ` [PATCH v2 01/17] docs/designs/xenstore-migration.md: clarify that deletes are recursive Edwin Török
2021-05-11 18:05 ` [PATCH v2 02/17] tools/ocaml: add unit test skeleton with Dune build system Edwin Török
2021-05-11 18:05 ` [PATCH v2 03/17] tools/ocaml: vendor external dependencies for convenience Edwin Török
2021-05-11 18:05 ` Edwin Török [this message]
2021-05-11 18:05 ` [PATCH v2 05/17] tools/ocaml/xenstored: add binary dump format support Edwin Török
2021-05-11 18:05 ` [PATCH v2 06/17] tools/ocaml/xenstored: add support for binary format Edwin Török
2021-05-11 18:05 ` [PATCH v2 07/17] tools/ocaml/xenstored: validate config file before live update Edwin Török
2021-05-11 18:05 ` [PATCH v2 08/17] Add structured fuzzing unit test Edwin Török
2021-05-11 18:05 ` [PATCH v2 09/17] tools/ocaml: use common macros for manipulating mmap_interface Edwin Török
2021-05-11 18:05 ` [PATCH v2 10/17] tools/ocaml/libs/mmap: allocate correct number of bytes Edwin Török
2021-05-11 18:05 ` [PATCH v2 11/17] tools/ocaml/libs/mmap: Expose stub_mmap_alloc Edwin Török
2021-05-11 18:05 ` [PATCH v2 12/17] tools/ocaml/libs/mmap: mark mmap/munmap as blocking Edwin Török
2021-05-11 18:05 ` [PATCH v2 13/17] tools/ocaml/libs/xb: import gnttab stubs from mirage Edwin Török
2021-05-11 18:05 ` [PATCH v2 14/17] tools/ocaml: safer Xenmmap interface Edwin Török
2021-05-11 18:05 ` [PATCH v2 15/17] tools/ocaml/xenstored: use gnttab instead of xenctrl's foreign_map_range Edwin Török
2021-05-11 18:05 ` [PATCH v2 16/17] tools/ocaml/xenstored: don't store domU's mfn of ring page Edwin Török
2021-05-11 18:05 ` [PATCH v2 17/17] tools/ocaml/libs/mmap: Clean up unused read/write Edwin Török
2021-05-12 13:06   ` Andrew Cooper
2021-05-11 18:12 ` [PATCH v2 00/17] live update and gnttab patches Edwin Torok
2021-05-11 20:05 ` Andrew Cooper
2021-05-12 10:10   ` Edwin Torok
2021-05-12 12:51     ` Andrew Cooper
2021-05-12 15:04       ` Edwin Torok

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1203d68f34f55b675e64df228c7d45405e1304a8.1620755942.git.edvin.torok@citrix.com \
    --to=edvin.torok@citrix.com \
    --cc=christian.lindig@citrix.com \
    --cc=dave@recoil.org \
    --cc=iwj@xenproject.org \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    --subject='Re: [PATCH v2 04/17] tools/ocaml/xenstored: implement the live migration binary format' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).