All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Edwin Török" <edvin.torok@citrix.com>
To: <xen-devel@lists.xenproject.org>
Cc: "Edwin Török" <edvin.torok@citrix.com>,
	"Christian Lindig" <christian.lindig@citrix.com>,
	"David Scott" <dave@recoil.org>,
	"Ian Jackson" <iwj@xenproject.org>, "Wei Liu" <wl@xen.org>
Subject: [PATCH v4 1/4] tools/ocaml/xenstored: replace hand rolled GC with weak GC references
Date: Fri, 15 Jan 2021 22:28:55 +0000	[thread overview]
Message-ID: <c455755573a079ebad208c1933719e251b589102.1610748224.git.edvin.torok@citrix.com> (raw)
In-Reply-To: <cover.1610748224.git.edvin.torok@citrix.com>

The code here is attempting to reduce memory usage by sharing common
substrings in the tree: it replaces strings with ints, and keeps a
string->int map that gets manually garbage collected using a hand-rolled
mark and sweep algorithm.

This is unnecessary: OCaml already has a mark-and-sweep Garbage
Collector runtime, and sharing of common strings in tree nodes
can be achieved through Weak references: if the string hasn't been seen
yet it gets added to the Weak reference table, and if it has we use the
entry from the table instead, thus storing a string only once.
When the string is no longer referenced OCaml's GC will drop it from the
weak table: there is no need to manually do a mark-and-sweep, or to tell
OCaml when to drop it.

Signed-off-by: Edwin Török <edvin.torok@citrix.com>
Acked-by: Christian Lindig <christian.lindig@citrix.com>

---
Changed since V3:
* repost after XSA to avoid conflicts
---
 tools/ocaml/xenstored/connection.ml |  3 --
 tools/ocaml/xenstored/history.ml    | 14 ------
 tools/ocaml/xenstored/store.ml      | 11 ++---
 tools/ocaml/xenstored/symbol.ml     | 68 ++++++-----------------------
 tools/ocaml/xenstored/symbol.mli    | 21 ++-------
 tools/ocaml/xenstored/xenstored.ml  | 16 +------
 6 files changed, 24 insertions(+), 109 deletions(-)

diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
index 1cf24beafd..51041dde8e 100644
--- a/tools/ocaml/xenstored/connection.ml
+++ b/tools/ocaml/xenstored/connection.ml
@@ -334,9 +334,6 @@ let has_more_work con =
 
 let incr_ops con = con.stat_nb_ops <- con.stat_nb_ops + 1
 
-let mark_symbols con =
-	Hashtbl.iter (fun _ t -> Store.mark_symbols (Transaction.get_store t)) con.transactions
-
 let stats con =
 	Hashtbl.length con.watches, con.stat_nb_ops
 
diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml
index 3899353da8..ba5c9cb571 100644
--- a/tools/ocaml/xenstored/history.ml
+++ b/tools/ocaml/xenstored/history.ml
@@ -22,20 +22,6 @@ type history_record = {
 
 let history : history_record list ref = ref []
 
-(* Called from periodic_ops to ensure we don't discard symbols that are still needed. *)
-(* There is scope for optimisation here, since in consecutive commits one commit's `after`
- * is the same thing as the next commit's `before`, but not all commits in history are
- * consecutive. *)
-let mark_symbols () =
-	(* There are gaps where dom0's commits are missing. Otherwise we could assume that
-	 * each element's `before` is the same thing as the next element's `after`
-	 * since the next element is the previous commit *)
-	List.iter (fun hist_rec ->
-			Store.mark_symbols hist_rec.before;
-			Store.mark_symbols hist_rec.after;
-		)
-		!history
-
 (* Keep only enough commit-history to protect the running transactions that we are still tracking *)
 (* There is scope for optimisation here, replacing List.filter with something more efficient,
  * probably on a different list-like structure. *)
diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
index a3be2e6bbe..9c226e4ef7 100644
--- a/tools/ocaml/xenstored/store.ml
+++ b/tools/ocaml/xenstored/store.ml
@@ -46,18 +46,18 @@ let add_child node child =
 
 let exists node childname =
 	let childname = Symbol.of_string childname in
-	List.exists (fun n -> n.name = childname) node.children
+	List.exists (fun n -> Symbol.equal n.name childname) node.children
 
 let find node childname =
 	let childname = Symbol.of_string childname in
-	List.find (fun n -> n.name = childname) node.children
+	List.find (fun n -> Symbol.equal n.name childname) node.children
 
 let replace_child node child nchild =
 	(* this is the on-steroid version of the filter one-replace one *)
 	let rec replace_one_in_list l =
 		match l with
 		| []                               -> []
-		| h :: tl when h.name = child.name -> nchild :: tl
+		| h :: tl when Symbol.equal h.name child.name -> nchild :: tl
 		| h :: tl                          -> h :: replace_one_in_list tl
 		in
 	{ node with children = (replace_one_in_list node.children) }
@@ -67,7 +67,7 @@ let del_childname node childname =
 	let rec delete_one_in_list l =
 		match l with
 		| []                        -> raise Not_found
-		| h :: tl when h.name = sym -> tl
+		| h :: tl when Symbol.equal h.name sym -> tl
 		| h :: tl                   -> h :: delete_one_in_list tl
 		in
 	{ node with children = (delete_one_in_list node.children) }
@@ -489,9 +489,6 @@ let copy store = {
 	quota = Quota.copy store.quota;
 }
 
-let mark_symbols store =
-	Node.recurse (fun node -> Symbol.mark_as_used node.Node.name) store.root
-
 let incr_transaction_coalesce store =
 	store.stat_transaction_coalesce <- store.stat_transaction_coalesce + 1
 let incr_transaction_abort store =
diff --git a/tools/ocaml/xenstored/symbol.ml b/tools/ocaml/xenstored/symbol.ml
index 4420c6a4d7..2b41d120f6 100644
--- a/tools/ocaml/xenstored/symbol.ml
+++ b/tools/ocaml/xenstored/symbol.ml
@@ -14,63 +14,23 @@
  * GNU Lesser General Public License for more details.
  *)
 
-type t = int
+module WeakTable = Weak.Make(struct
+    type t = string
+    let equal (x:string) (y:string) = (x = y)
+    let hash = Hashtbl.hash
+end)
 
-type 'a record = { data: 'a; mutable garbage: bool }
-let int_string_tbl : (int,string record) Hashtbl.t = Hashtbl.create 1024
-let string_int_tbl : (string,int) Hashtbl.t = Hashtbl.create 1024
+type t = string
 
-let created_counter = ref 0
-let used_counter = ref 0
+let tbl = WeakTable.create 1024
 
-let count = ref 0
-let rec fresh () =
-	if Hashtbl.mem int_string_tbl !count
-	then begin
-		incr count;
-		fresh ()
-	end else
-		!count
+let of_string s = WeakTable.merge tbl s
+let to_string s = s
 
-let new_record v = { data=v; garbage=false }
-
-let of_string name =
-	if Hashtbl.mem string_int_tbl name
-	then begin
-		incr used_counter;
-		Hashtbl.find string_int_tbl name
-	end else begin
-		let i = fresh () in
-		incr created_counter;
-		Hashtbl.add string_int_tbl name i;
-		Hashtbl.add int_string_tbl i (new_record name);
-		i
-	end
-
-let to_string i =
-	(Hashtbl.find int_string_tbl i).data
-
-let mark_all_as_unused () =
-	Hashtbl.iter (fun _ v -> v.garbage <- true) int_string_tbl
-
-let mark_as_used symb =
-	let record1 = Hashtbl.find int_string_tbl symb in
-		record1.garbage <- false
-
-let garbage () =
-	let records = Hashtbl.fold (fun symb record accu ->
-		if record.garbage then (symb, record.data) :: accu else accu
-	) int_string_tbl [] in
-	let remove (int,string) =
-		Hashtbl.remove int_string_tbl int;
-		Hashtbl.remove string_int_tbl string
-	in
-	created_counter := 0;
-	used_counter := 0;
-	List.iter remove records
+let equal a b =
+  (* compare using physical equality, both members have to be part of the above weak table *)
+  a == b
 
 let stats () =
-	Hashtbl.length string_int_tbl
-
-let created () = !created_counter
-let used () = !used_counter
+  let len, entries, _, _, _, _ = WeakTable.stats tbl in
+  len, entries
diff --git a/tools/ocaml/xenstored/symbol.mli b/tools/ocaml/xenstored/symbol.mli
index c3c9f6e2f8..586ab57507 100644
--- a/tools/ocaml/xenstored/symbol.mli
+++ b/tools/ocaml/xenstored/symbol.mli
@@ -29,24 +29,11 @@ val of_string : string -> t
 val to_string : t -> string
 (** Convert a symbol into a string. *)
 
-(** {6 Garbage Collection} *)
-
-(** Symbols need to be regulary garbage collected. The following steps should be followed:
--     mark all the knowns symbols as unused (with [mark_all_as_unused]);
--     mark all the symbols really usefull as used (with [mark_as_used]); and
--     finally, call [garbage] *)
-
-val mark_all_as_unused : unit -> unit
-val mark_as_used : t -> unit
-val garbage : unit -> unit
+val equal: t -> t -> bool
+(** Compare two symbols for equality *)
 
 (** {6 Statistics } *)
 
-val stats : unit -> int
-(** Get the number of used symbols. *)
+val stats : unit -> int * int
+(** Get the table size and number of entries. *)
 
-val created : unit -> int
-(** Returns the number of symbols created since the last GC. *)
-
-val used : unit -> int
-(** Returns the number of existing symbols used since the last GC *)
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index 5893af2caa..885b397d71 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -431,18 +431,6 @@ let _ =
 
 	let periodic_ops now =
 		debug "periodic_ops starting";
-		(* we garbage collect the string->int dictionary after a sizeable amount of operations,
-		 * there's no need to be really fast even if we got loose
-		 * objects since names are often reuse.
-		 *)
-		if Symbol.created () > 1000 || Symbol.used () > 20000
-		then begin
-			Symbol.mark_all_as_unused ();
-			Store.mark_symbols store;
-			Connections.iter cons Connection.mark_symbols;
-			History.mark_symbols ();
-			Symbol.garbage ()
-		end;
 
 		(* scan all the xs rings as a safenet for ill-behaved clients *)
 		if !ring_scan_interval >= 0 && now > (!last_scan_time +. float !ring_scan_interval) then
@@ -460,11 +448,11 @@ let _ =
 			let (lanon, lanon_ops, lanon_watchs,
 			     ldom, ldom_ops, ldom_watchs) = Connections.stats cons in
 			let store_nodes, store_abort, store_coalesce = Store.stats store in
-			let symtbl_len = Symbol.stats () in
+			let symtbl_len, symtbl_entries = Symbol.stats () in
 
 			info "store stat: nodes(%d) t-abort(%d) t-coalesce(%d)"
 			     store_nodes store_abort store_coalesce;
-			info "sytbl stat: %d" symtbl_len;
+			info "sytbl stat: length(%d) entries(%d)" symtbl_len symtbl_entries;
 			info "  con stat: anonymous(%d, %d o, %d w) domains(%d, %d o, %d w)"
 			     lanon lanon_ops lanon_watchs ldom ldom_ops ldom_watchs;
 			info "  mem stat: minor(%.0f) promoted(%.0f) major(%.0f) heap(%d w, %d c) live(%d w, %d b) free(%d w, %d b)"
-- 
2.29.2



  parent reply	other threads:[~2021-01-15 22:59 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-15 22:28 [PATCH v2 0/2] oxenstored build enhancements Edwin Török
2021-01-15 22:29 ` [PATCH v1 0/4] tools/ocaml/xenstored: bugfixes Edwin Török
2021-01-15 22:28 ` [PATCH v1 0/5] tools/ocaml/xenstored: structured fuzz testing Edwin Török
2021-01-15 22:28 ` [PATCH v4 0/4] tools/ocaml/xenstored: optimizations Edwin Török
2021-01-15 22:28 ` [PATCH v2 0/2] tools/ocaml/libs/xc: domid control Edwin Török
2021-01-15 22:28 ` [PATCH v2 0/8] tools/ocaml/xenstored: implement live update Edwin Török
2021-01-15 22:28 ` [PATCH v2 1/2] automation/: add Ubuntu:focal container Edwin Török
2021-01-15 22:28 ` [PATCH v2 2/2] Makefile: add build-tools-oxenstored Edwin Török
2021-01-15 22:28 ` [PATCH v2 1/8] tools/xenstore: add live update command to xenstore-control Edwin Török
2021-01-18  7:50   ` Jürgen Groß
2021-01-18  9:40     ` Edwin Torok
2021-01-15 22:28 ` [PATCH v2 2/8] Add workaround for xenstore-control flood issues Edwin Török
2021-01-15 22:28 ` [PATCH v2 3/8] docs/designs/xenstore-migration.md: clarify that deletes are recursive Edwin Török
2021-01-22 13:04   ` Jürgen Groß
2021-01-22 14:44     ` Edwin Torok
2021-01-15 22:28 ` [PATCH v2 4/8] tools/ocaml/xenstored: only quit on SIGTERM when a reload is possible Edwin Török
2021-01-18  7:51   ` Jürgen Groß
2021-01-18  9:28     ` Edwin Torok
2021-01-15 22:28 ` [PATCH v2 5/8] tools/ocaml/xenstored: Automatically resume when possible Edwin Török
2021-01-15 22:28 ` [PATCH v2 6/8] tools/ocaml/xenstored: add cooperative live-update command Edwin Török
2021-01-15 22:28 ` [PATCH v2 7/8] tools/ocaml/xenstored: start live update process Edwin Török
2021-01-15 22:28 ` [PATCH v2 8/8] tools/ocaml/xenstored: Implement live update for socket connections Edwin Török
2021-01-15 22:28 ` [PATCH v2 1/2] tools/ocaml/xenstored: trim txhistory on xenbus reconnect Edwin Török
2021-01-15 22:28 ` [PATCH v2 2/2] tools/ocaml/libs/xc: backward compatible domid control at domain creation time Edwin Török
2021-01-15 22:28 ` Edwin Török [this message]
2021-01-15 22:28 ` [PATCH v4 2/4] tools/ocaml/xenstored: backport find_opt/update from 4.06 Edwin Török
2021-01-15 22:28 ` [PATCH v4 3/4] tools/ocaml/xenstored: use more efficient node trees Edwin Török
2021-01-15 22:28 ` [PATCH v4 4/4] tools/ocaml/xenstored: use more efficient tries Edwin Török
2021-01-15 22:29 ` [PATCH v1 1/5] tools/ocaml: add unit test skeleton with Dune build system Edwin Török
2021-01-15 22:29 ` [PATCH v1 2/5] tools/ocaml/xenstored: implement the live migration binary format Edwin Török
2021-01-15 22:29 ` [PATCH v1 3/5] tools/ocaml/xenstored: add binary dump format support Edwin Török
2021-01-15 22:29 ` [PATCH v1 4/5] tools/ocaml/xenstored: add support for binary format Edwin Török
2021-01-15 22:29 ` [PATCH v1 5/5] Add structured fuzzing unit test Edwin Török
2021-01-15 22:29 ` [PATCH v1 1/4] tools/ocaml/libs/xb: do not crash after xenbus is unmapped Edwin Török
2021-01-15 22:29 ` [PATCH v1 2/4] tools/ocaml/xenstored: fix quota calculation for mkdir EEXIST Edwin Török
2021-01-15 22:29 ` [PATCH v1 3/4] tools/ocaml/xenstored: reject invalid watch paths early Edwin Török
2021-01-15 22:29 ` [PATCH v1 4/4] tools/ocaml/xenstored: mkdir conflicts were sometimes missed Edwin Török
2021-01-21 11:15 ` [PATCH v4 0/4] tools/ocaml/xenstored: optimizations Christian Lindig
2021-01-21 11:16 ` [PATCH v2 0/2] tools/ocaml/libs/xc: domid control Christian Lindig
  -- strict thread matches above, loose matches on Subject: below --
2020-08-27 17:35 [PATCH v4 0/4] tools/ocaml/xenstored: simplify code Edwin Török
2020-08-27 17:35 ` [PATCH v4 1/4] tools/ocaml/xenstored: replace hand rolled GC with weak GC references Edwin Török

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c455755573a079ebad208c1933719e251b589102.1610748224.git.edvin.torok@citrix.com \
    --to=edvin.torok@citrix.com \
    --cc=christian.lindig@citrix.com \
    --cc=dave@recoil.org \
    --cc=iwj@xenproject.org \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.