All of lore.kernel.org
 help / color / mirror / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Git List <git@vger.kernel.org>
Cc: Junio C Hamano <gitster@pobox.com>, Jeff King <peff@peff.net>,
	Jonathan Tan <jonathantanmy@google.com>
Subject: [PATCH v3 4/5] oidset: use khash
Date: Thu, 4 Oct 2018 17:13:06 +0200	[thread overview]
Message-ID: <3651262f-dec2-16d4-fc77-346ed74c6ffa@web.de> (raw)
In-Reply-To: <14e57ec8-b8a2-10bd-688d-1cb926e77675@web.de>

Reimplement oidset using khash.h in order to reduce its memory footprint
and make it faster.

Performance of a command that mainly checks for duplicate objects using
an oidset, with master and Clang 6.0.1:

  $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"

  $ /usr/bin/time $cmd >/dev/null
  0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
  0inputs+0outputs (0major+11204minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'

    Time (mean ± σ):     250.0 ms ±   6.0 ms    [User: 225.9 ms, System: 23.6 ms]

    Range (min … max):   242.0 ms … 261.1 ms

And with this patch:

  $ /usr/bin/time $cmd >/dev/null
  0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
  0inputs+0outputs (0major+8318minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'

    Time (mean ± σ):     151.9 ms ±   4.9 ms    [User: 130.5 ms, System: 21.2 ms]

    Range (min … max):   148.2 ms … 170.4 ms

Initial-patch-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 oidset.c | 34 ++++++++++++----------------------
 oidset.h | 36 ++++++++++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/oidset.c b/oidset.c
index 454c54f933..9836d427ef 100644
--- a/oidset.c
+++ b/oidset.c
@@ -3,38 +3,28 @@
 
 int oidset_contains(const struct oidset *set, const struct object_id *oid)
 {
-	if (!set->map.map.tablesize)
-		return 0;
-	return !!oidmap_get(&set->map, oid);
+	khiter_t pos = kh_get_oid(&set->set, *oid);
+	return pos != kh_end(&set->set);
 }
 
 int oidset_insert(struct oidset *set, const struct object_id *oid)
 {
-	struct oidmap_entry *entry;
-
-	if (!set->map.map.tablesize)
-		oidmap_init(&set->map, 0);
-	else if (oidset_contains(set, oid))
-		return 1;
-
-	entry = xmalloc(sizeof(*entry));
-	oidcpy(&entry->oid, oid);
-
-	oidmap_put(&set->map, entry);
-	return 0;
+	int added;
+	kh_put_oid(&set->set, *oid, &added);
+	return !added;
 }
 
 int oidset_remove(struct oidset *set, const struct object_id *oid)
 {
-	struct oidmap_entry *entry;
-
-	entry = oidmap_remove(&set->map, oid);
-	free(entry);
-
-	return (entry != NULL);
+	khiter_t pos = kh_get_oid(&set->set, *oid);
+	if (pos == kh_end(&set->set))
+		return 0;
+	kh_del_oid(&set->set, pos);
+	return 1;
 }
 
 void oidset_clear(struct oidset *set)
 {
-	oidmap_free(&set->map, 1);
+	kh_release_oid(&set->set);
+	oidset_init(set, 0);
 }
diff --git a/oidset.h b/oidset.h
index 40ec5f87fe..4b90540cd4 100644
--- a/oidset.h
+++ b/oidset.h
@@ -1,7 +1,8 @@
 #ifndef OIDSET_H
 #define OIDSET_H
 
-#include "oidmap.h"
+#include "hashmap.h"
+#include "khash.h"
 
 /**
  * This API is similar to sha1-array, in that it maintains a set of object ids
@@ -15,19 +16,33 @@
  *      table overhead.
  */
 
+static inline unsigned int oid_hash(struct object_id oid)
+{
+	return sha1hash(oid.hash);
+}
+
+static inline int oid_equal(struct object_id a, struct object_id b)
+{
+	return oideq(&a, &b);
+}
+
+KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
+
 /**
  * A single oidset; should be zero-initialized (or use OIDSET_INIT).
  */
 struct oidset {
-	struct oidmap map;
+	kh_oid_t set;
 };
 
-#define OIDSET_INIT { OIDMAP_INIT }
+#define OIDSET_INIT { { 0 } }
 
 
 static inline void oidset_init(struct oidset *set, size_t initial_size)
 {
-	oidmap_init(&set->map, initial_size);
+	memset(&set->set, 0, sizeof(set->set));
+	if (initial_size)
+		kh_resize_oid(&set->set, initial_size);
 }
 
 /**
@@ -58,19 +73,24 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
 void oidset_clear(struct oidset *set);
 
 struct oidset_iter {
-	struct oidmap_iter m_iter;
+	kh_oid_t *set;
+	khiter_t iter;
 };
 
 static inline void oidset_iter_init(struct oidset *set,
 				    struct oidset_iter *iter)
 {
-	oidmap_iter_init(&set->map, &iter->m_iter);
+	iter->set = &set->set;
+	iter->iter = kh_begin(iter->set);
 }
 
 static inline struct object_id *oidset_iter_next(struct oidset_iter *iter)
 {
-	struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter);
-	return e ? &e->oid : NULL;
+	for (; iter->iter != kh_end(iter->set); iter->iter++) {
+		if (kh_exist(iter->set, iter->iter))
+			return &kh_key(iter->set, iter->iter++);
+	}
+	return NULL;
 }
 
 static inline struct object_id *oidset_iter_first(struct oidset *set,
-- 
2.19.0

  parent reply	other threads:[~2018-10-04 15:13 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-03 13:11 [PATCH v2 0/2] oidset: use khash René Scharfe
2018-10-03 13:12 ` [PATCH v2 1/2] khash: factor out kh_release_* René Scharfe
2018-10-03 13:16 ` [PATCH v2 2/2] oidset: use khash René Scharfe
2018-10-03 19:40   ` Jeff King
2018-10-04  5:56     ` René Scharfe
2018-10-04  6:48       ` Jeff King
2018-10-04  6:50         ` Jeff King
2018-10-04 15:05         ` René Scharfe
2018-10-04 15:05 ` [PATCH v3 0/5] " René Scharfe
2018-10-04 15:09   ` [PATCH v3 1/5] fetch-pack: factor out is_unmatched_ref() René Scharfe
2018-10-04 15:09   ` [PATCH v3 2/5] fetch-pack: load tip_oids eagerly iff needed René Scharfe
2018-10-04 21:38     ` Jonathan Tan
2018-10-04 22:11       ` René Scharfe
2018-10-05 20:13         ` René Scharfe
2018-10-04 22:14       ` Jeff King
2018-10-04 22:52         ` Jonathan Tan
2018-10-04 23:18           ` Jeff King
2018-10-04 22:07     ` Jeff King
2018-10-05 20:13       ` René Scharfe
2018-10-05 20:27         ` Jeff King
2018-10-05 21:22           ` René Scharfe
2018-10-05 21:47             ` Jeff King
2018-10-04 15:10   ` [PATCH v3 3/5] khash: factor out kh_release_* René Scharfe
2018-10-04 15:13   ` René Scharfe [this message]
2018-10-04 15:14   ` [PATCH 5/5] oidset: uninline oidset_init() René Scharfe
2018-10-04 22:15   ` [PATCH v3 0/5] oidset: use khash Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3651262f-dec2-16d4-fc77-346ed74c6ffa@web.de \
    --to=l.s.r@web.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.