From: "René Scharfe" <l.s.r@web.de>
To: Git List <git@vger.kernel.org>
Cc: Junio C Hamano <gitster@pobox.com>, Jeff King <peff@peff.net>
Subject: [PATCH v2 2/2] oidset: use khash
Date: Wed, 3 Oct 2018 15:16:39 +0200 [thread overview]
Message-ID: <5efe6695-2e82-786c-1170-7874978cb534@web.de> (raw)
In-Reply-To: <64911aec-71cd-d990-5dfd-bf2c3163690c@web.de>
Reimplement oidset using khash.h in order to reduce its memory footprint
and make it faster.
Performance of a command that mainly checks for duplicate objects using
an oidset, with master and Clang 6.0.1:
$ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"
$ /usr/bin/time $cmd >/dev/null
0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
0inputs+0outputs (0major+11204minor)pagefaults 0swaps
$ hyperfine "$cmd"
Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
Time (mean ± σ): 250.0 ms ± 6.0 ms [User: 225.9 ms, System: 23.6 ms]
Range (min … max): 242.0 ms … 261.1 ms
And with this patch:
$ /usr/bin/time $cmd >/dev/null
0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
0inputs+0outputs (0major+8318minor)pagefaults 0swaps
$ hyperfine "$cmd"
Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
Time (mean ± σ): 151.9 ms ± 4.9 ms [User: 130.5 ms, System: 21.2 ms]
Range (min … max): 148.2 ms … 170.4 ms
Initial-patch-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
fetch-pack.c | 2 +-
oidset.c | 34 ++++++++++++----------------------
oidset.h | 36 ++++++++++++++++++++++++++++--------
3 files changed, 41 insertions(+), 31 deletions(-)
diff --git a/fetch-pack.c b/fetch-pack.c
index 75047a4b2a..a839315726 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -536,7 +536,7 @@ static int tip_oids_contain(struct oidset *tip_oids,
* add to "newlist" between calls, the additions will always be for
* oids that are already in the set.
*/
- if (!tip_oids->map.map.tablesize) {
+ if (!tip_oids->set.n_buckets) {
add_refs_to_oidset(tip_oids, unmatched);
add_refs_to_oidset(tip_oids, newlist);
}
diff --git a/oidset.c b/oidset.c
index 454c54f933..9836d427ef 100644
--- a/oidset.c
+++ b/oidset.c
@@ -3,38 +3,28 @@
int oidset_contains(const struct oidset *set, const struct object_id *oid)
{
- if (!set->map.map.tablesize)
- return 0;
- return !!oidmap_get(&set->map, oid);
+ khiter_t pos = kh_get_oid(&set->set, *oid);
+ return pos != kh_end(&set->set);
}
int oidset_insert(struct oidset *set, const struct object_id *oid)
{
- struct oidmap_entry *entry;
-
- if (!set->map.map.tablesize)
- oidmap_init(&set->map, 0);
- else if (oidset_contains(set, oid))
- return 1;
-
- entry = xmalloc(sizeof(*entry));
- oidcpy(&entry->oid, oid);
-
- oidmap_put(&set->map, entry);
- return 0;
+ int added;
+ kh_put_oid(&set->set, *oid, &added);
+ return !added;
}
int oidset_remove(struct oidset *set, const struct object_id *oid)
{
- struct oidmap_entry *entry;
-
- entry = oidmap_remove(&set->map, oid);
- free(entry);
-
- return (entry != NULL);
+ khiter_t pos = kh_get_oid(&set->set, *oid);
+ if (pos == kh_end(&set->set))
+ return 0;
+ kh_del_oid(&set->set, pos);
+ return 1;
}
void oidset_clear(struct oidset *set)
{
- oidmap_free(&set->map, 1);
+ kh_release_oid(&set->set);
+ oidset_init(set, 0);
}
diff --git a/oidset.h b/oidset.h
index 40ec5f87fe..4b90540cd4 100644
--- a/oidset.h
+++ b/oidset.h
@@ -1,7 +1,8 @@
#ifndef OIDSET_H
#define OIDSET_H
-#include "oidmap.h"
+#include "hashmap.h"
+#include "khash.h"
/**
* This API is similar to sha1-array, in that it maintains a set of object ids
@@ -15,19 +16,33 @@
* table overhead.
*/
+static inline unsigned int oid_hash(struct object_id oid)
+{
+ return sha1hash(oid.hash);
+}
+
+static inline int oid_equal(struct object_id a, struct object_id b)
+{
+ return oideq(&a, &b);
+}
+
+KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
+
/**
* A single oidset; should be zero-initialized (or use OIDSET_INIT).
*/
struct oidset {
- struct oidmap map;
+ kh_oid_t set;
};
-#define OIDSET_INIT { OIDMAP_INIT }
+#define OIDSET_INIT { { 0 } }
static inline void oidset_init(struct oidset *set, size_t initial_size)
{
- oidmap_init(&set->map, initial_size);
+ memset(&set->set, 0, sizeof(set->set));
+ if (initial_size)
+ kh_resize_oid(&set->set, initial_size);
}
/**
@@ -58,19 +73,24 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
void oidset_clear(struct oidset *set);
struct oidset_iter {
- struct oidmap_iter m_iter;
+ kh_oid_t *set;
+ khiter_t iter;
};
static inline void oidset_iter_init(struct oidset *set,
struct oidset_iter *iter)
{
- oidmap_iter_init(&set->map, &iter->m_iter);
+ iter->set = &set->set;
+ iter->iter = kh_begin(iter->set);
}
static inline struct object_id *oidset_iter_next(struct oidset_iter *iter)
{
- struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter);
- return e ? &e->oid : NULL;
+ for (; iter->iter != kh_end(iter->set); iter->iter++) {
+ if (kh_exist(iter->set, iter->iter))
+ return &kh_key(iter->set, iter->iter++);
+ }
+ return NULL;
}
static inline struct object_id *oidset_iter_first(struct oidset *set,
--
2.19.0
next prev parent reply other threads:[~2018-10-03 13:16 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-03 13:11 [PATCH v2 0/2] oidset: use khash René Scharfe
2018-10-03 13:12 ` [PATCH v2 1/2] khash: factor out kh_release_* René Scharfe
2018-10-03 13:16 ` René Scharfe [this message]
2018-10-03 19:40 ` [PATCH v2 2/2] oidset: use khash Jeff King
2018-10-04 5:56 ` René Scharfe
2018-10-04 6:48 ` Jeff King
2018-10-04 6:50 ` Jeff King
2018-10-04 15:05 ` René Scharfe
2018-10-04 15:05 ` [PATCH v3 0/5] " René Scharfe
2018-10-04 15:09 ` [PATCH v3 1/5] fetch-pack: factor out is_unmatched_ref() René Scharfe
2018-10-04 15:09 ` [PATCH v3 2/5] fetch-pack: load tip_oids eagerly iff needed René Scharfe
2018-10-04 21:38 ` Jonathan Tan
2018-10-04 22:11 ` René Scharfe
2018-10-05 20:13 ` René Scharfe
2018-10-04 22:14 ` Jeff King
2018-10-04 22:52 ` Jonathan Tan
2018-10-04 23:18 ` Jeff King
2018-10-04 22:07 ` Jeff King
2018-10-05 20:13 ` René Scharfe
2018-10-05 20:27 ` Jeff King
2018-10-05 21:22 ` René Scharfe
2018-10-05 21:47 ` Jeff King
2018-10-04 15:10 ` [PATCH v3 3/5] khash: factor out kh_release_* René Scharfe
2018-10-04 15:13 ` [PATCH v3 4/5] oidset: use khash René Scharfe
2018-10-04 15:14 ` [PATCH 5/5] oidset: uninline oidset_init() René Scharfe
2018-10-04 22:15 ` [PATCH v3 0/5] oidset: use khash Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5efe6695-2e82-786c-1170-7874978cb534@web.de \
--to=l.s.r@web.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of its NNTP newsgroup(s).