All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: git@vger.kernel.org
Cc: "Jeff King" <peff@peff.net>, "René Scharfe" <l.s.r@web.de>
Subject: [PATCH v2 1/5] speed up alt_odb_usable() with many alternates
Date: Tue, 29 Jun 2021 20:53:01 +0000	[thread overview]
Message-ID: <20210629205305.7100-2-e@80x24.org> (raw)
In-Reply-To: <20210627024718.25383-1-e@80x24.org>

With many alternates, the duplicate check in alt_odb_usable()
wastes many cycles doing repeated fspathcmp() on every existing
alternate.  Use a khash to speed up lookups by odb->path.

Since the kh_put_* API uses the supplied key without
duplicating it, we also take advantage of it to replace both
xstrdup() and strbuf_release() in link_alt_odb_entry() with
strbuf_detach() to avoid the allocation and copy.

In a test repository with 50K alternates and each of those 50K
alternates having one alternate each (for a total of 100K total
alternates); this speeds up lookup of a non-existent blob from
over 16 minutes to roughly 2.7 seconds on my busy workstation.

Note: all underlying git object directories were small and
unpacked with only loose objects and no packs.  Having to load
packs increases times significantly.

Signed-off-by: Eric Wong <e@80x24.org>
---
 object-file.c  | 33 ++++++++++++++++++++++-----------
 object-store.h | 17 +++++++++++++++++
 object.c       |  2 ++
 3 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/object-file.c b/object-file.c
index f233b440b2..304af3a172 100644
--- a/object-file.c
+++ b/object-file.c
@@ -517,9 +517,9 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf,
  */
 static int alt_odb_usable(struct raw_object_store *o,
 			  struct strbuf *path,
-			  const char *normalized_objdir)
+			  const char *normalized_objdir, khiter_t *pos)
 {
-	struct object_directory *odb;
+	int r;
 
 	/* Detect cases where alternate disappeared */
 	if (!is_directory(path->buf)) {
@@ -533,14 +533,22 @@ static int alt_odb_usable(struct raw_object_store *o,
 	 * Prevent the common mistake of listing the same
 	 * thing twice, or object directory itself.
 	 */
-	for (odb = o->odb; odb; odb = odb->next) {
-		if (!fspathcmp(path->buf, odb->path))
-			return 0;
+	if (!o->odb_by_path) {
+		khiter_t p;
+
+		o->odb_by_path = kh_init_odb_path_map();
+		assert(!o->odb->next);
+		p = kh_put_odb_path_map(o->odb_by_path, o->odb->path, &r);
+		if (r < 0) die_errno(_("kh_put_odb_path_map"));
+		assert(r == 1); /* never used */
+		kh_value(o->odb_by_path, p) = o->odb;
 	}
 	if (!fspathcmp(path->buf, normalized_objdir))
 		return 0;
-
-	return 1;
+	*pos = kh_put_odb_path_map(o->odb_by_path, path->buf, &r);
+	if (r < 0) die_errno(_("kh_put_odb_path_map"));
+	/* r: 0 = exists, 1 = never used, 2 = deleted */
+	return r == 0 ? 0 : 1;
 }
 
 /*
@@ -566,6 +574,7 @@ static int link_alt_odb_entry(struct repository *r, const char *entry,
 {
 	struct object_directory *ent;
 	struct strbuf pathbuf = STRBUF_INIT;
+	khiter_t pos;
 
 	if (!is_absolute_path(entry) && relative_base) {
 		strbuf_realpath(&pathbuf, relative_base, 1);
@@ -587,23 +596,25 @@ static int link_alt_odb_entry(struct repository *r, const char *entry,
 	while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
 		strbuf_setlen(&pathbuf, pathbuf.len - 1);
 
-	if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir)) {
+	if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir, &pos)) {
 		strbuf_release(&pathbuf);
 		return -1;
 	}
 
 	CALLOC_ARRAY(ent, 1);
-	ent->path = xstrdup(pathbuf.buf);
+	/* pathbuf.buf is already in r->objects->odb_by_path */
+	ent->path = strbuf_detach(&pathbuf, NULL);
 
 	/* add the alternate entry */
 	*r->objects->odb_tail = ent;
 	r->objects->odb_tail = &(ent->next);
 	ent->next = NULL;
+	assert(r->objects->odb_by_path);
+	kh_value(r->objects->odb_by_path, pos) = ent;
 
 	/* recursively add alternates */
-	read_info_alternates(r, pathbuf.buf, depth + 1);
+	read_info_alternates(r, ent->path, depth + 1);
 
-	strbuf_release(&pathbuf);
 	return 0;
 }
 
diff --git a/object-store.h b/object-store.h
index ec32c23dcb..20c1cedb75 100644
--- a/object-store.h
+++ b/object-store.h
@@ -7,6 +7,8 @@
 #include "oid-array.h"
 #include "strbuf.h"
 #include "thread-utils.h"
+#include "khash.h"
+#include "dir.h"
 
 struct object_directory {
 	struct object_directory *next;
@@ -30,6 +32,19 @@ struct object_directory {
 	char *path;
 };
 
+static inline int odb_path_eq(const char *a, const char *b)
+{
+	return !fspathcmp(a, b);
+}
+
+static inline int odb_path_hash(const char *str)
+{
+	return ignore_case ? strihash(str) : __ac_X31_hash_string(str);
+}
+
+KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
+	struct object_directory *, 1, odb_path_hash, odb_path_eq);
+
 void prepare_alt_odb(struct repository *r);
 char *compute_alternate_path(const char *path, struct strbuf *err);
 typedef int alt_odb_fn(struct object_directory *, void *);
@@ -116,6 +131,8 @@ struct raw_object_store {
 	 */
 	struct object_directory *odb;
 	struct object_directory **odb_tail;
+	kh_odb_path_map_t *odb_by_path;
+
 	int loaded_alternates;
 
 	/*
diff --git a/object.c b/object.c
index 14188453c5..2b3c075a15 100644
--- a/object.c
+++ b/object.c
@@ -511,6 +511,8 @@ static void free_object_directories(struct raw_object_store *o)
 		free_object_directory(o->odb);
 		o->odb = next;
 	}
+	kh_destroy_odb_path_map(o->odb_by_path);
+	o->odb_by_path = NULL;
 }
 
 void raw_object_store_clear(struct raw_object_store *o)

  parent reply	other threads:[~2021-06-29 20:53 UTC|newest]

Thread overview: 99+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-24  0:58 [PATCH] speed up alt_odb_usable() with many alternates Eric Wong
2021-06-27  2:47 ` [PATCH 0/5] optimizations for many odb alternates Eric Wong
2021-06-27  2:47   ` [PATCH 1/5] speed up alt_odb_usable() with many alternates Eric Wong
2021-06-27  2:47   ` [PATCH 2/5] avoid strlen via strbuf_addstr in link_alt_odb_entry Eric Wong
2021-06-27  2:47   ` [PATCH 3/5] make object_directory.loose_objects_subdir_seen a bitmap Eric Wong
2021-06-27 10:23     ` René Scharfe
2021-06-28 23:09       ` Eric Wong
2021-06-27  2:47   ` [PATCH 4/5] oidcpy_with_padding: constify `src' arg Eric Wong
2021-06-27  2:47   ` [PATCH 5/5] oidtree: a crit-bit tree for odb_loose_cache Eric Wong
2021-06-29 14:42     ` Junio C Hamano
2021-06-29 20:17       ` Eric Wong
2021-06-29 20:53   ` [PATCH v2 0/5] optimizations for many alternates Eric Wong
2021-07-07 23:10     ` [PATCH v3 " Eric Wong
2021-07-07 23:10     ` [PATCH v3 1/5] speed up alt_odb_usable() with " Eric Wong
2021-07-08  0:20       ` Junio C Hamano
2021-07-08  1:14         ` Eric Wong
2021-07-08  4:30           ` Junio C Hamano
2021-07-07 23:10     ` [PATCH v3 2/5] avoid strlen via strbuf_addstr in link_alt_odb_entry Eric Wong
2021-07-08  4:57       ` Junio C Hamano
2021-07-07 23:10     ` [PATCH v3 3/5] make object_directory.loose_objects_subdir_seen a bitmap Eric Wong
2021-07-07 23:10     ` [PATCH v3 4/5] oidcpy_with_padding: constify `src' arg Eric Wong
2021-07-07 23:10     ` [PATCH v3 5/5] oidtree: a crit-bit tree for odb_loose_cache Eric Wong
2021-06-29 20:53   ` Eric Wong [this message]
2021-07-03 10:05     ` [PATCH v2 1/5] speed up alt_odb_usable() with many alternates René Scharfe
2021-07-04  9:02       ` René Scharfe
2021-07-06 23:01       ` Eric Wong
2021-06-29 20:53   ` [PATCH v2 2/5] avoid strlen via strbuf_addstr in link_alt_odb_entry Eric Wong
2021-06-29 20:53   ` [PATCH v2 3/5] make object_directory.loose_objects_subdir_seen a bitmap Eric Wong
2021-06-29 20:53   ` [PATCH v2 4/5] oidcpy_with_padding: constify `src' arg Eric Wong
2021-06-29 20:53   ` [PATCH v2 5/5] oidtree: a crit-bit tree for odb_loose_cache Eric Wong
2021-07-04  9:02     ` René Scharfe
2021-07-06 23:21       ` Eric Wong
2021-07-04  9:32     ` Ævar Arnfjörð Bjarmason
2021-07-07 23:12       ` Eric Wong
2021-08-06 15:31     ` Andrzej Hunt
2021-08-06 17:53       ` René Scharfe
2021-08-07 22:49         ` Eric Wong
2021-08-09  1:35           ` Carlo Arenas
2021-08-09  1:38             ` [PATCH/RFC 0/3] pedantic errors in next Carlo Marcelo Arenas Belón
2021-08-09  1:38               ` [PATCH/RFC 1/3] oidtree: avoid nested struct oidtree_node Carlo Marcelo Arenas Belón
2021-08-09  1:38               ` [PATCH/RFC 2/3] object-store: avoid extra ';' from KHASH_INIT Carlo Marcelo Arenas Belón
2021-08-09 15:53                 ` Junio C Hamano
2021-08-09  1:38               ` [PATCH/RFC 3/3] ci: run a pedantic build as part of the GitHub workflow Carlo Marcelo Arenas Belón
2021-08-09 10:50                 ` Bagas Sanjaya
2021-08-09 22:03                   ` Carlo Arenas
2021-08-09 14:56                 ` Phillip Wood
2021-08-09 22:48                   ` Carlo Arenas
2021-08-10 15:24                     ` Phillip Wood
2021-08-10 18:25                       ` Junio C Hamano
2021-08-30 11:36                   ` Ævar Arnfjörð Bjarmason
2021-08-31 20:28                     ` Carlo Arenas
2021-08-31 20:51                       ` Ævar Arnfjörð Bjarmason
2021-08-31 23:54                         ` Carlo Arenas
2021-09-01  1:52                           ` Jeff King
2021-09-01 17:55                             ` Junio C Hamano
2021-08-30 11:40                 ` Ævar Arnfjörð Bjarmason
2021-09-01  9:19                 ` [RFC PATCH v2 0/4] developer: support pedantic Carlo Marcelo Arenas Belón
2021-09-01  9:19                   ` [RFC PATCH v2 1/4] developer: retire USE_PARENS_AROUND_GETTEXT_N support Carlo Marcelo Arenas Belón
2021-09-01  9:19                   ` [RFC PATCH v2 2/4] developer: enable pedantic by default Carlo Marcelo Arenas Belón
2021-09-01  9:19                   ` [RFC PATCH v2 3/4] developer: add an alternative script for detecting broken N_() Carlo Marcelo Arenas Belón
2021-09-01  9:19                   ` [RFC PATCH v2 4/4] developer: move detect-compiler out of the main directory Carlo Marcelo Arenas Belón
2021-09-01 10:10                   ` [RFC PATCH v2 0/4] developer: support pedantic Jeff King
2021-09-01 11:25                     ` [PATCH] gettext: remove optional non-standard parens in N_() definition Ævar Arnfjörð Bjarmason
2021-09-01 17:31                       ` Eric Sunshine
2021-09-02  9:13                       ` Jeff King
2021-09-02 19:19                       ` Junio C Hamano
2021-09-01 11:27                     ` [RFC PATCH v2 0/4] developer: support pedantic Ævar Arnfjörð Bjarmason
2021-09-01 18:03                       ` Carlo Arenas
2021-09-03 17:02                   ` [PATCH v3 0/3] support pedantic in developer mode Carlo Marcelo Arenas Belón
2021-09-03 17:02                     ` [PATCH v3 1/3] gettext: remove optional non-standard parens in N_() definition Carlo Marcelo Arenas Belón
2021-09-10 15:39                       ` Ævar Arnfjörð Bjarmason
2021-09-03 17:02                     ` [PATCH v3 2/3] win32: allow building with pedantic mode enabled Carlo Marcelo Arenas Belón
2021-09-03 18:47                       ` René Scharfe
2021-09-03 20:13                         ` Carlo Marcelo Arenas Belón
2021-09-03 20:32                           ` Junio C Hamano
2021-09-03 20:38                           ` René Scharfe
2021-09-04  9:37                             ` René Scharfe
2021-09-04 14:42                               ` Carlo Arenas
2021-09-27 23:04                       ` Jonathan Tan
2021-09-28  0:30                         ` Carlo Arenas
2021-09-28 16:50                           ` Jonathan Tan
2021-09-28 17:37                           ` Junio C Hamano
2021-09-28 20:16                             ` Jonathan Tan
2021-09-29  1:00                               ` Carlo Arenas
2021-09-29 15:55                                 ` Junio C Hamano
2021-09-03 17:02                     ` [PATCH v3 3/3] developer: enable pedantic by default Carlo Marcelo Arenas Belón
2021-09-05  7:54                     ` [PATCH v3 0/3] support pedantic in developer mode Ævar Arnfjörð Bjarmason
2021-08-09 16:44               ` [PATCH/RFC 0/3] pedantic errors in next Junio C Hamano
2021-08-09 20:10                 ` Eric Wong
2021-08-10  6:16                 ` Carlo Marcelo Arenas Belón
2021-08-10 19:30                   ` René Scharfe
2021-08-10 23:49                     ` Carlo Arenas
2021-08-11  0:57                       ` Carlo Arenas
2021-08-11 14:57                       ` René Scharfe
2021-08-11 17:20                         ` Junio C Hamano
2021-08-10 18:59             ` [PATCH v2 5/5] oidtree: a crit-bit tree for odb_loose_cache René Scharfe
2021-08-10 19:40           ` René Scharfe
2021-08-14 20:00       ` [PATCH] oidtree: avoid unaligned access to crit-bit tree René Scharfe
2021-08-16 19:11         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210629205305.7100-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=git@vger.kernel.org \
    --cc=l.s.r@web.de \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.