git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff King <peff@peff.net>
To: Geert Jansen <gerardu@amazon.com>
Cc: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Junio C Hamano" <gitster@pobox.com>,
	"git@vger.kernel.org" <git@vger.kernel.org>,
	"René Scharfe" <l.s.r@web.de>,
	"Takuto Ikuta" <tikuta@chromium.org>
Subject: [PATCH 7/9] object-store: provide helpers for loose_objects_cache
Date: Mon, 12 Nov 2018 09:50:56 -0500	[thread overview]
Message-ID: <20181112145056.GG7400@sigill.intra.peff.net> (raw)
In-Reply-To: <20181112144627.GA2478@sigill.intra.peff.net>

Our object_directory struct has a loose objects cache that all users of
the struct can see. But the only one that knows how to load the cache is
find_short_object_filename(). Let's extract that logic in to a reusable
function.

While we're at it, let's also reset the cache when we re-read the object
directories. This shouldn't have an impact on performance, as re-reads
are meant to be rare (and are already expensive, so we avoid them with
things like OBJECT_INFO_QUICK).

Since the cache is already meant to be an approximation, it's tempting
to skip even this bit of safety. But it's necessary to allow more code
to use it. For instance, fetch-pack explicitly re-reads the object
directory after performing its fetch, and would be confused if we didn't
clear the cache.

Signed-off-by: Jeff King <peff@peff.net>
---
 object-store.h | 18 +++++++++++++-----
 packfile.c     |  8 ++++++++
 sha1-file.c    | 26 ++++++++++++++++++++++++++
 sha1-name.c    | 21 +--------------------
 4 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/object-store.h b/object-store.h
index 30faf7b391..bf1e0cb761 100644
--- a/object-store.h
+++ b/object-store.h
@@ -11,11 +11,12 @@ struct object_directory {
 	struct object_directory *next;
 
 	/*
-	 * Used to store the results of readdir(3) calls when searching
-	 * for unique abbreviated hashes.  This cache is never
-	 * invalidated, thus it's racy and not necessarily accurate.
-	 * That's fine for its purpose; don't use it for tasks requiring
-	 * greater accuracy!
+	 * Used to store the results of readdir(3) calls when we are OK
+	 * sacrificing accuracy due to races for speed. That includes
+	 * our search for unique abbreviated hashes. Don't use it for tasks
+	 * requiring greater accuracy!
+	 *
+	 * Be sure to call odb_load_loose_cache() before using.
 	 */
 	char loose_objects_subdir_seen[256];
 	struct oid_array loose_objects_cache;
@@ -45,6 +46,13 @@ void add_to_alternates_file(const char *dir);
  */
 void add_to_alternates_memory(const char *dir);
 
+/*
+ * Populate an odb's loose object cache for one particular subdirectory (i.e.,
+ * the one that corresponds to the first byte of objects you're interested in,
+ * from 0 to 255 inclusive).
+ */
+void odb_load_loose_cache(struct object_directory *odb, int subdir_nr);
+
 struct packed_git {
 	struct packed_git *next;
 	struct list_head mru;
diff --git a/packfile.c b/packfile.c
index 1eda33247f..91fd40efb0 100644
--- a/packfile.c
+++ b/packfile.c
@@ -987,6 +987,14 @@ static void prepare_packed_git(struct repository *r)
 
 void reprepare_packed_git(struct repository *r)
 {
+	struct object_directory *odb;
+
+	for (odb = r->objects->odb; odb; odb = odb->next) {
+		oid_array_clear(&odb->loose_objects_cache);
+		memset(&odb->loose_objects_subdir_seen, 0,
+		       sizeof(odb->loose_objects_subdir_seen));
+	}
+
 	r->objects->approximate_object_count_valid = 0;
 	r->objects->packed_git_initialized = 0;
 	prepare_packed_git(r);
diff --git a/sha1-file.c b/sha1-file.c
index 503262edd2..4aae716a37 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -2125,6 +2125,32 @@ int for_each_loose_object(each_loose_object_fn cb, void *data,
 	return 0;
 }
 
+static int append_loose_object(const struct object_id *oid, const char *path,
+			       void *data)
+{
+	oid_array_append(data, oid);
+	return 0;
+}
+
+void odb_load_loose_cache(struct object_directory *odb, int subdir_nr)
+{
+	struct strbuf buf = STRBUF_INIT;
+
+	if (subdir_nr < 0 ||
+	    subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen))
+		BUG("subdir_nr out of range");
+
+	if (odb->loose_objects_subdir_seen[subdir_nr])
+		return;
+
+	strbuf_addstr(&buf, odb->path);
+	for_each_file_in_obj_subdir(subdir_nr, &buf,
+				    append_loose_object,
+				    NULL, NULL,
+				    &odb->loose_objects_cache);
+	odb->loose_objects_subdir_seen[subdir_nr] = 1;
+}
+
 static int check_stream_sha1(git_zstream *stream,
 			     const char *hdr,
 			     unsigned long size,
diff --git a/sha1-name.c b/sha1-name.c
index 358ca5e288..b24502811b 100644
--- a/sha1-name.c
+++ b/sha1-name.c
@@ -83,36 +83,19 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
 	/* otherwise, current can be discarded and candidate is still good */
 }
 
-static int append_loose_object(const struct object_id *oid, const char *path,
-			       void *data)
-{
-	oid_array_append(data, oid);
-	return 0;
-}
-
 static int match_sha(unsigned, const unsigned char *, const unsigned char *);
 
 static void find_short_object_filename(struct disambiguate_state *ds)
 {
 	int subdir_nr = ds->bin_pfx.hash[0];
 	struct object_directory *odb;
-	struct strbuf buf = STRBUF_INIT;
 
 	for (odb = the_repository->objects->odb;
 	     odb && !ds->ambiguous;
 	     odb = odb->next) {
 		int pos;
 
-		if (!odb->loose_objects_subdir_seen[subdir_nr]) {
-			strbuf_reset(&buf);
-			strbuf_addstr(&buf, odb->path);
-			for_each_file_in_obj_subdir(subdir_nr, &buf,
-						    append_loose_object,
-						    NULL, NULL,
-						    &odb->loose_objects_cache);
-			odb->loose_objects_subdir_seen[subdir_nr] = 1;
-		}
-
+		odb_load_loose_cache(odb, subdir_nr);
 		pos = oid_array_lookup(&odb->loose_objects_cache, &ds->bin_pfx);
 		if (pos < 0)
 			pos = -1 - pos;
@@ -125,8 +108,6 @@ static void find_short_object_filename(struct disambiguate_state *ds)
 			pos++;
 		}
 	}
-
-	strbuf_release(&buf);
 }
 
 static int match_sha(unsigned len, const unsigned char *a, const unsigned char *b)
-- 
2.19.1.1577.g2c5b293d4f


  parent reply	other threads:[~2018-11-12 14:51 UTC|newest]

Thread overview: 99+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-25 18:38 [RFC PATCH] index-pack: improve performance on NFS Jansen, Geert
2018-10-26  0:21 ` Junio C Hamano
2018-10-26 20:38   ` Ævar Arnfjörð Bjarmason
2018-10-27  7:26     ` Junio C Hamano
2018-10-27  9:33       ` Jeff King
2018-10-27 11:22         ` Ævar Arnfjörð Bjarmason
2018-10-28 22:50           ` [PATCH 0/4] index-pack: optionally turn off SHA-1 collision checking Ævar Arnfjörð Bjarmason
2018-10-30  2:49             ` Geert Jansen
2018-10-30  9:04               ` Junio C Hamano
2018-10-30 18:43             ` [PATCH v2 0/3] index-pack: test updates Ævar Arnfjörð Bjarmason
2018-11-13 20:19               ` [PATCH v3] index-pack: add ability to disable SHA-1 collision check Ævar Arnfjörð Bjarmason
2018-11-14  7:09                 ` Junio C Hamano
2018-11-14 12:40                   ` Ævar Arnfjörð Bjarmason
2018-10-30 18:43             ` [PATCH v2 1/3] pack-objects test: modernize style Ævar Arnfjörð Bjarmason
2018-10-30 18:43             ` [PATCH v2 2/3] pack-objects tests: don't leave test .git corrupt at end Ævar Arnfjörð Bjarmason
2018-10-30 18:43             ` [PATCH v2 3/3] index-pack tests: don't leave test repo dirty " Ævar Arnfjörð Bjarmason
2018-10-28 22:50           ` [PATCH 1/4] pack-objects test: modernize style Ævar Arnfjörð Bjarmason
2018-10-28 22:50           ` [PATCH 2/4] pack-objects tests: don't leave test .git corrupt at end Ævar Arnfjörð Bjarmason
2018-10-28 22:50           ` [PATCH 3/4] index-pack tests: don't leave test repo dirty " Ævar Arnfjörð Bjarmason
2018-10-28 22:50           ` [PATCH 4/4] index-pack: add ability to disable SHA-1 collision check Ævar Arnfjörð Bjarmason
2018-10-29 15:04           ` [RFC PATCH] index-pack: improve performance on NFS Jeff King
2018-10-29 15:09             ` Jeff King
2018-10-29 19:36             ` Ævar Arnfjörð Bjarmason
2018-10-29 23:27               ` Jeff King
2018-11-07 22:55                 ` Geert Jansen
2018-11-08 12:02                   ` Jeff King
2018-11-08 20:58                     ` Geert Jansen
2018-11-08 21:18                       ` Jeff King
2018-11-08 21:55                         ` Geert Jansen
2018-11-08 22:20                     ` Ævar Arnfjörð Bjarmason
2018-11-09 10:11                       ` Ævar Arnfjörð Bjarmason
2018-11-12 14:31                       ` Jeff King
2018-11-12 14:46                     ` [PATCH 0/9] caching loose objects Jeff King
2018-11-12 14:46                       ` [PATCH 1/9] fsck: do not reuse child_process structs Jeff King
2018-11-12 15:26                         ` Derrick Stolee
2018-11-12 14:47                       ` [PATCH 2/9] submodule--helper: prefer strip_suffix() to ends_with() Jeff King
2018-11-12 18:23                         ` Stefan Beller
2018-11-12 14:48                       ` [PATCH 3/9] rename "alternate_object_database" to "object_directory" Jeff King
2018-11-12 15:30                         ` Derrick Stolee
2018-11-12 15:36                           ` Jeff King
2018-11-12 19:41                             ` Ramsay Jones
2018-11-12 14:48                       ` [PATCH 4/9] sha1_file_name(): overwrite buffer instead of appending Jeff King
2018-11-12 15:32                         ` Derrick Stolee
2018-11-12 14:49                       ` [PATCH 5/9] handle alternates paths the same as the main object dir Jeff King
2018-11-12 15:38                         ` Derrick Stolee
2018-11-12 15:46                           ` Jeff King
2018-11-12 15:50                             ` Derrick Stolee
2018-11-12 14:50                       ` [PATCH 6/9] sha1-file: use an object_directory for " Jeff King
2018-11-12 15:48                         ` Derrick Stolee
2018-11-12 16:09                           ` Jeff King
2018-11-12 19:04                             ` Stefan Beller
2018-11-22 17:42                               ` Jeff King
2018-11-12 18:48                           ` Stefan Beller
2018-11-12 14:50                       ` Jeff King [this message]
2018-11-12 19:24                         ` [PATCH 7/9] object-store: provide helpers for loose_objects_cache René Scharfe
2018-11-12 20:16                           ` Jeff King
2018-11-12 14:54                       ` [PATCH 8/9] sha1-file: use loose object cache for quick existence check Jeff King
2018-11-12 16:00                         ` Derrick Stolee
2018-11-12 16:01                         ` Ævar Arnfjörð Bjarmason
2018-11-12 16:21                           ` Jeff King
2018-11-12 22:18                             ` Ævar Arnfjörð Bjarmason
2018-11-12 22:30                               ` Ævar Arnfjörð Bjarmason
2018-11-13 10:02                                 ` Ævar Arnfjörð Bjarmason
2018-11-14 18:21                                   ` René Scharfe
2018-12-02 10:52                                   ` René Scharfe
2018-12-03 22:04                                     ` Jeff King
2018-12-04 21:45                                       ` René Scharfe
2018-12-05  4:46                                         ` Jeff King
2018-12-05  6:02                                           ` René Scharfe
2018-12-05  6:51                                             ` Jeff King
2018-12-05  8:15                                               ` Jeff King
2018-12-05 18:41                                                 ` René Scharfe
2018-12-05 20:17                                                   ` Jeff King
2018-11-12 22:44                             ` Geert Jansen
2018-11-27 20:48                         ` René Scharfe
2018-12-01 19:49                           ` Jeff King
2018-11-12 14:55                       ` [PATCH 9/9] fetch-pack: drop custom loose object cache Jeff King
2018-11-12 19:25                         ` René Scharfe
2018-11-12 19:32                           ` Ævar Arnfjörð Bjarmason
2018-11-12 20:07                             ` Jeff King
2018-11-12 20:13                             ` René Scharfe
2018-11-12 16:02                       ` [PATCH 0/9] caching loose objects Derrick Stolee
2018-11-12 19:10                         ` Stefan Beller
2018-11-09 13:43                   ` [RFC PATCH] index-pack: improve performance on NFS Ævar Arnfjörð Bjarmason
2018-11-09 16:08                     ` Duy Nguyen
2018-11-10 14:04                       ` Ævar Arnfjörð Bjarmason
2018-11-12 14:34                         ` Jeff King
2018-11-12 22:58                     ` Geert Jansen
2018-10-27 14:04         ` Duy Nguyen
2018-10-29 15:18           ` Jeff King
2018-10-29  0:48         ` Junio C Hamano
2018-10-29 15:20           ` Jeff King
2018-10-29 18:43             ` Ævar Arnfjörð Bjarmason
2018-10-29 21:34           ` Geert Jansen
2018-10-29 21:50             ` Jeff King
2018-10-29 22:21               ` Geert Jansen
2018-10-29 22:27             ` Jeff King
2018-10-29 22:35               ` Stefan Beller
2018-10-29 23:29                 ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181112145056.GG7400@sigill.intra.peff.net \
    --to=peff@peff.net \
    --cc=avarab@gmail.com \
    --cc=gerardu@amazon.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=l.s.r@web.de \
    --cc=tikuta@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).