git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Berg <johannes@sipsolutions.net>
To: git@vger.kernel.org
Cc: bup-list@googlegroups.com
Subject: [RFC PATCH] multi-pack-index: allow operating without pack files
Date: Fri, 20 Aug 2021 21:55:58 +0200	[thread overview]
Message-ID: <20210820195558.44275-1-johannes@sipsolutions.net> (raw)

Technically, multi-pack-index doesn't need pack files to exist,
but add_packed_git() today checks whether they exist or not.

In bup, a git pack format based backup tool, we'd really like
to take advantage of the multi-pack-index, since bup needs it
to save new objects to the repository efficiently (to check if
something already exists), and uses git to access the repo, so
the multi-pack-index can make that more efficient.

Alternatively, bup has its own 'midx' format, of which multiple
can exist in a repository, predating the multi-pack-index.

All of this works well as long as the bup repository is just a
normal git repository. However, I've been adding encrypted and
encrypted remote repositories to bup, where the pack files are
not local, similar to promisor remotes, but not really done in
the same way.

In this case, the local storage is only the idx files, no pack
files (it's just a cache), and we access the pack files and
objects within in different ways. Unfortunately, in this case
we also cannot reuse bup's midx format very well: it only has
information on which objects exist, not where to find them,
and so reading from the repository requires reading all of the
idx files, something that git's multi-pack-index solves.

While we'll need to add read access to git's multi-pack-index
to bup, having a call to 'git multi-pack-index' write it would
be nice and save some duplication. However, in the case of the
remote/encrypted repositories, git currently cannot do that as
it requires the pack files to exist.

Add a command-line option that lifts the requirement for pack
files to exist, to make that easier (the alternative would be
writing dummy pack files; git even accepts empty ones).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 Documentation/git-multi-pack-index.txt |  6 +++++-
 builtin/multi-pack-index.c             |  5 ++++-
 midx.c                                 |  9 ++++++---
 midx.h                                 |  1 +
 packfile.c                             | 10 ++++++++--
 packfile.h                             |  2 ++
 6 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt
index ffd601bc17b4..23db70fbebc2 100644
--- a/Documentation/git-multi-pack-index.txt
+++ b/Documentation/git-multi-pack-index.txt
@@ -10,7 +10,7 @@ SYNOPSIS
 --------
 [verse]
 'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress]
-	[--preferred-pack=<pack>] <subcommand>
+	<subcommand> [<subcommand options>]
 
 DESCRIPTION
 -----------
@@ -40,6 +40,10 @@ write::
 		multiple packs contain the same object. If not given,
 		ties are broken in favor of the pack with the lowest
 		mtime.
+
+	--no-require-packs::
+		Don't require pack files to exist, useful only for
+		certain non-repository caches.
 --
 
 verify::
diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
index 8ff0dee2ecbb..2c9293b20c49 100644
--- a/builtin/multi-pack-index.c
+++ b/builtin/multi-pack-index.c
@@ -7,7 +7,7 @@
 #include "object-store.h"
 
 #define BUILTIN_MIDX_WRITE_USAGE \
-	N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]")
+	N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>] [--no-require-packs]")
 
 #define BUILTIN_MIDX_VERIFY_USAGE \
 	N_("git multi-pack-index [<options>] verify")
@@ -68,6 +68,9 @@ static int cmd_multi_pack_index_write(int argc, const char **argv)
 		OPT_STRING(0, "preferred-pack", &opts.preferred_pack,
 			   N_("preferred-pack"),
 			   N_("pack for reuse when computing a multi-pack bitmap")),
+		OPT_BIT(0, "no-require-packs", &opts.flags,
+			N_("don't require pack files to exist"),
+			MIDX_DONT_REQUIRE_PACKS),
 		OPT_END(),
 	};
 
diff --git a/midx.c b/midx.c
index 902e1a7a7d9d..98b3cb33201f 100644
--- a/midx.c
+++ b/midx.c
@@ -468,6 +468,7 @@ struct write_midx_context {
 	uint32_t num_large_offsets;
 
 	int preferred_pack_idx;
+	unsigned flags;
 };
 
 static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -482,9 +483,10 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
 
 		ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
 
-		ctx->info[ctx->nr].p = add_packed_git(full_path,
-						      full_path_len,
-						      0);
+		ctx->info[ctx->nr].p = _add_packed_git(full_path,
+						       full_path_len,
+						       0,
+						       !(ctx->flags & MIDX_DONT_REQUIRE_PACKS));
 
 		if (!ctx->info[ctx->nr].p) {
 			warning(_("failed to add packfile '%s'"),
@@ -924,6 +926,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	ctx.nr = 0;
 	ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
 	ctx.info = NULL;
+	ctx.flags = flags;
 	ALLOC_ARRAY(ctx.info, ctx.alloc);
 
 	if (ctx.m) {
diff --git a/midx.h b/midx.h
index 8684cf0fefe8..aa6382d99386 100644
--- a/midx.h
+++ b/midx.h
@@ -41,6 +41,7 @@ struct multi_pack_index {
 
 #define MIDX_PROGRESS     (1 << 0)
 #define MIDX_WRITE_REV_INDEX (1 << 1)
+#define MIDX_DONT_REQUIRE_PACKS (1 << 2)
 
 char *get_midx_rev_filename(struct multi_pack_index *m);
 
diff --git a/packfile.c b/packfile.c
index 9ef6d9829280..dfe994205914 100644
--- a/packfile.c
+++ b/packfile.c
@@ -687,7 +687,8 @@ void unuse_pack(struct pack_window **w_cursor)
 	}
 }
 
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *_add_packed_git(const char *path, size_t path_len, int local,
+				   int require_pack)
 {
 	struct stat st;
 	size_t alloc;
@@ -717,7 +718,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 		p->pack_promisor = 1;
 
 	xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
-	if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
+	if (require_pack && (stat(p->pack_name, &st) || !S_ISREG(st.st_mode))) {
 		free(p);
 		return NULL;
 	}
@@ -734,6 +735,11 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 	return p;
 }
 
+struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+{
+	return _add_packed_git(path, path_len, local, 1);
+}
+
 void install_packed_git(struct repository *r, struct packed_git *pack)
 {
 	if (pack->pack_fd != -1)
diff --git a/packfile.h b/packfile.h
index 3ae117a8aef0..a921077a05ef 100644
--- a/packfile.h
+++ b/packfile.h
@@ -96,6 +96,8 @@ void close_object_store(struct raw_object_store *o);
 void unuse_pack(struct pack_window **);
 void clear_delta_base_cache(void);
 struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *_add_packed_git(const char *path, size_t path_len, int local,
+				   int require_pack);
 
 /*
  * Unlink the .pack and associated extension files.
-- 
2.31.1


             reply	other threads:[~2021-08-20 19:56 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-20 19:55 Johannes Berg [this message]
2021-08-23  0:34 ` [RFC PATCH] multi-pack-index: allow operating without pack files Derrick Stolee
2021-08-23  1:11   ` Martin Fick
2021-08-23  8:21     ` Johannes Berg
2021-08-23  4:05   ` Taylor Blau
2021-08-23  8:23     ` Johannes Berg
2021-08-23  9:22   ` Johannes Berg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210820195558.44275-1-johannes@sipsolutions.net \
    --to=johannes@sipsolutions.net \
    --cc=bup-list@googlegroups.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).