git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 2/5] index-helper: new daemon for caching index and related stuff
Date: Tue, 10 Jun 2014 20:24:22 +0700	[thread overview]
Message-ID: <1402406665-27988-3-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1402406665-27988-1-git-send-email-pclouds@gmail.com>

The shared memory's name follows the template "git-<something>-<SHA1>"
where <SHA1> is the trailing SHA-1 of the index file. <something> is
"index" for caching index files. If such shared memory exists, it
contains the same index content as on disk. The content is already
validated by the daemon and git won't validate it again. Note that it
does not necessarily use the same format as the on-disk version. The
content could be in a format that can be parsed much faster, or even
reused without parsing.

Git can poke the daemon via UNIX signals to tell it to refresh the index
cache, or to not exit for some more minutes. It can't give any
real data directly to the daemon. Real data goes to disk first, then
the daemon reads and verifies it from there.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 .gitignore                               |   1 +
 Documentation/git-index-helper.txt (new) |  24 +++++
 Makefile                                 |   7 ++
 cache.h                                  |   1 +
 config.mak.uname                         |   1 +
 git-compat-util.h                        |   1 +
 index-helper.c (new)                     | 145 +++++++++++++++++++++++++++++++
 read-cache.c                             |  78 +++++++++++++++--
 shm.c (new)                              |  67 ++++++++++++++
 shm.h (new)                              |  23 +++++
 10 files changed, 341 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/git-index-helper.txt
 create mode 100644 index-helper.c
 create mode 100644 shm.c
 create mode 100644 shm.h

diff --git a/.gitignore b/.gitignore
index 70992a4..5a829dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /git-http-fetch
 /git-http-push
 /git-imap-send
+/git-index-helper
 /git-index-pack
 /git-init
 /git-init-db
diff --git a/Documentation/git-index-helper.txt b/Documentation/git-index-helper.txt
new file mode 100644
index 0000000..d0b1365
--- /dev/null
+++ b/Documentation/git-index-helper.txt
@@ -0,0 +1,24 @@
+git-index-helper(1)
+===================
+
+NAME
+----
+git-index-helper - A simple cache server for speeding up index file access
+
+SYNOPSIS
+--------
+[verse]
+'git index-helper'
+
+DESCRIPTION
+-----------
+Keep the index file in memory for faster access. This daemon is per
+repository. This daemon is only available on POSIX systems with
+shared memory support (e.g. Linux).
+
+OPTIONS
+-------
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index f7058a6..d42f3cc 100644
--- a/Makefile
+++ b/Makefile
@@ -886,6 +886,7 @@ LIB_OBJS += sha1-lookup.o
 LIB_OBJS += sha1_file.o
 LIB_OBJS += sha1_name.o
 LIB_OBJS += shallow.o
+LIB_OBJS += shm.o
 LIB_OBJS += sideband.o
 LIB_OBJS += sigchain.o
 LIB_OBJS += split-index.o
@@ -1498,6 +1499,12 @@ ifdef HAVE_DEV_TTY
 	BASIC_CFLAGS += -DHAVE_DEV_TTY
 endif
 
+ifdef HAVE_SHM
+	BASIC_CFLAGS += -DHAVE_SHM
+	EXTLIBS += -lrt
+	PROGRAM_OBJS += index-helper.o
+endif
+
 ifdef DIR_HAS_BSD_GROUP_SEMANTICS
 	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
 endif
diff --git a/cache.h b/cache.h
index 6549e02..f05e062 100644
--- a/cache.h
+++ b/cache.h
@@ -483,6 +483,7 @@ extern int is_index_unborn(struct index_state *);
 extern int read_index_unmerged(struct index_state *);
 #define COMMIT_LOCK		(1 << 0)
 #define CLOSE_LOCK		(1 << 1)
+#define REFRESH_DAEMON		(1 << 2)
 extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags);
 extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
diff --git a/config.mak.uname b/config.mak.uname
index eee0fc2..8de61a4 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -39,6 +39,7 @@ ifeq ($(uname_S),Linux)
 	HAVE_PATHS_H = YesPlease
 	LIBC_CONTAINS_LIBINTL = YesPlease
 	HAVE_DEV_TTY = YesPlease
+	HAVE_SHM = YesPlease
 endif
 ifeq ($(uname_S),GNU/kFreeBSD)
 	NO_STRLCPY = YesPlease
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..a6ebecc 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -356,6 +356,7 @@ static inline const char *skip_prefix(const char *str, const char *prefix)
 #define PROT_READ 1
 #define PROT_WRITE 2
 #define MAP_PRIVATE 1
+#define MAP_SHARED 2
 #endif
 
 #define mmap git_mmap
diff --git a/index-helper.c b/index-helper.c
new file mode 100644
index 0000000..e10d0c3
--- /dev/null
+++ b/index-helper.c
@@ -0,0 +1,145 @@
+#include "cache.h"
+#include "parse-options.h"
+#include "sigchain.h"
+#include "split-index.h"
+#include "shm.h"
+
+static unsigned char cached_sha1[20];
+static unsigned char cached_shared_sha1[20];
+static void *shm_index, *shm_sharedindex;
+static size_t size_index, size_sharedindex;
+static int daemonized;
+
+static void cleanup(void)
+{
+	if (daemonized)
+		return;
+	unlink(git_path("read-cache--daemon.pid"));
+	if (shm_index) {
+		munmap(shm_index, size_index);
+		git_shm_unlink("git-index-%s", sha1_to_hex(cached_sha1));
+	}
+	if (shm_sharedindex) {
+		munmap(shm_sharedindex, size_sharedindex);
+		git_shm_unlink("git-index-%s", sha1_to_hex(cached_shared_sha1));
+	}
+}
+
+static void cleanup_on_signal(int sig)
+{
+	cleanup();
+	sigchain_pop(sig);
+	raise(sig);
+}
+
+static void share_index(struct index_state *istate, void **mmap,
+			size_t *length, unsigned char *sha1)
+{
+	void *new_mmap;
+	if (istate->mmap_size <= 20 ||
+	    hashcmp(istate->sha1,
+		    (unsigned char *)istate->mmap + istate->mmap_size - 20) ||
+	    !hashcmp(istate->sha1, sha1) ||
+	    git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, istate->mmap_size,
+			&new_mmap, PROT_READ | PROT_WRITE, MAP_SHARED,
+			"git-index-%s", sha1_to_hex(istate->sha1)) < 0)
+		return;
+
+	if (*mmap) {
+		munmap(*mmap, *length);
+		git_shm_unlink("git-index-%s", sha1_to_hex(sha1));
+	}
+
+	*length = istate->mmap_size;
+	*mmap = new_mmap;
+	hashcpy(sha1, istate->sha1);
+	memcpy(new_mmap, istate->mmap, istate->mmap_size - 20);
+
+	/*
+	 * The trailing hash must be written last after everything is
+	 * written. It's the indication that the shared memory is now
+	 * ready.
+	 */
+	hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, sha1);
+}
+
+static void refresh(int sig)
+{
+	the_index.keep_mmap = 1;
+	if (read_cache() < 0)
+		die("could not read index");
+	share_index(&the_index, &shm_index, &size_index, cached_sha1);
+	if (the_index.split_index && the_index.split_index->base)
+		share_index(the_index.split_index->base, &shm_sharedindex,
+			    &size_sharedindex, cached_shared_sha1);
+	discard_index(&the_index);
+}
+
+#ifdef HAVE_SHM
+
+static void do_nothing(int sig)
+{
+	/*
+	 * All we need is for the signal to be received so that it
+	 * interrupts sleep(). We don't need to do anything else when
+	 * receiving the signal.
+	 */
+}
+
+static void loop(const char *pid_file, int idle_in_seconds)
+{
+	sigchain_push(SIGHUP, refresh);
+	sigchain_push(SIGUSR1, do_nothing);
+	refresh(0);
+	while (sleep(idle_in_seconds))
+		utime(git_path("read-cache--daemon.pid"), NULL);
+}
+
+#else
+
+static void loop(const char *pid_file, int idle_in_seconds)
+{
+}
+
+#endif
+
+static const char * const usage_text[] = {
+	"git read-cache--daemon [options]",
+	NULL
+};
+
+int main(int argc, char **argv)
+{
+	static struct lock_file lock;
+	struct strbuf sb = STRBUF_INIT;
+	const char *prefix;
+	int fd;
+	struct option options[] = {
+		OPT_END()
+	};
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(usage_text, options);
+	prefix = setup_git_directory();
+	if (parse_options(argc, (const char **)argv, prefix,
+			  options, usage_text, 0))
+		die("too many arguments");
+
+	fd = hold_lock_file_for_update(&lock,
+				       git_path("read-cache--daemon.pid"),
+				       LOCK_DIE_ON_ERROR);
+	strbuf_addf(&sb,
+		    "%" PRIuMAX, (uintmax_t) getpid());
+	write_in_full(fd, sb.buf, sb.len);
+	commit_lock_file(&lock);
+
+	atexit(cleanup);
+	sigchain_push(SIGINT,  cleanup_on_signal);
+	sigchain_push(SIGTERM, cleanup_on_signal);
+	sigchain_push(SIGQUIT, cleanup_on_signal);
+	sigchain_push(SIGPIPE, cleanup_on_signal);
+
+	loop(sb.buf, 600);
+	strbuf_release(&sb);
+	return 0;
+}
diff --git a/read-cache.c b/read-cache.c
index a5031f3..f9df984 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -16,6 +16,7 @@
 #include "varint.h"
 #include "split-index.h"
 #include "sigchain.h"
+#include "shm.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -1332,6 +1333,8 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 	hdr_version = ntohl(hdr->hdr_version);
 	if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
 		return error("bad index version %d", hdr_version);
+	if (!size)
+		return 0;
 	git_SHA1_Init(&c);
 	git_SHA1_Update(&c, hdr, size - 20);
 	git_SHA1_Final(sha1, &c);
@@ -1462,6 +1465,58 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
 	return ce;
 }
 
+static void poke_daemon(struct stat *st, int refresh_cache)
+{
+	int fd;
+	struct strbuf sb;
+
+	if (st->st_mtime + 600 < time(NULL))
+		return;	  /* don't try to read from stale .pid file */
+
+	fd = open(git_path("read-cache--daemon.pid"), O_RDONLY);
+	if (fd < 0)
+		return;
+	strbuf_init(&sb, st->st_size);
+	if (read_in_full(fd, sb.buf, st->st_size) == st->st_size) {
+#ifdef HAVE_SHM
+		char *end = NULL;
+		pid_t pid = strtoul(sb.buf, &end, 10);
+		if (end && !*end)
+			kill(pid, refresh_cache ? SIGHUP : SIGUSR1);
+#endif
+	}
+	close(fd);
+	strbuf_release(&sb);
+}
+
+static void *try_shm(struct index_state *istate, void *mmap, size_t *mmap_size)
+{
+	void *new_mmap = NULL;
+	size_t old_size = *mmap_size;
+	ssize_t new_length;
+	const unsigned char *sha1;
+	struct stat st;
+
+	if (old_size <= 20)
+		return mmap;
+	sha1 = (unsigned char *)mmap + old_size - 20;
+	if (stat(git_path("read-cache--daemon.pid"), &st))
+		return mmap;
+	new_length = git_shm_map(O_RDONLY, 0700, -1, &new_mmap,
+				 PROT_READ, MAP_SHARED,
+				 "git-index-%s", sha1_to_hex(sha1));
+	if (new_length <= 20 ||
+	    hashcmp((unsigned char *)mmap + old_size - 20,
+		    (unsigned char *)new_mmap + new_length - 20)) {
+		if (new_mmap)
+			munmap(new_mmap, new_length);
+		return mmap;
+	}
+	poke_daemon(&st, 0);
+	*mmap_size = new_length;
+	return new_mmap;
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -1469,8 +1524,8 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	struct stat st;
 	unsigned long src_offset;
 	struct cache_header *hdr;
-	void *mmap;
-	size_t mmap_size;
+	void *mmap, *old_mmap;
+	size_t mmap_size, old_mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
 	if (istate->initialized)
@@ -1501,9 +1556,14 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	}
 	close(fd);
 
-	hdr = mmap;
-	if (verify_hdr(hdr, mmap_size) < 0)
+	old_mmap      = mmap;
+	old_mmap_size = mmap_size;
+	mmap	      = try_shm(istate, mmap, &mmap_size);
+	hdr	      = mmap;
+	if (mmap == old_mmap && verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
+	if (mmap != old_mmap)
+		munmap(old_mmap, old_mmap_size);
 
 	hashcpy(istate->sha1, (const unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
@@ -2006,9 +2066,13 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
 		return ret;
 	assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
 	       (COMMIT_LOCK | CLOSE_LOCK));
-	if (flags & COMMIT_LOCK)
-		return commit_locked_index(lock);
-	else if (flags & CLOSE_LOCK)
+	if (flags & COMMIT_LOCK) {
+		struct stat st;
+		if (!commit_locked_index(lock) &&
+		    !stat(git_path("read-cache--daemon.pid"), &st))
+			poke_daemon(&st, 1);
+		return ret;
+	} else if (flags & CLOSE_LOCK)
 		return close_lock_file(lock);
 	else
 		return ret;
diff --git a/shm.c b/shm.c
new file mode 100644
index 0000000..4ec1a00
--- /dev/null
+++ b/shm.c
@@ -0,0 +1,67 @@
+#include "git-compat-util.h"
+#include "shm.h"
+
+#ifdef HAVE_SHM
+
+#define SHM_PATH_LEN 72		/* we don't create very long paths.. */
+
+ssize_t git_shm_map(int oflag, int perm, ssize_t length, void **mmap,
+		    int prot, int flags, const char *fmt, ...)
+{
+	va_list ap;
+	char path[SHM_PATH_LEN];
+	int fd;
+
+	path[0] = '/';
+	va_start(ap, fmt);
+	vsprintf(path + 1, fmt, ap);
+	va_end(ap);
+	fd = shm_open(path, oflag, perm);
+	if (fd < 0)
+		return -1;
+	if (length > 0 && ftruncate(fd, length)) {
+		shm_unlink(path);
+		close(fd);
+		return -1;
+	}
+	if (length < 0 && !(oflag & O_CREAT)) {
+		struct stat st;
+		if (fstat(fd, &st))
+			die_errno("unable to stat %s", path);
+		length = st.st_size;
+	}
+	*mmap = xmmap(NULL, length, prot, flags, fd, 0);
+	close(fd);
+	if (*mmap == MAP_FAILED) {
+		*mmap = NULL;
+		shm_unlink(path);
+		return -1;
+	}
+	return length;
+}
+
+void git_shm_unlink(const char *fmt, ...)
+{
+	va_list ap;
+	char path[SHM_PATH_LEN];
+
+	path[0] = '/';
+	va_start(ap, fmt);
+	vsprintf(path + 1, fmt, ap);
+	va_end(ap);
+	shm_unlink(path);
+}
+
+#else
+
+ssize_t git_shm_map(int oflag, int perm, ssize_t length, void **mmap,
+		    int prot, int flags, const char *fmt, ...)
+{
+	return -1;
+}
+
+void git_shm_unlink(const char *fmt, ...)
+{
+}
+
+#endif
diff --git a/shm.h b/shm.h
new file mode 100644
index 0000000..798d3fd
--- /dev/null
+++ b/shm.h
@@ -0,0 +1,23 @@
+#ifndef SHM_H
+#define SHM_H
+
+/*
+ * Create or open a shared memory object and mmap it. Return the mmap
+ * size if successful, -1 otherwise. On success, *mmap holds the mmap'd
+ * pointer. If oflag does not contain O_CREAT and length is negative,
+ * the mmap size is retrieved from the existing shared memory object.
+ *
+ * The mapping can be freed with munmap(), even on Windows. Note that on
+ * Windows, git_shm_unlink() is a no-op, so the last unmap will destroy
+ * the shared memory.
+ */
+ssize_t git_shm_map(int oflag, int perm, ssize_t length, void **mmap,
+		    int prot, int flags, const char *fmt, ...);
+
+/*
+ * Unlink a shared memory object. Only needed on POSIX platforms. On
+ * Windows this is a no-op.
+ */
+void git_shm_unlink(const char *fmt, ...);
+
+#endif
-- 
1.9.1.346.ga2b5940

  parent reply	other threads:[~2014-06-10 13:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-10 13:24 [PATCH v2 0/5] Speed up cache loading time Nguyễn Thái Ngọc Duy
2014-06-10 13:24 ` [PATCH 1/5] read-cache: allow to keep mmap'd memory after reading Nguyễn Thái Ngọc Duy
2014-06-10 13:24 ` Nguyễn Thái Ngọc Duy [this message]
2014-06-10 18:44   ` [PATCH 2/5] index-helper: new daemon for caching index and related stuff David Turner
2014-06-10 13:24 ` [PATCH 3/5] index-helper: add Windows support Nguyễn Thái Ngọc Duy
2014-06-10 13:24 ` [PATCH 4/5] daemonize(): set a flag before exiting the main process Nguyễn Thái Ngọc Duy
2014-06-10 13:24 ` [PATCH 5/5] index-helper: add --detach Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1402406665-27988-3-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).