git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 4/8] Add read-cache--daemon for caching index and related stuff
Date: Tue, 13 May 2014 18:15:33 +0700	[thread overview]
Message-ID: <1399979737-8577-7-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1399979737-8577-1-git-send-email-pclouds@gmail.com>

The name of the shared memory folows the template "/git-index-<SHA1>"
where <SHA1> is the trailing SHA-1 of the index file. If such a shared
memory exists, it contains the same index content as on disk. The
content is already validated by the daemon. Note that it does not
necessarily use the same format as the on-disk version. The content
could be in a format that can be parsed much faster, or even reused
without parsing).

While preparing the shm object, the daemon would keep the shm object
"/git-index-<SHA1>.lock". After "git-index-<SHA1>" is ready, the
".lock" object is removed. A shared object must not be updated
afterwards. So if ".lock" does not exist, it's safe to assume that the
associated shm object is ready.

Other info could also by cached if it's tied to the index. For
example, name hash could be stored in "/git-namehash-<SHA1>"..

After Git writes a new index down, it may want to ask the daemon to
preload the new index so next time Git runs the index is already
validated and in memory. It does so by send a command to a UNIX socket
in $GIT_DIR/daemon/index.

Windows can use its named shared memory instead of POSIX shared memory
and probably named pipe in place of UNIX socket.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 .gitignore                                     |   1 +
 Documentation/git-read-cache--daemon.txt (new) |  27 ++++
 Makefile                                       |   6 +
 config.mak.uname                               |   1 +
 read-cache--daemon.c (new)                     | 207 +++++++++++++++++++++++++
 wrapper.c                                      |  14 ++
 6 files changed, 256 insertions(+)
 create mode 100644 Documentation/git-read-cache--daemon.txt
 create mode 100644 read-cache--daemon.c

diff --git a/.gitignore b/.gitignore
index 70992a4..07e0cb6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -110,6 +110,7 @@
 /git-pull
 /git-push
 /git-quiltimport
+/git-read-cache--daemon
 /git-read-tree
 /git-rebase
 /git-rebase--am
diff --git a/Documentation/git-read-cache--daemon.txt b/Documentation/git-read-cache--daemon.txt
new file mode 100644
index 0000000..1b05be4
--- /dev/null
+++ b/Documentation/git-read-cache--daemon.txt
@@ -0,0 +1,27 @@
+git-read-cache--daemon(1)
+=============
+
+NAME
+----
+git-daemon - A simple cache server for speeding up index file access
+
+SYNOPSIS
+--------
+[verse]
+'git daemon' [--detach]
+
+DESCRIPTION
+-----------
+Keep the index file in memory for faster access. This daemon is per
+repository. Note that core.useReadCacheDaemon must be set for Git to
+contact the daemon. This daemon is only available on POSIX system with
+shared memory support (e.g. Linux)
+
+OPTIONS
+-------
+--detach::
+	Detach from the shell.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index d0a2b4b..a44ab0b 100644
--- a/Makefile
+++ b/Makefile
@@ -1504,6 +1504,12 @@ ifdef HAVE_DEV_TTY
 	BASIC_CFLAGS += -DHAVE_DEV_TTY
 endif
 
+ifdef HAVE_SHM
+	BASIC_CFLAGS += -DHAVE_SHM
+	EXTLIBS += -lrt
+	PROGRAM_OBJS += read-cache--daemon.o
+endif
+
 ifdef DIR_HAS_BSD_GROUP_SEMANTICS
 	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
 endif
diff --git a/config.mak.uname b/config.mak.uname
index 23a8803..b6a37e5 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -33,6 +33,7 @@ ifeq ($(uname_S),Linux)
 	HAVE_PATHS_H = YesPlease
 	LIBC_CONTAINS_LIBINTL = YesPlease
 	HAVE_DEV_TTY = YesPlease
+	HAVE_SHM = YesPlease
 endif
 ifeq ($(uname_S),GNU/kFreeBSD)
 	NO_STRLCPY = YesPlease
diff --git a/read-cache--daemon.c b/read-cache--daemon.c
new file mode 100644
index 0000000..4531978
--- /dev/null
+++ b/read-cache--daemon.c
@@ -0,0 +1,207 @@
+#include "cache.h"
+#include "sigchain.h"
+#include "unix-socket.h"
+#include "split-index.h"
+#include "pkt-line.h"
+
+static char *socket_path;
+static struct strbuf shm_index = STRBUF_INIT;
+static struct strbuf shm_sharedindex = STRBUF_INIT;
+static struct strbuf shm_lock = STRBUF_INIT;
+static int lock_fd = -1;
+static int daemonized;
+
+static void cleanup_socket(void)
+{
+	if (daemonized)
+		return;
+	if (socket_path)
+		unlink(socket_path);
+	if (shm_index.len)
+		shm_unlink(shm_index.buf);
+	if (shm_sharedindex.len)
+		shm_unlink(shm_sharedindex.buf);
+	if (lock_fd != -1)
+		close(lock_fd);
+	if (shm_lock.len)
+		shm_unlink(shm_lock.buf);
+}
+
+static void cleanup_socket_on_signal(int sig)
+{
+	cleanup_socket();
+	sigchain_pop(sig);
+	raise(sig);
+}
+
+static int do_share_index(struct index_state *istate, struct strbuf *shm_path)
+{
+	struct strbuf sb = STRBUF_INIT;
+	void *map;
+	int fd;
+
+	strbuf_addf(&sb, "/git-index-%s", sha1_to_hex(istate->sha1));
+	fd = shm_open(sb.buf, O_RDWR | O_CREAT | O_EXCL, 0700);
+	if (fd < 0)
+		return -1;
+	if (shm_path->len) {
+		shm_unlink(shm_path->buf);
+		strbuf_reset(shm_path);
+	}
+	if (ftruncate(fd, istate->mmap_size)) {
+		close(fd);
+		shm_unlink(shm_path->buf);
+		return -1;
+	}
+	strbuf_addbuf(shm_path, &sb);
+	map = xmmap(NULL, istate->mmap_size, PROT_READ | PROT_WRITE,
+		    MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED) {
+		close(fd);
+		shm_unlink(shm_path->buf);
+		return -1;
+	}
+	memcpy(map, istate->mmap, istate->mmap_size);
+	munmap(map, istate->mmap_size);
+	fchmod(fd, 0400);
+	close(fd);
+	return 0;
+}
+
+static void share_index(struct index_state *istate, struct strbuf *shm_path)
+{
+	if (shm_lock.len)
+		return;
+
+	strbuf_addf(&shm_lock, "/git-index-%s.lock", sha1_to_hex(istate->sha1));
+	lock_fd = shm_open(shm_lock.buf, O_CREAT | O_EXCL, 0700);
+	if (lock_fd < 0) {
+		strbuf_reset(&shm_lock);
+		return;
+	}
+	do_share_index(istate, shm_path);
+	close(lock_fd);
+	lock_fd = -1;
+	shm_unlink(shm_lock.buf);
+	strbuf_reset(&shm_lock);
+}
+
+static void refresh()
+{
+	the_index.keep_mmap = 1;
+	if (read_cache() < 0)
+		die("could not read index");
+	share_index(&the_index, &shm_index);
+	if (the_index.split_index &&
+	    the_index.split_index->base)
+		share_index(the_index.split_index->base, &shm_sharedindex);
+	discard_index(&the_index);
+}
+
+static void serve_one_client(int fd)
+{
+	char *buf = packet_read_line(fd, NULL);
+	if (!strcmp(buf, "refresh"))
+		refresh();
+	else
+		fprintf(stderr, "unrecognized command %s\n", buf);
+}
+
+static unsigned long next;
+static int serve_cache_loop(int fd)
+{
+	struct pollfd pfd;
+	unsigned long now = time(NULL);
+
+	if (now > next)
+		return 0;
+
+	pfd.fd = fd;
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, 1000 * (next - now)) < 0) {
+		if (errno != EINTR)
+			die_errno("poll failed");
+		return 1;
+	}
+
+	if (pfd.revents & POLLIN) {
+		int client = accept(fd, NULL, NULL);
+		if (client < 0) {
+			warning("accept failed: %s", strerror(errno));
+			return 1;
+		}
+		serve_one_client(client);
+		close(client);
+		next = now + 600;
+	}
+	return 1;
+}
+
+static void serve_cache(const char *socket_path, int detach)
+{
+	int fd;
+
+	fd = unix_stream_listen(socket_path);
+	if (fd < 0)
+		die_errno("unable to bind to '%s'", socket_path);
+
+	refresh();
+	if (detach && daemonize(&daemonized))
+		die_errno("unable to detach");
+
+	next = time(NULL) + 600;
+	while (serve_cache_loop(fd))
+		; /* nothing */
+
+	close(fd);
+	unlink(socket_path);
+}
+
+static void check_socket_directory(const char *path)
+{
+	struct stat st;
+	char *path_copy = xstrdup(path);
+	char *dir = dirname(path_copy);
+
+	if (!stat(dir, &st)) {
+		free(path_copy);
+		return;
+	}
+
+	/*
+	 * We must be sure to create the directory with the correct mode,
+	 * not just chmod it after the fact; otherwise, there is a race
+	 * condition in which somebody can chdir to it, sleep, then try to open
+	 * our protected socket.
+	 */
+	if (safe_create_leading_directories_const(dir) < 0)
+		die_errno("unable to create directories for '%s'", dir);
+	if (mkdir(dir, 0700) < 0)
+		die_errno("unable to mkdir '%s'", dir);
+	free(path_copy);
+}
+
+int main(int argc, const char **argv)
+{
+	int detach = 0;
+	switch (argc) {
+	case 1:
+		break;
+	case 2:
+		if (!strcmp(argv[1], "--detach"))
+			detach = 1;
+		else
+			die("unknown option %s", argv[1]);
+		break;
+	default:
+		die("unexpected argument number %d\n", argc);
+	}
+
+	setup_git_directory();
+	socket_path = git_pathdup("daemon/index");
+	check_socket_directory(socket_path);
+	atexit(cleanup_socket);
+	sigchain_push_common(cleanup_socket_on_signal);
+	serve_cache(socket_path, detach);
+	return 0;
+}
diff --git a/wrapper.c b/wrapper.c
index 0cc5636..4cd7415 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -455,3 +455,17 @@ struct passwd *xgetpwuid_self(void)
 		    errno ? strerror(errno) : _("no such user"));
 	return pw;
 }
+
+#ifndef HAVE_SHM
+int shm_open(const char *path, int flags, mode_t mode)
+{
+	errno = ENOSYS;
+	return -1;
+}
+
+int shm_unlink(const char *path)
+{
+	errno = ENOSYS;
+	return -1;
+}
+#endif
-- 
1.9.1.346.ga2b5940

  parent reply	other threads:[~2014-05-13 11:16 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-28 10:55 [PATCH 00/32] Split index mode for very large indexes Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 01/32] ewah: fix constness of ewah_read_mmap Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 02/32] ewah: delete unused ewah_read_mmap_native declaration Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 03/32] sequencer: do not update/refresh index if the lock cannot be held Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 04/32] read-cache: new API write_locked_index instead of write_index/write_cache Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 05/32] read-cache: relocate and unexport commit_locked_index() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 06/32] read-cache: store in-memory flags in the first 12 bits of ce_flags Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 07/32] read-cache: be strict about "changed" in remove_marked_cache_entries() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 08/32] read-cache: be specific what part of the index has changed Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 09/32] update-index: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 10/32] resolve-undo: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 11/32] unpack-trees: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 12/32] cache-tree: mark istate->cache_changed on cache tree invalidation Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 13/32] cache-tree: mark istate->cache_changed on cache tree update Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 14/32] cache-tree: mark istate->cache_changed on prime_cache_tree() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 15/32] entry.c: update cache_changed if refresh_cache is set in checkout_entry() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 16/32] read-cache: save index SHA-1 after reading Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 17/32] read-cache: split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 22:46   ` Junio C Hamano
2014-04-29  1:43     ` Duy Nguyen
2014-04-29 17:23       ` Junio C Hamano
2014-04-29 22:45         ` Duy Nguyen
2014-04-30 13:57           ` Junio C Hamano
2014-04-28 10:55 ` [PATCH 18/32] read-cache: mark new entries for split index Nguyễn Thái Ngọc Duy
2014-04-30 20:35   ` Eric Sunshine
2014-04-28 10:55 ` [PATCH 19/32] read-cache: save deleted entries in " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 20/32] read-cache: mark updated entries for " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 21/32] split-index: the writing part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 22/32] split-index: the reading part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 23/32] split-index: do not invalidate cache-tree at read time Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 24/32] split-index: strip pathname of on-disk replaced entries Nguyễn Thái Ngọc Duy
2014-04-29 20:25   ` Junio C Hamano
2014-04-28 10:55 ` [PATCH 25/32] update-index: new options to enable/disable split index mode Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 26/32] update-index --split-index: do not split if $GIT_DIR is read only Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 27/32] rev-parse: add --shared-index-path to get shared index path Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 28/32] read-tree: force split-index mode off on --index-output Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 29/32] read-tree: note about dropping split-index mode or index version Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 30/32] read-cache: force split index mode with GIT_TEST_SPLIT_INDEX Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 31/32] t2104: make sure split index mode is off for the version test Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 32/32] t1700: new tests for split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 21:18 ` [PATCH 00/32] Split index mode for very large indexes Shawn Pearce
2014-04-29  1:52   ` Duy Nguyen
2014-05-09 10:27   ` Duy Nguyen
2014-05-09 17:55     ` Junio C Hamano
2014-05-13 11:15   ` [PATCH 0/8] Speed up cache loading time Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 1/8] read-cache: allow to keep mmap'd memory after reading Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 2/3] Add read-cache--daemon Nguyễn Thái Ngọc Duy
2014-05-13 11:52       ` Erik Faye-Lund
2014-05-13 12:01         ` Duy Nguyen
2014-05-13 13:01         ` Duy Nguyen
2014-05-13 13:37           ` Erik Faye-Lund
2014-05-13 13:49             ` Duy Nguyen
2014-05-13 14:06               ` Erik Faye-Lund
2014-05-13 14:10                 ` Duy Nguyen
2014-05-13 14:16                   ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 2/8] unix-socket: stub impl. for platforms with no unix socket support Nguyễn Thái Ngọc Duy
2014-05-13 11:59       ` Erik Faye-Lund
2014-05-13 12:03         ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 3/8] daemonize: set a flag before exiting the main process Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 3/3] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` Nguyễn Thái Ngọc Duy [this message]
2014-05-13 11:56       ` [PATCH 4/8] Add read-cache--daemon for caching index and related stuff Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 5/8] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 12:13       ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 6/8] read-cache--daemon: do not read index " Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 7/8] read-cache: skip verifying trailing SHA-1 on cached index Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 8/8] read-cache: inform the daemon that the index has been updated Nguyễn Thái Ngọc Duy
2014-05-13 12:17       ` Erik Faye-Lund
2014-05-22 16:38       ` David Turner
2014-05-13 14:24     ` [PATCH 0/8] Speed up cache loading time Stefan Beller
2014-05-13 14:35       ` Duy Nguyen
2014-05-13 11:20   ` [PATCH 9/8] even faster loading time with index version 254 Nguyễn Thái Ngọc Duy
2014-04-28 22:23 ` [PATCH 00/32] Split index mode for very large indexes Junio C Hamano
2014-04-30 20:48 ` Richard Hansen
2014-05-01  0:09   ` Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1399979737-8577-7-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).