git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] write-tree performance problems
@ 2005-04-19 16:50 Chris Mason
  2005-04-19 17:36 ` Linus Torvalds
  0 siblings, 1 reply; 54+ messages in thread
From: Chris Mason @ 2005-04-19 16:50 UTC (permalink / raw)
  To: Linus Torvalds, git

[-- Attachment #1: Type: text/plain, Size: 1654 bytes --]

Hello everyone,

I did a quick experiment with applying/commit 100 patches from the suse kernel 
into a kernel git tree, which quilt can do in 2 seconds.  git needs 1m5s.

The primary performance problem during each commit is write-tree recalculating 
the hash of each directory, even though the contents of most directories are 
not changing.  I've attached a very quick and dirty mod of write-tree.c, it 
takes an optional tree id (sha1) and list of directories.  The hash of any 
directories not in the list are read in from existing files instead of being 
recalculated.

You have to pass each sub dir with a modified file.  So, if you change 
fs/super.c and fs/ext3/super.c, you would call "write-tree sha1 fs fs/ext3"
With this patch, the time to apply 100 commits goes down to 22 seconds.  It 
could be faster (and easier to use) if the index stored the hash of trees 
instead of just blobs, but that would be a larger change.

I was able to get the commit time down to 13 seconds by changing read-tree.c, 
update-cache.c and read-cache.c to store/read the index in tmpfs instead of 
on the local filesystem.  I haven't attached the patch for that, but it seems 
easiest to move .git/index into .git/index_dir/index, and let the user decide 
where to put index_dir.

Quick speed summary, apply/commit 100 patches
quilt push -a     :                    2s
git (unmodified):               1m5s
git (tree hash reduction)      22s
git (tree hash, tmpfs index) 13s

This patch is against pasky's tree from this morning, but also applies to 
linus' tree.  It's nasty stuff, but will hopefully get some discussion 
started on speeding things up.

-chris

[-- Attachment #2: fast-dirs.diff --]
[-- Type: text/x-diff, Size: 4526 bytes --]

--- a/write-tree.c
+++ b/write-tree.c
@@ -4,6 +4,8 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "cache.h"
+static char **dirs;
+static int num_dirs = 0;
 
 static int check_valid_sha1(unsigned char *sha1)
 {
@@ -27,15 +29,47 @@ static int prepend_integer(char *buffer,
 	return i;
 }
 
+static int find_sha(char *buffer, int size, const char *base, int baselen, char *returnsha1)
+{
+	while(size) {
+		int len = strlen(buffer)+1;
+		unsigned char *sha1 = buffer + len;
+		char *path = strchr(buffer, ' ')+1;
+		unsigned int mode;
+
+		if (size < len + 20 || sscanf(buffer, "%o", &mode) != 1)
+			die("corrupt 'tree' file");
+		buffer = sha1 + 20;
+		size -= len + 20;
+		if (strncmp(path, base, baselen) == 0 &&
+		    strlen(path) == baselen) {
+			memcpy(returnsha1, sha1, 20);
+			return 0;
+		}
+	}
+	return -1;
+}
+
 #define ORIG_OFFSET (40)	/* Enough space to add the header of "tree <size>\0" */
 
-static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1)
+static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1, char *treesha)
 {
 	unsigned char subdir_sha1[20];
 	unsigned long size, offset;
 	char *buffer;
 	int i, nr;
-
+	char *tree = NULL;
+	unsigned long tree_size;
+	char type[20];
+	if (treesha) {
+		tree = read_sha1_file(treesha, type, &tree_size);
+		if (!tree) {
+			die("unable to read sha1 file");
+		} else {
+		}
+		if (strcmp(type, "tree"))
+			die("expected a tree node");
+	}
 	/* Guess at some random initial size */
 	size = 8192;
 	buffer = malloc(size);
@@ -55,15 +89,60 @@ static int write_tree(struct cache_entry
 
 		sha1 = ce->sha1;
 		mode = ntohl(ce->ce_mode);
-
 		/* Do we have _further_ subdirectories? */
 		filename = pathname + baselen;
 		dirname = strchr(filename, '/');
 		if (dirname) {
 			int subdir_written;
-
-			subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1);
-			nr += subdir_written;
+			int dirlen = dirname - pathname;
+			int dirmatch = 1;
+			if (tree && num_dirs > 0) {
+				dirmatch = 0;
+				for(i = 0 ; i < num_dirs; i++) {
+					int len = strlen(dirs[i]);
+					if (len <= baselen)
+						continue;
+					if (memcmp(dirs[i], pathname, dirlen)==0 &&
+					    pathname[dirlen] == '/') {
+						dirmatch = 1;
+						break;
+					}
+				}
+				if (!dirmatch && find_sha(tree, tree_size, 
+							 filename, 
+							 dirname-filename, 
+							 subdir_sha1)) {
+					dirmatch = 1;
+				}
+			}
+			if (!dirmatch) {
+				/* eat all the entries in this dir */
+				while(++nr < maxentries) {
+					char *p;
+					ce = cachep[nr];
+					p = strchr(ce->name + baselen, '/');
+					if (!p)
+						break;
+					if (p - ce->name != dirname-pathname)
+						break;
+					if (memcmp(pathname, ce->name, p-ce->name))
+						break;
+				}
+			} else {
+				unsigned char thisdir_sha1[20];
+				char *p = thisdir_sha1;
+				if (num_dirs && tree) {
+				    if (find_sha(tree, tree_size, filename, 
+				                 dirname-filename, p)) {
+				    	num_dirs = 0;
+					p = NULL;
+				    }
+				} else {
+					p = NULL;
+				}
+				subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1, p);
+				nr += subdir_written;
+			}
 
 			/* Now we need to write out the directory entry into this tree.. */
 			mode = S_IFDIR;
@@ -92,9 +172,10 @@ static int write_tree(struct cache_entry
 	i = prepend_integer(buffer, offset - ORIG_OFFSET, ORIG_OFFSET);
 	i -= 5;
 	memcpy(buffer+i, "tree ", 5);
-
 	write_sha1_file(buffer + i, offset - i, returnsha1);
 	free(buffer);
+	if (tree)
+		free(tree);
 	return nr;
 }
 
@@ -103,7 +184,19 @@ int main(int argc, char **argv)
 	int i, unmerged;
 	int entries = read_cache();
 	unsigned char sha1[20];
+	unsigned char treesha1[20];
+	char *p = NULL;
 
+	if (argc > 1) {
+		if (argc < 3)
+			die("usage: write-tree [sha1 dir1 dir2 ...]");
+		num_dirs = argc - 2;
+		dirs = argv + 2;
+		if (get_sha1_hex(argv[1], treesha1) < 0)
+			die("bad sha1 given");
+		p = treesha1;
+
+	}
 	if (entries <= 0)
 		die("write-tree: no cache contents to write");
 
@@ -123,7 +216,7 @@ int main(int argc, char **argv)
 		die("write-tree: not able to write tree");
 
 	/* Ok, write it out */
-	if (write_tree(active_cache, entries, "", 0, sha1) != entries)
+	if (write_tree(active_cache, entries, "", 0, sha1, p) != entries)
 		die("write-tree: internal error");
 	printf("%s\n", sha1_to_hex(sha1));
 	return 0;

^ permalink raw reply	[flat|nested] 54+ messages in thread

end of thread, other threads:[~2005-04-20 22:26 UTC | newest]

Thread overview: 54+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-04-19 16:50 [PATCH] write-tree performance problems Chris Mason
2005-04-19 17:36 ` Linus Torvalds
2005-04-19 18:11   ` Chris Mason
2005-04-19 19:03     ` Linus Torvalds
2005-04-19 21:08       ` Chris Mason
2005-04-19 21:23         ` Linus Torvalds
2005-04-20  0:49           ` Chris Mason
2005-04-20  1:09             ` Linus Torvalds
2005-04-20  6:43             ` Linus Torvalds
2005-04-20  7:38               ` H. Peter Anvin
2005-04-20  9:08                 ` WARNING! Object DB conversion (was Re: [PATCH] write-tree performance problems) Linus Torvalds
2005-04-20 10:04                   ` Ingo Molnar
2005-04-20 12:11                   ` Jon Seymour
2005-04-20 13:24                     ` Martin Uecker
2005-04-20 13:35                       ` Morten Welinder
2005-04-20 13:41                       ` Jon Seymour
2005-04-20 14:30                       ` C. Scott Ananian
2005-04-20 15:19                         ` Martin Uecker
2005-04-20 15:28                           ` C. Scott Ananian
2005-04-20 15:57                             ` Martin Uecker
2005-04-20 16:33                               ` Martin Uecker
2005-04-20 13:30                   ` Blob chunking code. [First look.] C. Scott Ananian
2005-04-20 17:31                     ` Blob chunking code. [Second look] C. Scott Ananian
2005-04-20 14:13                   ` WARNING! Object DB conversion (was Re: [PATCH] write-tree performance problems) David Woodhouse
2005-04-20 14:59                     ` Linus Torvalds
2005-04-20 22:29                       ` David Woodhouse
     [not found]                   ` <2cfc4032050420050655265d3a@mail.gmail.com>
2005-04-20 14:29                     ` Linus Torvalds
2005-04-20 14:35                       ` C. Scott Ananian
2005-04-20 15:22               ` [PATCH] write-tree performance problems Chris Mason
2005-04-20 15:30                 ` C. Scott Ananian
2005-04-20 15:46                   ` Linus Torvalds
2005-04-20 15:52                     ` C. Scott Ananian
2005-04-20 16:21                       ` Linus Torvalds
2005-04-20 15:40                 ` Linus Torvalds
2005-04-20 16:10                   ` David Willmore
2005-04-20 16:33                   ` Linus Torvalds
2005-04-20 16:41                     ` Linus Torvalds
2005-04-20 16:37                   ` Chris Mason
2005-04-20 17:06                     ` Linus Torvalds
2005-04-20 17:23                       ` Chris Mason
2005-04-20 17:52                         ` Linus Torvalds
2005-04-20 19:04                           ` Chris Mason
2005-04-20 19:19                             ` Linus Torvalds
2005-04-20 19:47                               ` Linus Torvalds
2005-04-20 18:07                       ` David S. Miller
2005-04-19 22:09       ` David Lang
2005-04-19 22:21         ` Linus Torvalds
2005-04-19 23:00           ` David Lang
2005-04-19 23:09             ` Linus Torvalds
2005-04-19 23:42               ` David Lang
2005-04-19 23:59                 ` Linus Torvalds
2005-04-19 21:52                   ` Christopher Li
2005-04-19 18:51   ` Olivier Galibert
2005-04-19 22:47   ` C. Scott Ananian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).