git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Create object subdirectories on demand
@ 2005-10-06 23:23 Linus Torvalds
  2005-10-07  9:38 ` Junio C Hamano
  2005-10-08  1:45 ` Create object subdirectories on demand Daniel Barkalow
  0 siblings, 2 replies; 6+ messages in thread
From: Linus Torvalds @ 2005-10-06 23:23 UTC (permalink / raw)
  To: Git Mailing List, Junio C Hamano


This makes it possible to have a "sparse" git object subdirectory 
structure, something that has become much more attractive now that people 
use pack-files all the time.

As a result of pack-files, a git object directory doesn't necessarily have 
any individual objects lying around, and in that case it's just wasting 
space to keep the empty first-level object directories around: on many 
filesystems the 256 empty directories will be about 1MB of disk space.

Even more importantly, after you re-pack a project that _used_ to be 
unpacked, you could be left with huge directories that no longer contain 
anything, but that waste space and take time to look through.

With this change, "git prune-packed" can just do an rmdir() on the 
directories, and they'll get removed if empty, and re-created on demand.

This patch also tries to fix up "write_sha1_from_fd()" to use the new 
common infrastructure for creating the object files, closing a hole where 
we might otherwise leave half-written objects in the object database.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---


This has _not_ gotten a lot of testing, but I verified that basic things 
seem to work, and that packing an archive properly removes the unnecessary 
subdirectories.

I'd suggest it sit in "pu" for a while.

The primary reason for this was that a big git archive (the historic one) 
ends up with lots of objects, and the object subdirectories end up being 
about 150kB in size. Even repacking the archive did nothing for it - you 
still ended up with 256 of these monster directories, never mind that they 
were empty. Walking through them in the cold-cache case was actually 
really painful - it would walk through about 40MB of disk very very 
inefficiently, for no actual gain.

However, somebody should really check my code carefully before merging 
this. In particular, I didn't test "git-ssh-pull" at all, so I'm not sure 
I actually fixed the "write temp-file" thing properly.


diff --git a/daemon.c b/daemon.c
--- a/daemon.c
+++ b/daemon.c
@@ -142,7 +142,7 @@ static int upload(char *dir, int dirlen)
 	 * is ok with us doing this.
 	 */
 	if ((!export_all_trees && access("git-daemon-export-ok", F_OK)) ||
-	    access("objects/00", X_OK) ||
+	    access("objects/", X_OK) ||
 	    access("HEAD", R_OK)) {
 		logerror("Not a valid git-daemon-enabled repository: '%s'", dir);
 		return -1;
diff --git a/fsck-objects.c b/fsck-objects.c
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -329,9 +329,8 @@ static int fsck_dir(int i, char *path)
 	DIR *dir = opendir(path);
 	struct dirent *de;
 
-	if (!dir) {
-		return error("missing sha1 directory '%s'", path);
-	}
+	if (!dir)
+		return 0;
 
 	while ((de = readdir(dir)) != NULL) {
 		char name[100];
diff --git a/git-rename.perl b/git-rename.perl
--- a/git-rename.perl
+++ b/git-rename.perl
@@ -15,7 +15,7 @@ sub usage($);
 my $GIT_DIR = $ENV{'GIT_DIR'} || ".git";
 
 unless ( -d $GIT_DIR && -d $GIT_DIR . "/objects" && 
-	-d $GIT_DIR . "/objects/00" && -d $GIT_DIR . "/refs") {
+	-d $GIT_DIR . "/objects/" && -d $GIT_DIR . "/refs") {
 	usage("Git repository not found.");
 }
 
diff --git a/git-sh-setup.sh b/git-sh-setup.sh
--- a/git-sh-setup.sh
+++ b/git-sh-setup.sh
@@ -22,4 +22,4 @@ refs/*)	: ;;
 *)	false ;;
 esac &&
 [ -d "$GIT_DIR/refs" ] &&
-[ -d "$GIT_OBJECT_DIRECTORY/00" ]
+[ -d "$GIT_OBJECT_DIRECTORY/" ]
diff --git a/init-db.c b/init-db.c
--- a/init-db.c
+++ b/init-db.c
@@ -244,10 +244,6 @@ int main(int argc, char **argv)
 	memcpy(path, sha1_dir, len);
 
 	safe_create_dir(sha1_dir);
-	for (i = 0; i < 256; i++) {
-		sprintf(path+len, "/%02x", i);
-		safe_create_dir(path);
-	}
 	strcpy(path+len, "/pack");
 	safe_create_dir(path);
 	strcpy(path+len, "/info");
diff --git a/prune-packed.c b/prune-packed.c
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -26,6 +26,8 @@ static void prune_dir(int i, DIR *dir, c
 		else if (unlink(pathname) < 0)
 			error("unable to unlink %s", pathname);
 	}
+	pathname[len] = 0;
+	rmdir(pathname);
 }
 
 static void prune_packed_objects(void)
@@ -46,7 +48,7 @@ static void prune_packed_objects(void)
 		sprintf(pathname + len, "%02x/", i);
 		d = opendir(pathname);
 		if (!d)
-			die("unable to open %s", pathname);
+			continue;
 		prune_dir(i, d, pathname, len + 3);
 		closedir(d);
 	}
diff --git a/sha1_file.c b/sha1_file.c
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1248,6 +1248,73 @@ char *write_sha1_file_prepare(void *buf,
 	return sha1_file_name(sha1);
 }
 
+/*
+ * Link the tempfile to the final place, possibly creating the
+ * last directory level as you do so.
+ *
+ * Returns the errno on failure, 0 on success.
+ */
+static int link_temp_to_file(const char *tmpfile, char *filename)
+{
+	int ret;
+
+	if (!link(tmpfile, filename))
+		return 0;
+
+	/*
+	 * Try to mkdir the last path component if that failed
+	 * with an ENOENT.
+	 *
+	 * Re-try the "link()" regardless of whether the mkdir
+	 * succeeds, since a race might mean that somebody
+	 * else succeeded.
+	 */
+	ret = errno;
+	if (ret == ENOENT) {
+		char *dir = strrchr(filename, '/');
+		if (dir) {
+			*dir = 0;
+			mkdir(filename, 0777);
+			*dir = '/';
+			if (!link(tmpfile, filename))
+				return 0;
+			ret = errno;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Move the just written object into its final resting place
+ */
+static int move_temp_to_file(const char *tmpfile, char *filename)
+{
+	int ret = link_temp_to_file(tmpfile, filename);
+	if (ret) {
+		/*
+		 * Coda hack - coda doesn't like cross-directory links,
+		 * so we fall back to a rename, which will mean that it
+		 * won't be able to check collisions, but that's not a
+		 * big deal.
+		 *
+		 * When this succeeds, we just return 0. We have nothing
+		 * left to unlink.
+		 */
+		if (ret == EXDEV && !rename(tmpfile, filename))
+			return 0;
+	}
+	unlink(tmpfile);
+	if (ret) {
+		if (ret != EEXIST) {
+			fprintf(stderr, "unable to write sha1 filename %s: %s", filename, strerror(ret));
+			return -1;
+		}
+		/* FIXME!!! Collision check here ? */
+	}
+
+	return 0;
+}
+
 int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
 {
 	int size;
@@ -1257,7 +1324,7 @@ int write_sha1_file(void *buf, unsigned 
 	char *filename;
 	static char tmpfile[PATH_MAX];
 	unsigned char hdr[50];
-	int fd, hdrlen, ret;
+	int fd, hdrlen;
 
 	/* Normally if we have it in the pack then we do not bother writing
 	 * it out into .git/objects/??/?{38} file.
@@ -1320,32 +1387,7 @@ int write_sha1_file(void *buf, unsigned 
 	close(fd);
 	free(compressed);
 
-	ret = link(tmpfile, filename);
-	if (ret < 0) {
-		ret = errno;
-
-		/*
-		 * Coda hack - coda doesn't like cross-directory links,
-		 * so we fall back to a rename, which will mean that it
-		 * won't be able to check collisions, but that's not a
-		 * big deal.
-		 *
-		 * When this succeeds, we just return 0. We have nothing
-		 * left to unlink.
-		 */
-		if (ret == EXDEV && !rename(tmpfile, filename))
-			return 0;
-	}
-	unlink(tmpfile);
-	if (ret) {
-		if (ret != EEXIST) {
-			fprintf(stderr, "unable to write sha1 filename %s: %s", filename, strerror(ret));
-			return -1;
-		}
-		/* FIXME!!! Collision check here ? */
-	}
-
-	return 0;
+	return move_temp_to_file(tmpfile, filename);
 }
 
 int write_sha1_to_fd(int fd, const unsigned char *sha1)
@@ -1420,8 +1462,7 @@ int write_sha1_to_fd(int fd, const unsig
 int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
 		       size_t bufsize, size_t *bufposn)
 {
-	char *filename = sha1_file_name(sha1);
-
+	char tmpfile[PATH_MAX];
 	int local;
 	z_stream stream;
 	unsigned char real_sha1[20];
@@ -1429,10 +1470,11 @@ int write_sha1_from_fd(const unsigned ch
 	int ret;
 	SHA_CTX c;
 
-	local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
+	snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory());
 
+	local = mkstemp(tmpfile);
 	if (local < 0)
-		return error("Couldn't open %s\n", filename);
+		return error("Couldn't open %s for %s\n", tmpfile, sha1_to_hex(sha1));
 
 	memset(&stream, 0, sizeof(stream));
 
@@ -1462,7 +1504,7 @@ int write_sha1_from_fd(const unsigned ch
 		size = read(fd, buffer + *bufposn, bufsize - *bufposn);
 		if (size <= 0) {
 			close(local);
-			unlink(filename);
+			unlink(tmpfile);
 			if (!size)
 				return error("Connection closed?");
 			perror("Reading from connection");
@@ -1475,15 +1517,15 @@ int write_sha1_from_fd(const unsigned ch
 	close(local);
 	SHA1_Final(real_sha1, &c);
 	if (ret != Z_STREAM_END) {
-		unlink(filename);
+		unlink(tmpfile);
 		return error("File %s corrupted", sha1_to_hex(sha1));
 	}
 	if (memcmp(sha1, real_sha1, 20)) {
-		unlink(filename);
+		unlink(tmpfile);
 		return error("File %s has bad hash\n", sha1_to_hex(sha1));
 	}
-	
-	return 0;
+
+	return move_temp_to_file(tmpfile, sha1_file_name(sha1));
 }
 
 int has_pack_index(const unsigned char *sha1)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Create object subdirectories on demand
  2005-10-06 23:23 Create object subdirectories on demand Linus Torvalds
@ 2005-10-07  9:38 ` Junio C Hamano
  2005-10-07 14:56   ` Linus Torvalds
  2005-10-07 16:30   ` [PATCH] Create object subdirectories on demand (phase II) Junio C Hamano
  2005-10-08  1:45 ` Create object subdirectories on demand Daniel Barkalow
  1 sibling, 2 replies; 6+ messages in thread
From: Junio C Hamano @ 2005-10-07  9:38 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

Linus Torvalds <torvalds@osdl.org> writes:

> This has _not_ gotten a lot of testing, but I verified that basic things 
> seem to work, and that packing an archive properly removes the unnecessary 
> subdirectories.
>
> I'd suggest it sit in "pu" for a while.

I typically use the tip of "pu" myself, which resulted in an
interesting case (nothing grave).  After running 'git prune', I
switched to another topic branch that did not include this
patch, built it, and tried it out -- and got a complaint because
the earlier 'git prune' happened to remove ".git/objects/00".
Now I was not in a valid git repository anymore ;-).

This needs to wait until everybody's git gets this update;
in particular, I cannot use this version in my $HOME/bin/ on
kernel.org right now.

But then once everybody else updates, the repository their git
creates cannot be read by my git -- an interesting chicken and
egg problem.

Maybe successful rmdir() immediately followed by mkdir(), if we
are willing to waste 4KB or so per empty directory, trading
space for safety and ease of transition?  That is, phase I tools
do not complain if objects/00 is missing, lazily create
objects/??/ if it is missing, and make sure empty directories are
pruned but still recreate them for safety.  Phase II tools
then stop the recreating part.

> However, somebody should really check my code carefully before merging 
> this. In particular, I didn't test "git-ssh-pull" at all, so I'm not sure 
> I actually fixed the "write temp-file" thing properly.

Well, the thing is, I do not use commit walkers over ssh myself
and honestly consider them to have outlived their usefulness.
But you are right -- I should look at this one again before
placing in the master.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Create object subdirectories on demand
  2005-10-07  9:38 ` Junio C Hamano
@ 2005-10-07 14:56   ` Linus Torvalds
  2005-10-07 16:30   ` [PATCH] Create object subdirectories on demand (phase II) Junio C Hamano
  1 sibling, 0 replies; 6+ messages in thread
From: Linus Torvalds @ 2005-10-07 14:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git



On Fri, 7 Oct 2005, Junio C Hamano wrote:
> 
> But then once everybody else updates, the repository their git
> creates cannot be read by my git -- an interesting chicken and
> egg problem.

Well, you can do it in two (or three) phases:

 (a) remove the tests for "GIT_DIR/objects/00" from the "is this a 
     git dir" tests and allow git-fsck-objects to skip missing 
     directories.

This makes read-only operations work fine in sparse directories

 (b) apply my sha1_file.c part of the patch that knows how to do the mkdir 
     and also fixes (untested!) the write_sha1_from_fd() problem.

This makes all operations work fine in sparse object directories

 (c) remove the "for(i = 0; i < 256; i++) mkdir(i)" thing from git-init-db 
     and add the "rmdir()" to git-prune-packed.

This is the part that actually generates the sparse object directories.

So if you apply (a+b) to mainline, and wait with (c) until a later date, 
at least we can then do (c) at any point. No hurry.

Appended is a suggested split, if you want it explicitly. I haven't 
actually tested this split and did it by hand, but it looks correct.

			Linus

---- stage (a) ----

Phase 1: don't require the object subdirectories to be there if empty

diff --git a/daemon.c b/daemon.c
--- a/daemon.c
+++ b/daemon.c
@@ -142,7 +142,7 @@ static int upload(char *dir, int dirlen)
 	 * is ok with us doing this.
 	 */
 	if ((!export_all_trees && access("git-daemon-export-ok", F_OK)) ||
-	    access("objects/00", X_OK) ||
+	    access("objects/", X_OK) ||
 	    access("HEAD", R_OK)) {
 		logerror("Not a valid git-daemon-enabled repository: '%s'", dir);
 		return -1;
diff --git a/fsck-objects.c b/fsck-objects.c
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -329,9 +329,8 @@ static int fsck_dir(int i, char *path)
 	DIR *dir = opendir(path);
 	struct dirent *de;
 
-	if (!dir) {
-		return error("missing sha1 directory '%s'", path);
-	}
+	if (!dir)
+		return 0;
 
 	while ((de = readdir(dir)) != NULL) {
 		char name[100];
diff --git a/git-rename.perl b/git-rename.perl
--- a/git-rename.perl
+++ b/git-rename.perl
@@ -15,7 +15,7 @@ sub usage($);
 my $GIT_DIR = $ENV{'GIT_DIR'} || ".git";
 
 unless ( -d $GIT_DIR && -d $GIT_DIR . "/objects" && 
-	-d $GIT_DIR . "/objects/00" && -d $GIT_DIR . "/refs") {
+	-d $GIT_DIR . "/objects/" && -d $GIT_DIR . "/refs") {
 	usage("Git repository not found.");
 }
 
diff --git a/git-sh-setup.sh b/git-sh-setup.sh
--- a/git-sh-setup.sh
+++ b/git-sh-setup.sh
@@ -22,4 +22,4 @@ refs/*)	: ;;
 *)	false ;;
 esac &&
 [ -d "$GIT_DIR/refs" ] &&
-[ -d "$GIT_OBJECT_DIRECTORY/00" ]
+[ -d "$GIT_OBJECT_DIRECTORY/" ]
diff --git a/prune-packed.c b/prune-packed.c
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -46,7 +48,7 @@ static void prune_packed_objects(void)
 		sprintf(pathname + len, "%02x/", i);
 		d = opendir(pathname);
 		if (!d)
-			die("unable to open %s", pathname);
+			continue;
 		prune_dir(i, d, pathname, len + 3);
 		closedir(d);
 	}

---- stage (b) ----

Phase 2: create object subdirectories on demand

diff --git a/sha1_file.c b/sha1_file.c
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1248,6 +1248,73 @@ char *write_sha1_file_prepare(void *buf,
 	return sha1_file_name(sha1);
 }
 
+/*
+ * Link the tempfile to the final place, possibly creating the
+ * last directory level as you do so.
+ *
+ * Returns the errno on failure, 0 on success.
+ */
+static int link_temp_to_file(const char *tmpfile, char *filename)
+{
+	int ret;
+
+	if (!link(tmpfile, filename))
+		return 0;
+
+	/*
+	 * Try to mkdir the last path component if that failed
+	 * with an ENOENT.
+	 *
+	 * Re-try the "link()" regardless of whether the mkdir
+	 * succeeds, since a race might mean that somebody
+	 * else succeeded.
+	 */
+	ret = errno;
+	if (ret == ENOENT) {
+		char *dir = strrchr(filename, '/');
+		if (dir) {
+			*dir = 0;
+			mkdir(filename, 0777);
+			*dir = '/';
+			if (!link(tmpfile, filename))
+				return 0;
+			ret = errno;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Move the just written object into its final resting place
+ */
+static int move_temp_to_file(const char *tmpfile, char *filename)
+{
+	int ret = link_temp_to_file(tmpfile, filename);
+	if (ret) {
+		/*
+		 * Coda hack - coda doesn't like cross-directory links,
+		 * so we fall back to a rename, which will mean that it
+		 * won't be able to check collisions, but that's not a
+		 * big deal.
+		 *
+		 * When this succeeds, we just return 0. We have nothing
+		 * left to unlink.
+		 */
+		if (ret == EXDEV && !rename(tmpfile, filename))
+			return 0;
+	}
+	unlink(tmpfile);
+	if (ret) {
+		if (ret != EEXIST) {
+			fprintf(stderr, "unable to write sha1 filename %s: %s", filename, strerror(ret));
+			return -1;
+		}
+		/* FIXME!!! Collision check here ? */
+	}
+
+	return 0;
+}
+
 int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
 {
 	int size;
@@ -1257,7 +1324,7 @@ int write_sha1_file(void *buf, unsigned 
 	char *filename;
 	static char tmpfile[PATH_MAX];
 	unsigned char hdr[50];
-	int fd, hdrlen, ret;
+	int fd, hdrlen;
 
 	/* Normally if we have it in the pack then we do not bother writing
 	 * it out into .git/objects/??/?{38} file.
@@ -1320,32 +1387,7 @@ int write_sha1_file(void *buf, unsigned 
 	close(fd);
 	free(compressed);
 
-	ret = link(tmpfile, filename);
-	if (ret < 0) {
-		ret = errno;
-
-		/*
-		 * Coda hack - coda doesn't like cross-directory links,
-		 * so we fall back to a rename, which will mean that it
-		 * won't be able to check collisions, but that's not a
-		 * big deal.
-		 *
-		 * When this succeeds, we just return 0. We have nothing
-		 * left to unlink.
-		 */
-		if (ret == EXDEV && !rename(tmpfile, filename))
-			return 0;
-	}
-	unlink(tmpfile);
-	if (ret) {
-		if (ret != EEXIST) {
-			fprintf(stderr, "unable to write sha1 filename %s: %s", filename, strerror(ret));
-			return -1;
-		}
-		/* FIXME!!! Collision check here ? */
-	}
-
-	return 0;
+	return move_temp_to_file(tmpfile, filename);
 }
 
 int write_sha1_to_fd(int fd, const unsigned char *sha1)
@@ -1420,8 +1462,7 @@ int write_sha1_to_fd(int fd, const unsig
 int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
 		       size_t bufsize, size_t *bufposn)
 {
-	char *filename = sha1_file_name(sha1);
-
+	char tmpfile[PATH_MAX];
 	int local;
 	z_stream stream;
 	unsigned char real_sha1[20];
@@ -1429,10 +1470,11 @@ int write_sha1_from_fd(const unsigned ch
 	int ret;
 	SHA_CTX c;
 
-	local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
+	snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory());
 
+	local = mkstemp(tmpfile);
 	if (local < 0)
-		return error("Couldn't open %s\n", filename);
+		return error("Couldn't open %s for %s\n", tmpfile, sha1_to_hex(sha1));
 
 	memset(&stream, 0, sizeof(stream));
 
@@ -1462,7 +1504,7 @@ int write_sha1_from_fd(const unsigned ch
 		size = read(fd, buffer + *bufposn, bufsize - *bufposn);
 		if (size <= 0) {
 			close(local);
-			unlink(filename);
+			unlink(tmpfile);
 			if (!size)
 				return error("Connection closed?");
 			perror("Reading from connection");
@@ -1475,15 +1517,15 @@ int write_sha1_from_fd(const unsigned ch
 	close(local);
 	SHA1_Final(real_sha1, &c);
 	if (ret != Z_STREAM_END) {
-		unlink(filename);
+		unlink(tmpfile);
 		return error("File %s corrupted", sha1_to_hex(sha1));
 	}
 	if (memcmp(sha1, real_sha1, 20)) {
-		unlink(filename);
+		unlink(tmpfile);
 		return error("File %s has bad hash\n", sha1_to_hex(sha1));
 	}
-	
-	return 0;
+
+	return move_temp_to_file(tmpfile, sha1_file_name(sha1));
 }
 
 int has_pack_index(const unsigned char *sha1)

---- stage (c) ----

Phase 3: actually remove object subdirectories

diff --git a/init-db.c b/init-db.c
--- a/init-db.c
+++ b/init-db.c
@@ -244,10 +244,6 @@ int main(int argc, char **argv)
 	memcpy(path, sha1_dir, len);
 
 	safe_create_dir(sha1_dir);
-	for (i = 0; i < 256; i++) {
-		sprintf(path+len, "/%02x", i);
-		safe_create_dir(path);
-	}
 	strcpy(path+len, "/pack");
 	safe_create_dir(path);
 	strcpy(path+len, "/info");
diff --git a/prune-packed.c b/prune-packed.c
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -26,6 +26,8 @@ static void prune_dir(int i, DIR *dir, c
 		else if (unlink(pathname) < 0)
 			error("unable to unlink %s", pathname);
 	}
+	pathname[len] = 0;
+	rmdir(pathname);
 }
 
 static void prune_packed_objects(void)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Create object subdirectories on demand (phase II)
  2005-10-07  9:38 ` Junio C Hamano
  2005-10-07 14:56   ` Linus Torvalds
@ 2005-10-07 16:30   ` Junio C Hamano
  1 sibling, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2005-10-07 16:30 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

This removes the unoptimization.  The previous round does not mind
missing fan-out directories, but still makes sure they exist, lest
older versions choke on a repository created/packed by it.

This round does not play that nicely anymore -- empty fan-out
directories are not created by init-db, and will stay removed by
prune-packed.  The prune command also removes empty fan-out directories.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

    Here is what I did.  I unoptimized your original and that
    sits near the tip of "pu" branch.  This patch is to revert
    the unoptimization.  In addition, git-prune is told about
    the empty fan-out removal, although it may not matter that
    much.  Hopefully, the phase I can be included in the next
    official release, and after everybody updates, this patch
    can go in.

 git-prune.sh     |    1 +
 init-db.c        |    4 ----
 prune-packed.c   |    3 +--
 t/t0000-basic.sh |    8 ++++----
 4 files changed, 6 insertions(+), 10 deletions(-)

applies-to: d672cb30d513553ea3ca92a933563a25df1d7865
93638ce0a569543d0b41eedfac6873a541f5c753
diff --git a/git-prune.sh b/git-prune.sh
index 9657dbf..b28630c 100755
--- a/git-prune.sh
+++ b/git-prune.sh
@@ -22,6 +22,7 @@ sed -ne '/unreachable /{
 }' | {
 	cd "$GIT_OBJECT_DIRECTORY" || exit
 	xargs $echo rm -f
+	rmdir 2>/dev/null [0-9a-f][0-9a-f]
 }
 
 git-prune-packed $dryrun
diff --git a/init-db.c b/init-db.c
index aabc09f..921df9b 100644
--- a/init-db.c
+++ b/init-db.c
@@ -244,10 +244,6 @@ int main(int argc, char **argv)
 	memcpy(path, sha1_dir, len);
 
 	safe_create_dir(sha1_dir);
-	for (i = 0; i < 256; i++) {
-		sprintf(path+len, "/%02x", i);
-		safe_create_dir(path);
-	}
 	strcpy(path+len, "/pack");
 	safe_create_dir(path);
 	strcpy(path+len, "/info");
diff --git a/prune-packed.c b/prune-packed.c
index 73f0f3a..16685d1 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -27,8 +27,7 @@ static void prune_dir(int i, DIR *dir, c
 			error("unable to unlink %s", pathname);
 	}
 	pathname[len] = 0;
-	if (rmdir(pathname))
-		mkdir(pathname, 0777);
+	rmdir(pathname);
 }
 
 static void prune_packed_objects(void)
diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh
index 5c5f854..dff7d69 100755
--- a/t/t0000-basic.sh
+++ b/t/t0000-basic.sh
@@ -28,12 +28,12 @@ test_expect_success \
     '.git/objects should be empty after git-init-db in an empty repo.' \
     'cmp -s /dev/null should-be-empty' 
 
-# also it should have 258 subdirectories; 256 fan-out anymore, pack, and info.
-# 259 is counting "objects" itself
+# also it should have 2 subdirectories; no fan-out anymore, pack, and info.
+# 3 is counting "objects" itself
 find .git/objects -type d -print >full-of-directories
 test_expect_success \
-    '.git/objects should have 258 subdirectories.' \
-    'test $(wc -l < full-of-directories) = 259'
+    '.git/objects should have 3 subdirectories.' \
+    'test $(wc -l < full-of-directories) = 3'
 
 ################################################################
 # Basics of the basics
---
0.99.8.GIT

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: Create object subdirectories on demand
  2005-10-06 23:23 Create object subdirectories on demand Linus Torvalds
  2005-10-07  9:38 ` Junio C Hamano
@ 2005-10-08  1:45 ` Daniel Barkalow
  2005-10-09 10:42   ` Use the same move_temp_to_file in git-http-fetch Junio C Hamano
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Barkalow @ 2005-10-08  1:45 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano

On Thu, 6 Oct 2005, Linus Torvalds wrote:

> This patch also tries to fix up "write_sha1_from_fd()" to use the new 
> common infrastructure for creating the object files, closing a hole where 
> we might otherwise leave half-written objects in the object database.

This looks right to me, but it would be nice to also split out and 
share the temp file creation. Also, http-fetch.c writes object files and 
needs at least move_temp_to_file() if it's going to do special stuff.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Use the same move_temp_to_file in git-http-fetch.
  2005-10-08  1:45 ` Create object subdirectories on demand Daniel Barkalow
@ 2005-10-09 10:42   ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2005-10-09 10:42 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: Linus Torvalds, Git Mailing List

The http commit walker cannot use the same temporary file
creation code because it needs to use a predictable temporary
filename for partial fetch continuation purposes, but the code
to move the temporary file to the final location should be
usable from the ordinary object creation case.

Export move_temp_to_file from sha1_file.c and use it, while
losing the custom relink_or_rename function from http-fetch.c.

Also the temporary object file creation part needs to make sure
the leading path exists, in preparation for the really lazy
fan-out directory creation.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

    Daniel Barkalow <barkalow@iabervon.org> writes:

    > This looks right to me, but it would be nice to also split out and 
    > share the temp file creation. Also, http-fetch.c writes object files and 
    > needs at least move_temp_to_file() if it's going to do special stuff.

    I agree.  The proposed updates branch has this patch.

 cache.h      |    1 +
 http-fetch.c |   47 ++++++++++++++++++-----------------------------
 sha1_file.c  |    2 +-
 3 files changed, 20 insertions(+), 30 deletions(-)

applies-to: f3741b89b850db8d343d3cde58416e076964c952
07d374bffd6fcd0c67c671902dde9402335776f5
diff --git a/cache.h b/cache.h
index 5987d4c..28077fc 100644
--- a/cache.h
+++ b/cache.h
@@ -221,6 +221,7 @@ extern int read_tree(void *buffer, unsig
 extern int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
 			      size_t bufsize, size_t *bufposn);
 extern int write_sha1_to_fd(int fd, const unsigned char *sha1);
+extern int move_temp_to_file(const char *tmpfile, char *filename);
 
 extern int has_sha1_pack(const unsigned char *sha1);
 extern int has_sha1_file(const unsigned char *sha1);
diff --git a/http-fetch.c b/http-fetch.c
index 5d0e3e3..c617583 100644
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -138,25 +138,6 @@ static size_t fwrite_sha1_file(void *ptr
 	return size;
 }
 
-int relink_or_rename(char *old, char *new) {
-	int ret;
-
-	ret = link(old, new);
-	if (ret < 0) {
-		/* Same Coda hack as in write_sha1_file(sha1_file.c) */
-		ret = errno;
-		if (ret == EXDEV && !rename(old, new))
-			return 0;
-	}
-	unlink(old);
-	if (ret) {
-		if (ret != EEXIST)
-			return ret;
-	}
-
-	return 0;
-}
-
 #ifdef USE_CURL_MULTI
 void process_curl_messages();
 void process_request_queue();
@@ -295,6 +276,20 @@ void start_request(struct transfer_reque
 
 	request->local = open(request->tmpfile,
 			      O_WRONLY | O_CREAT | O_EXCL, 0666);
+	/* This could have failed due to the "lazy directory creation";
+	 * try to mkdir the last path component.
+	 */
+	if (request->local < 0 && errno == ENOENT) {
+		char *dir = strrchr(request->tmpfile, '/');
+		if (dir) {
+			*dir = 0;
+			mkdir(request->tmpfile, 0777);
+			*dir = '/';
+		}
+		request->local = open(request->tmpfile,
+				      O_WRONLY | O_CREAT | O_EXCL, 0666);
+	}
+
 	if (request->local < 0) {
 		request->state = ABORTED;
 		error("Couldn't create temporary file %s for %s: %s\n",
@@ -408,7 +403,7 @@ void finish_request(struct transfer_requ
 		return;
 	}
 	request->rename =
-		relink_or_rename(request->tmpfile, request->filename);
+		move_temp_to_file(request->tmpfile, request->filename);
 
 	if (request->rename == 0)
 		pull_say("got %s\n", sha1_to_hex(request->sha1));
@@ -599,12 +594,7 @@ static int fetch_index(struct alt_base *
 
 	fclose(indexfile);
 
-	ret = relink_or_rename(tmpfile, filename);
-	if (ret)
-		return error("unable to write index filename %s: %s",
-			     filename, strerror(ret));
-
-	return 0;
+	return move_temp_to_file(tmpfile, filename);
 }
 
 static int setup_index(struct alt_base *repo, unsigned char *sha1)
@@ -869,10 +859,9 @@ static int fetch_pack(struct alt_base *r
 
 	fclose(packfile);
 
-	ret = relink_or_rename(tmpfile, filename);
+	ret = move_temp_to_file(tmpfile, filename);
 	if (ret)
-		return error("unable to write pack filename %s: %s",
-			     filename, strerror(ret));
+		return ret;
 
 	lst = &repo->packs;
 	while (*lst != target)
diff --git a/sha1_file.c b/sha1_file.c
index baaa4c0..6e3ea23 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1287,7 +1287,7 @@ static int link_temp_to_file(const char 
 /*
  * Move the just written object into its final resting place
  */
-static int move_temp_to_file(const char *tmpfile, char *filename)
+int move_temp_to_file(const char *tmpfile, char *filename)
 {
 	int ret = link_temp_to_file(tmpfile, filename);
 	if (ret) {
---
0.99.8.GIT

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2005-10-09 10:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-10-06 23:23 Create object subdirectories on demand Linus Torvalds
2005-10-07  9:38 ` Junio C Hamano
2005-10-07 14:56   ` Linus Torvalds
2005-10-07 16:30   ` [PATCH] Create object subdirectories on demand (phase II) Junio C Hamano
2005-10-08  1:45 ` Create object subdirectories on demand Daniel Barkalow
2005-10-09 10:42   ` Use the same move_temp_to_file in git-http-fetch Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).