All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] preserve mtime of local clone
@ 2009-09-09 19:51 Clemens Buchacher
  2009-09-12  5:09 ` Junio C Hamano
  2009-09-13 16:06 ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Clemens Buchacher
  0 siblings, 2 replies; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-09 19:51 UTC (permalink / raw)
  To: git; +Cc: msysgit, Junio C Hamano, Shawn O. Pearce

A local clone without hardlinks copies all objects, including dangling
ones, to the new repository. Since the mtimes are renewed, those
dangling objects cannot be pruned by "git gc --prune", even if they
would have been old enough for pruning in the original repository.

Instead, preserve mtime during copy. "git gc --prune" will then work
in the clone just like it would have in the original.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

I noticed this problem when I cloned a repo with lots of old dangling
objects onto a windows machine. git-gui immediately recommended running
git-gc, and I did. But each time I restarted git-gui, it recommended git-gc
again, because there were still plenty of dangling objects lying around
which could not be removed due to their recent mtimes.

So there is actually a problem with git-gui's recommendation. Especially on
Windows, where it only checks for 1 or more files in .git/objects/42 (as
opposed to 8 files on other platforms). The probability of that happening if
the repo contains about 100 loose objects is 1-(254/255)^100 = 32%. The
probability for the same to happen with at least 2 files is only 6% [*].
Maybe that would be a good compromise?

Alternatively, git-gc could remember the number of dangling objects, and
git-gui can adjust its recommendation accordingly, taking that number and
the date of the lastest repack into account.

Clemens

[*] The following octave script shows the probability for m or more objects
to be in .git/objects/42 for a total of n objects.

m = [1 2 8];
n = 100:100:3000;

P = zeros(length(n), length(m));
for k = 1:length(n)
	P(n(k), :) = 1-binocdf(m-1, n(k), 1/255);
end
plot(n, P);

n \ m	1	2	8
100	32%	6%	0%
500	86%	58%	0%
1000	98%	90%	5%
2000	100%	100%	55%

---
 builtin-clone.c   |    2 +-
 builtin-init-db.c |    2 +-
 cache.h           |    6 ++++--
 copy.c            |   25 ++++++++++++++++++++++---
 lockfile.c        |    2 +-
 rerere.c          |    2 +-
 6 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..cb3c895 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file(dest->buf, src->buf, 0666, 1))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/builtin-init-db.c b/builtin-init-db.c
index dd84cae..5deb81d 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -100,7 +100,7 @@ static void copy_templates_1(char *path, int baselen,
 				die_errno("cannot symlink '%s' '%s'", lnk, path);
 		}
 		else if (S_ISREG(st_template.st_mode)) {
-			if (copy_file(path, template, st_template.st_mode))
+			if (copy_file(path, template, st_template.st_mode, 0))
 				die_errno("cannot copy '%s' to '%s'", template,
 					  path);
 		}
diff --git a/cache.h b/cache.h
index 5fad24c..1875c97 100644
--- a/cache.h
+++ b/cache.h
@@ -921,8 +921,10 @@ extern const char *git_mailmap_file;
 
 /* IO helper functions */
 extern void maybe_flush_or_die(FILE *, const char *);
-extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_fd(int ifd, int ofd, int preserve_times);
+extern int copy_file(const char *dst, const char *src, int mode, int
+		preserve_times);
+extern int copy_times(int ofd, int ifd);
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern void write_or_die(int fd, const void *buf, size_t count);
diff --git a/copy.c b/copy.c
index e54d15a..fe0380e 100644
--- a/copy.c
+++ b/copy.c
@@ -1,6 +1,6 @@
 #include "cache.h"
 
-int copy_fd(int ifd, int ofd)
+int copy_fd(int ifd, int ofd, int preserve_times)
 {
 	while (1) {
 		char buffer[8192];
@@ -31,11 +31,18 @@ int copy_fd(int ifd, int ofd)
 			}
 		}
 	}
+	if (preserve_times && copy_times(ofd, ifd)) {
+		int time_error = errno;
+		close(ifd);
+		return error("copy-fd: failed to preserve times: %s",
+				strerror(time_error));
+	}
 	close(ifd);
 	return 0;
 }
 
-int copy_file(const char *dst, const char *src, int mode)
+int copy_file(const char *dst, const char *src, int mode,
+		int preserve_times)
 {
 	int fdi, fdo, status;
 
@@ -46,7 +53,7 @@ int copy_file(const char *dst, const char *src, int mode)
 		close(fdi);
 		return fdo;
 	}
-	status = copy_fd(fdi, fdo);
+	status = copy_fd(fdi, fdo, preserve_times);
 	if (close(fdo) != 0)
 		return error("%s: close error: %s", dst, strerror(errno));
 
@@ -55,3 +62,15 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	return status;
 }
+
+int copy_times(int ofd, int ifd)
+{
+	struct stat st;
+	struct timespec times[2];
+	if (fstat(ifd, &st))
+		return -1;
+	times[0].tv_nsec = UTIME_OMIT;
+	times[1].tv_sec = st.st_mtime;
+	times[1].tv_nsec = ST_MTIME_NSEC(st);
+	return futimens(ofd, times);
+}
diff --git a/lockfile.c b/lockfile.c
index eb931ed..c7bbd4d 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -196,7 +196,7 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
 			close(fd);
 			return error("cannot open '%s' for copying", path);
 		}
-	} else if (copy_fd(orig_fd, fd)) {
+	} else if (copy_fd(orig_fd, fd, 0)) {
 		if (flags & LOCK_DIE_ON_ERROR)
 			exit(128);
 		close(fd);
diff --git a/rerere.c b/rerere.c
index 87360dc..d25f5f1 100644
--- a/rerere.c
+++ b/rerere.c
@@ -326,7 +326,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 			continue;
 
 		fprintf(stderr, "Recorded resolution for '%s'.\n", path);
-		copy_file(rerere_path(name, "postimage"), path, 0666);
+		copy_file(rerere_path(name, "postimage"), path, 0666, 0);
 	mark_resolved:
 		rr->items[i].util = NULL;
 	}
-- 
1.6.4.2.266.gbaa17

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] preserve mtime of local clone
  2009-09-09 19:51 [PATCH] preserve mtime of local clone Clemens Buchacher
@ 2009-09-12  5:09 ` Junio C Hamano
  2009-09-12  8:26   ` Clemens Buchacher
  2009-09-13 16:06 ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Clemens Buchacher
  1 sibling, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2009-09-12  5:09 UTC (permalink / raw)
  To: Clemens Buchacher; +Cc: git, msysgit, Junio C Hamano, Shawn O. Pearce


Clemens Buchacher <drizzd@aon.at> writes:

> +int copy_times(int ofd, int ifd)
> +{
> +	struct stat st;
> +	struct timespec times[2];
> +	if (fstat(ifd, &st))
> +		return -1;
> +	times[0].tv_nsec = UTIME_OMIT;
> +	times[1].tv_sec = st.st_mtime;
> +	times[1].tv_nsec = ST_MTIME_NSEC(st);
> +	return futimens(ofd, times);
> +}

Hmm, futimens() is relatively new.  Are minority platforms folks Ok with
this patch?

At least SunOS 5.11 (OpenSolaris 0811) seems to barf on UTIME_OMIT.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] preserve mtime of local clone
  2009-09-12  5:09 ` Junio C Hamano
@ 2009-09-12  8:26   ` Clemens Buchacher
  2009-09-12  9:03     ` Clemens Buchacher
  0 siblings, 1 reply; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-12  8:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, msysgit, Shawn O. Pearce

On Fri, Sep 11, 2009 at 10:09:12PM -0700, Junio C Hamano wrote:
> Clemens Buchacher <drizzd@aon.at> writes:
> 
> > +int copy_times(int ofd, int ifd)
> > +{
> > +	struct stat st;
> > +	struct timespec times[2];
> > +	if (fstat(ifd, &st))
> > +		return -1;
> > +	times[0].tv_nsec = UTIME_OMIT;
> > +	times[1].tv_sec = st.st_mtime;
> > +	times[1].tv_nsec = ST_MTIME_NSEC(st);
> > +	return futimens(ofd, times);
> > +}
> 
> Hmm, futimens() is relatively new.  Are minority platforms folks Ok with
> this patch?
> 
> At least SunOS 5.11 (OpenSolaris 0811) seems to barf on UTIME_OMIT.

If it's a problem we can use utime() instead. I was just trying to use the
file descriptors, since they were available. But the patch would be a little
smaller if I didn't touch copy_fd().

Clemens

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] preserve mtime of local clone
  2009-09-12  8:26   ` Clemens Buchacher
@ 2009-09-12  9:03     ` Clemens Buchacher
  2009-09-13  3:06       ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-12  9:03 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, msysgit, Shawn O. Pearce

A local clone without hardlinks copies all objects, including dangling
ones, to the new repository. Since the mtimes are renewed, those
dangling objects cannot be pruned by "git gc --prune", even if they
would have been old enough for pruning in the original repository.

Instead, preserve mtime during copy. "git gc --prune" will then work
in the clone just like it did in the original.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

On Sat, Sep 12, 2009 at 10:26:24AM +0200, Clemens Buchacher wrote:

> If it's a problem we can use utime() instead. I was just trying to use the
> file descriptors, since they were available. But the patch would be a little
> smaller if I didn't touch copy_fd().

Here we go.

 builtin-clone.c   |    2 +-
 builtin-init-db.c |    2 +-
 cache.h           |    4 +++-
 copy.c            |   18 +++++++++++++++++-
 rerere.c          |    2 +-
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..cb3c895 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file(dest->buf, src->buf, 0666, 1))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/builtin-init-db.c b/builtin-init-db.c
index dd84cae..5deb81d 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -100,7 +100,7 @@ static void copy_templates_1(char *path, int baselen,
 				die_errno("cannot symlink '%s' '%s'", lnk, path);
 		}
 		else if (S_ISREG(st_template.st_mode)) {
-			if (copy_file(path, template, st_template.st_mode))
+			if (copy_file(path, template, st_template.st_mode, 0))
 				die_errno("cannot copy '%s' to '%s'", template,
 					  path);
 		}
diff --git a/cache.h b/cache.h
index 5fad24c..ac692d0 100644
--- a/cache.h
+++ b/cache.h
@@ -922,7 +922,9 @@ extern const char *git_mailmap_file;
 /* IO helper functions */
 extern void maybe_flush_or_die(FILE *, const char *);
 extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_file(const char *dst, const char *src, int mode, int
+		preserve_times);
+extern int copy_times(const char *dst, const char *src);
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern void write_or_die(int fd, const void *buf, size_t count);
diff --git a/copy.c b/copy.c
index e54d15a..fb5e946 100644
--- a/copy.c
+++ b/copy.c
@@ -35,7 +35,21 @@ int copy_fd(int ifd, int ofd)
 	return 0;
 }
 
-int copy_file(const char *dst, const char *src, int mode)
+int copy_times(const char *dst, const char *src)
+{
+	struct stat st;
+	struct utimbuf times;
+	if (stat(src, &st) < 0)
+		return -1;
+	times.actime = st.st_atime;
+	times.modtime = st.st_mtime;
+	if (utime(dst, &times) < 0)
+		return -1;
+	return 0;
+}
+
+int copy_file(const char *dst, const char *src, int mode,
+		int preserve_times)
 {
 	int fdi, fdo, status;
 
@@ -52,6 +66,8 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	if (!status && adjust_shared_perm(dst))
 		return -1;
+	if (!status && preserve_times && copy_times(dst, src))
+		return -1;
 
 	return status;
 }
diff --git a/rerere.c b/rerere.c
index 87360dc..d25f5f1 100644
--- a/rerere.c
+++ b/rerere.c
@@ -326,7 +326,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 			continue;
 
 		fprintf(stderr, "Recorded resolution for '%s'.\n", path);
-		copy_file(rerere_path(name, "postimage"), path, 0666);
+		copy_file(rerere_path(name, "postimage"), path, 0666, 0);
 	mark_resolved:
 		rr->items[i].util = NULL;
 	}
-- 
1.6.5.rc0.164.g5f6b0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] preserve mtime of local clone
  2009-09-12  9:03     ` Clemens Buchacher
@ 2009-09-13  3:06       ` Junio C Hamano
  2009-09-13 10:49         ` [PATCH v3] " Clemens Buchacher
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2009-09-13  3:06 UTC (permalink / raw)
  To: Clemens Buchacher; +Cc: git, msysgit, Shawn O. Pearce


Clemens Buchacher <drizzd@aon.at> writes:

> A local clone without hardlinks copies all objects, including dangling
> ones, to the new repository. Since the mtimes are renewed, those
> dangling objects cannot be pruned by "git gc --prune", even if they
> would have been old enough for pruning in the original repository.
>
> Instead, preserve mtime during copy. "git gc --prune" will then work
> in the clone just like it did in the original.
>
> Signed-off-by: Clemens Buchacher <drizzd@aon.at>
> ---
>
> On Sat, Sep 12, 2009 at 10:26:24AM +0200, Clemens Buchacher wrote:
>
>> If it's a problem we can use utime() instead. I was just trying to use the
>> file descriptors, since they were available. But the patch would be a little
>> smaller if I didn't touch copy_fd().
>
> Here we go.

Thanks.

This new feature is not wanted by most of the callers of copy_file(), and
it is not like they may want to conditionally pass 1 in preserve_times
someday.  I'd limit the damage like this replacement patch, instead.

Is it unreasonable to have a test for this one, by the way?

 builtin-clone.c |    2 +-
 cache.h         |    1 +
 copy.c          |   21 +++++++++++++++++++++
 3 files changed, 23 insertions(+), 1 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..bab2d84 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file_with_time(dest->buf, src->buf, 0666))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/cache.h b/cache.h
index 867918d..1668f28 100644
--- a/cache.h
+++ b/cache.h
@@ -923,6 +923,7 @@ extern const char *git_mailmap_file;
 extern void maybe_flush_or_die(FILE *, const char *);
 extern int copy_fd(int ifd, int ofd);
 extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_file_with_time(const char *dst, const char *src, int mode);
 extern void write_or_die(int fd, const void *buf, size_t count);
 extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
 extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
diff --git a/copy.c b/copy.c
index e54d15a..a7f58fd 100644
--- a/copy.c
+++ b/copy.c
@@ -35,6 +35,19 @@ int copy_fd(int ifd, int ofd)
 	return 0;
 }
 
+static int copy_times(const char *dst, const char *src)
+{
+	struct stat st;
+	struct utimbuf times;
+	if (stat(src, &st) < 0)
+		return -1;
+	times.actime = st.st_atime;
+	times.modtime = st.st_mtime;
+	if (utime(dst, &times) < 0)
+		return -1;
+	return 0;
+}
+
 int copy_file(const char *dst, const char *src, int mode)
 {
 	int fdi, fdo, status;
@@ -55,3 +68,11 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	return status;
 }
+
+int copy_file_with_time(const char *dst, const char *src, int mode)
+{
+	int status = copy_file(dst, src, mode);
+	if (!status)
+		return copy_times(dst, src);
+	return status;
+}

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3] preserve mtime of local clone
  2009-09-13  3:06       ` Junio C Hamano
@ 2009-09-13 10:49         ` Clemens Buchacher
  0 siblings, 0 replies; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-13 10:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, msysgit, Shawn O. Pearce

A local clone without hardlinks copies all objects, including dangling
ones, to the new repository. Since the mtimes are renewed, those
dangling objects cannot be pruned by "git gc --prune", even if they
would have been old enough for pruning in the original repository.

Instead, preserve mtime during copy. "git gc --prune" will then work
in the clone just like it did in the original.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

On Sat, Sep 12, 2009 at 08:06:51PM -0700, Junio C Hamano wrote:

> This new feature is not wanted by most of the callers of copy_file(), and
> it is not like they may want to conditionally pass 1 in preserve_times
> someday.  I'd limit the damage like this replacement patch, instead.

You're right. That's much better.

> Is it unreasonable to have a test for this one, by the way?

It's not. Test added.

Clemens

 builtin-clone.c  |    2 +-
 cache.h          |    2 ++
 copy.c           |   21 +++++++++++++++++++++
 t/t5304-prune.sh |   54 +++++++++++++++++++++++++++++++++---------------------
 4 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..bab2d84 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file_with_time(dest->buf, src->buf, 0666))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/cache.h b/cache.h
index 5fad24c..ee9c98c 100644
--- a/cache.h
+++ b/cache.h
@@ -923,6 +923,8 @@ extern const char *git_mailmap_file;
 extern void maybe_flush_or_die(FILE *, const char *);
 extern int copy_fd(int ifd, int ofd);
 extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_times(const char *dst, const char *src);
+extern int copy_file_with_time(const char *dst, const char *src, int mode);
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern void write_or_die(int fd, const void *buf, size_t count);
diff --git a/copy.c b/copy.c
index e54d15a..963b481 100644
--- a/copy.c
+++ b/copy.c
@@ -55,3 +55,24 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	return status;
 }
+
+int copy_times(const char *dst, const char *src)
+{
+	struct stat st;
+	struct utimbuf times;
+	if (stat(src, &st) < 0)
+		return -1;
+	times.actime = st.st_atime;
+	times.modtime = st.st_mtime;
+	if (utime(dst, &times) < 0)
+		return -1;
+	return 0;
+}
+
+int copy_file_with_time(const char *dst, const char *src, int mode)
+{
+     int status = copy_file(dst, src, mode);
+     if (!status)
+		return copy_times(dst, src);
+     return status;
+}
diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh
index 55ed7c7..15fd90a 100755
--- a/t/t5304-prune.sh
+++ b/t/t5304-prune.sh
@@ -6,6 +6,17 @@
 test_description='prune'
 . ./test-lib.sh
 
+day=$((60*60*24))
+week=$(($day*7))
+
+add_blob() {
+	before=$(git count-objects | sed "s/ .*//") &&
+	BLOB=$(echo aleph_0 | git hash-object -w --stdin) &&
+	BLOB_FILE=.git/objects/$(echo $BLOB | sed "s/^../&\//") &&
+	test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
+	test -f $BLOB_FILE
+}
+
 test_expect_success setup '
 
 	: > file &&
@@ -31,11 +42,7 @@ test_expect_success 'prune stale packs' '
 
 test_expect_success 'prune --expire' '
 
-	before=$(git count-objects | sed "s/ .*//") &&
-	BLOB=$(echo aleph | git hash-object -w --stdin) &&
-	BLOB_FILE=.git/objects/$(echo $BLOB | sed "s/^../&\//") &&
-	test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
-	test -f $BLOB_FILE &&
+	add_blob &&
 	git prune --expire=1.hour.ago &&
 	test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
 	test -f $BLOB_FILE &&
@@ -48,16 +55,12 @@ test_expect_success 'prune --expire' '
 
 test_expect_success 'gc: implicit prune --expire' '
 
-	before=$(git count-objects | sed "s/ .*//") &&
-	BLOB=$(echo aleph_0 | git hash-object -w --stdin) &&
-	BLOB_FILE=.git/objects/$(echo $BLOB | sed "s/^../&\//") &&
-	test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
-	test -f $BLOB_FILE &&
-	test-chmtime =-$((86400*14-30)) $BLOB_FILE &&
+	add_blob &&
+	test-chmtime =-$((2*$week-30)) $BLOB_FILE &&
 	git gc &&
 	test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
 	test -f $BLOB_FILE &&
-	test-chmtime =-$((86400*14+1)) $BLOB_FILE &&
+	test-chmtime =-$((2*$week+1)) $BLOB_FILE &&
 	git gc &&
 	test $before = $(git count-objects | sed "s/ .*//") &&
 	! test -f $BLOB_FILE
@@ -114,12 +117,8 @@ test_expect_success 'prune: do not prune heads listed as an argument' '
 
 test_expect_success 'gc --no-prune' '
 
-	before=$(git count-objects | sed "s/ .*//") &&
-	BLOB=$(echo aleph_0 | git hash-object -w --stdin) &&
-	BLOB_FILE=.git/objects/$(echo $BLOB | sed "s/^../&\//") &&
-	test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
-	test -f $BLOB_FILE &&
-	test-chmtime =-$((86400*5001)) $BLOB_FILE &&
+	add_blob &&
+	test-chmtime =-$((5001*$day)) $BLOB_FILE &&
 	git config gc.pruneExpire 2.days.ago &&
 	git gc --no-prune &&
 	test 1 = $(git count-objects | sed "s/ .*//") &&
@@ -140,9 +139,8 @@ test_expect_success 'gc respects gc.pruneExpire' '
 
 test_expect_success 'gc --prune=<date>' '
 
-	BLOB=$(echo aleph_0 | git hash-object -w --stdin) &&
-	BLOB_FILE=.git/objects/$(echo $BLOB | sed "s/^../&\//") &&
-	test-chmtime =-$((86400*5001)) $BLOB_FILE &&
+	add_blob &&
+	test-chmtime =-$((5001*$day)) $BLOB_FILE &&
 	git gc --prune=5002.days.ago &&
 	test -f $BLOB_FILE &&
 	git gc --prune=5000.days.ago &&
@@ -150,4 +148,18 @@ test_expect_success 'gc --prune=<date>' '
 
 '
 
+test_expect_success 'gc: prune old objects after local clone' '
+
+	add_blob &&
+	test-chmtime =-$((2*$week+1)) $BLOB_FILE &&
+	git clone --no-hardlinks . aclone &&
+	cd aclone &&
+	test 1 = $(git count-objects | sed "s/ .*//") &&
+	test -f $BLOB_FILE &&
+	git gc --prune &&
+	test 0 = $(git count-objects | sed "s/ .*//") &&
+	! test -f $BLOB_FILE
+
+'
+
 test_done
-- 
1.6.5.rc0.164.g5f6b0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH] git-gui: suggest gc only when counting at least 2 objects
  2009-09-09 19:51 [PATCH] preserve mtime of local clone Clemens Buchacher
  2009-09-12  5:09 ` Junio C Hamano
@ 2009-09-13 16:06 ` Clemens Buchacher
  2009-09-13 17:58   ` Junio C Hamano
  1 sibling, 1 reply; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-13 16:06 UTC (permalink / raw)
  To: git; +Cc: msysgit, Junio C Hamano, Shawn O. Pearce

On Windows, git-gui suggests running the garbage collector if it finds
1 or more files in .git/objects/42 (as opposed to 8 files on other
platforms). The probability of that happening if the repo contains
only about 100 loose objects is 32%. The probability for the same to happen
with at least 2 files is only 6%, which is bit more reasonable.

The following octave script shows the probability for m or more
objects to be in .git/objects/42 for a total of n objects.

m = [1 2 8];
n = 100:100:3000;

P = zeros(length(n), length(m));
for k = 1:length(n)
        P(k, :) = 1-binocdf(m-1, n(k), 1/255);
end
plot(n, P);

n \ m   1       2       8
100     32%     6%      0%
500     86%     58%     0%
1000    98%     90%     5%
2000    100%    100%    55%

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

So here is the patch for my other complaint. Note that I fixed a bug in the
octave script above, in case someone wants to check the numbers.

I even tested it on a windows VM (as if it wasn't slow enough already).

Clemens

 git-gui/lib/database.tcl |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/git-gui/lib/database.tcl b/git-gui/lib/database.tcl
index a18ac8b..44099e5 100644
--- a/git-gui/lib/database.tcl
+++ b/git-gui/lib/database.tcl
@@ -91,7 +91,7 @@ proc do_fsck_objects {} {
 proc hint_gc {} {
 	set object_limit 8
 	if {[is_Windows]} {
-		set object_limit 1
+		set object_limit 2
 	}
 
 	set objects_current [llength [glob \
-- 
1.6.5.rc0.164.g5f6b0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-gui: suggest gc only when counting at least 2 objects
  2009-09-13 16:06 ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Clemens Buchacher
@ 2009-09-13 17:58   ` Junio C Hamano
  2009-09-13 18:41     ` Clemens Buchacher
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2009-09-13 17:58 UTC (permalink / raw)
  To: Clemens Buchacher; +Cc: git, msysgit, Shawn O. Pearce

Clemens Buchacher <drizzd@aon.at> writes:

> On Windows, git-gui suggests running the garbage collector if it finds
> 1 or more files in .git/objects/42 (as opposed to 8 files on other
> platforms).

Somebody cares to explain why this threashold number has to be different
per platform in the first place?  Instead of bumping up to 2 like your
patch did, what bad things would happen if you increased it to 8 on
Windows?  Doesn't the same badness happen on non-Windows because they have
the threashold set to 8 already?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-gui: suggest gc only when counting at least 2 objects
  2009-09-13 17:58   ` Junio C Hamano
@ 2009-09-13 18:41     ` Clemens Buchacher
  2009-09-13 20:44       ` Jeff King
  0 siblings, 1 reply; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-13 18:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, msysgit, Shawn O. Pearce

On Sun, Sep 13, 2009 at 10:58:45AM -0700, Junio C Hamano wrote:
> Somebody cares to explain why this threashold number has to be different
> per platform in the first place? 

I really don't know. I vaguely remember someone claim that performance on
Windows suffered from many loose objects more than on other platforms. I
can't find any discussion of it though.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-gui: suggest gc only when counting at least 2 objects
  2009-09-13 18:41     ` Clemens Buchacher
@ 2009-09-13 20:44       ` Jeff King
  2009-09-13 21:19         ` Clemens Buchacher
  0 siblings, 1 reply; 13+ messages in thread
From: Jeff King @ 2009-09-13 20:44 UTC (permalink / raw)
  To: Clemens Buchacher; +Cc: Junio C Hamano, git, msysgit, Shawn O. Pearce

On Sun, Sep 13, 2009 at 08:41:50PM +0200, Clemens Buchacher wrote:

> On Sun, Sep 13, 2009 at 10:58:45AM -0700, Junio C Hamano wrote:
> > Somebody cares to explain why this threashold number has to be different
> > per platform in the first place? 
> 
> I really don't know. I vaguely remember someone claim that performance on
> Windows suffered from many loose objects more than on other platforms. I
> can't find any discussion of it though.

Maybe 8ff487c?

-Peff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-gui: suggest gc only when counting at least 2 objects
  2009-09-13 20:44       ` Jeff King
@ 2009-09-13 21:19         ` Clemens Buchacher
  2009-09-13 22:20           ` [PATCH] git-gui: search 4 directories to improve statistic of gc hint Clemens Buchacher
  2009-09-14  3:39           ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Shawn O. Pearce
  0 siblings, 2 replies; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-13 21:19 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, git, msysgit, Shawn O. Pearce

On Sun, Sep 13, 2009 at 04:44:33PM -0400, Jeff King wrote:
> On Sun, Sep 13, 2009 at 08:41:50PM +0200, Clemens Buchacher wrote:
> 
> > On Sun, Sep 13, 2009 at 10:58:45AM -0700, Junio C Hamano wrote:
> > > Somebody cares to explain why this threashold number has to be different
> > > per platform in the first place? 
> > 
> > I really don't know. I vaguely remember someone claim that performance on
> > Windows suffered from many loose objects more than on other platforms. I
> > can't find any discussion of it though.
> 
> Maybe 8ff487c?

Ok. But it's been 2 years since then and if I'm not mistaken, there have
been a number of performance improvements to msysgit. So maybe it's time to
revisit that threshold.

If, on the other hand, requiring 2 objects really is too many, we should
maybe check at least two or four directories, which would greatly improve
the statistic.

For example, the probability of q directories containing q objects, for n
objects total is

n \ q	1	4
50	18%	1%
100	32%	7%
200	54%	38%
500	86%	95%

Clemens

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] git-gui: search 4 directories to improve statistic of gc hint
  2009-09-13 21:19         ` Clemens Buchacher
@ 2009-09-13 22:20           ` Clemens Buchacher
  2009-09-14  3:39           ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Shawn O. Pearce
  1 sibling, 0 replies; 13+ messages in thread
From: Clemens Buchacher @ 2009-09-13 22:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, msysgit, Shawn O. Pearce, Jeff King

On Windows, git-gui suggests running the garbage collector if it finds
1 or more files in .git/objects/42 (as opposed to 8 files on other
platforms). The probability of that happening if the repo contains
about 100 loose objects is 32%. The probability for the same to happen
when searching 4 directories is only 8%, which is bit more reasonable.

Also remove $objects_limit from the message, because we already know
that we are above (or close to) that limit. Telling the user about
that number does not really give him any useful information.

The following octave script shows the probability for at least m*q
objects to be found in q subdirectories of .git/objects if n is the
total number of objects.

q = 4;
m = [1 2 8];
n = 0:10:2000;

P = zeros(length(n), length(m));
for k = 1:length(n)
        P(k, :) = 1-binocdf(q*m-1, n(k), q/(256-q));
end
plot(n, P);

n \ q   1       4
50      18%     1%
100     32%     8%
200     54%     39%
500     86%     96%

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

 git-gui/lib/database.tcl |   21 ++++++++++-----------
 1 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/git-gui/lib/database.tcl b/git-gui/lib/database.tcl
index a18ac8b..d4e0bed 100644
--- a/git-gui/lib/database.tcl
+++ b/git-gui/lib/database.tcl
@@ -89,27 +89,26 @@ proc do_fsck_objects {} {
 }
 
 proc hint_gc {} {
-	set object_limit 8
+	set ndirs 1
+	set limit 8
 	if {[is_Windows]} {
-		set object_limit 1
+		set ndirs 4
+		set limit 1
 	}
 
-	set objects_current [llength [glob \
-		-directory [gitdir objects 42] \
+	set count [llength [glob \
 		-nocomplain \
-		-tails \
 		-- \
-		*]]
+		[gitdir objects 4\[0-[expr {$ndirs-1}]\]/*]]]
 
-	if {$objects_current >= $object_limit} {
-		set objects_current [expr {$objects_current * 250}]
-		set object_limit    [expr {$object_limit    * 250}]
+	if {$count >= $limit * $ndirs} {
+		set objects_current [expr {$count * 256/$ndirs}]
 		if {[ask_popup \
 			[mc "This repository currently has approximately %i loose objects.
 
-To maintain optimal performance it is strongly recommended that you compress the database when more than %i loose objects exist.
+To maintain optimal performance it is strongly recommended that you compress the database.
 
-Compress the database now?" $objects_current $object_limit]] eq yes} {
+Compress the database now?" $objects_current]] eq yes} {
 			do_gc
 		}
 	}
-- 
1.6.5.rc0.164.g5f6b0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] git-gui: suggest gc only when counting at least 2 objects
  2009-09-13 21:19         ` Clemens Buchacher
  2009-09-13 22:20           ` [PATCH] git-gui: search 4 directories to improve statistic of gc hint Clemens Buchacher
@ 2009-09-14  3:39           ` Shawn O. Pearce
  1 sibling, 0 replies; 13+ messages in thread
From: Shawn O. Pearce @ 2009-09-14  3:39 UTC (permalink / raw)
  To: Clemens Buchacher; +Cc: Jeff King, Junio C Hamano, git, msysgit

Clemens Buchacher <drizzd@aon.at> wrote:
> On Sun, Sep 13, 2009 at 04:44:33PM -0400, Jeff King wrote:
> > On Sun, Sep 13, 2009 at 08:41:50PM +0200, Clemens Buchacher wrote:
> > > On Sun, Sep 13, 2009 at 10:58:45AM -0700, Junio C Hamano wrote:
> > > > Somebody cares to explain why this threashold number has to be different
> > > > per platform in the first place? 
> > > 
> > > I really don't know. I vaguely remember someone claim that performance on
> > > Windows suffered from many loose objects more than on other platforms. I
> > > can't find any discussion of it though.
> > 
> > Maybe 8ff487c?

Yes, it was 8ff487c.  Back then I was using Windows on a daily
basis and this was put into git-gui because Aunt Tillie couldn't
remember do run a git-gc every so often, and performance would just
drop in the bucket.  It also quite a bit predates `git gc --auto`
being sprinkled throughout the code base on various operations.

As to why its been 200 as the loose count estimate, that was just
a WAG based on what I observed on my desktop.  2000 on UNIX is
usually fine, 2000 on Windows meant you waited an extra 30 seconds
to perform an operation.
 
> Ok. But it's been 2 years since then and if I'm not mistaken, there have
> been a number of performance improvements to msysgit. So maybe it's time to
> revisit that threshold.

msysgit may have improved, but at the time I was running Git on
Cygwin, and I doubt NTFS has really improved that much.
 
> If, on the other hand, requiring 2 objects really is too many, we should
> maybe check at least two or four directories, which would greatly improve
> the statistic.

I'm concerned about the FS cost of checking more directories, but
this is a one-time penalty on startup of git-gui so it might not
be too bad if it gets us a better estimate.
 
-- 
Shawn.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-09-14  3:40 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-09 19:51 [PATCH] preserve mtime of local clone Clemens Buchacher
2009-09-12  5:09 ` Junio C Hamano
2009-09-12  8:26   ` Clemens Buchacher
2009-09-12  9:03     ` Clemens Buchacher
2009-09-13  3:06       ` Junio C Hamano
2009-09-13 10:49         ` [PATCH v3] " Clemens Buchacher
2009-09-13 16:06 ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Clemens Buchacher
2009-09-13 17:58   ` Junio C Hamano
2009-09-13 18:41     ` Clemens Buchacher
2009-09-13 20:44       ` Jeff King
2009-09-13 21:19         ` Clemens Buchacher
2009-09-13 22:20           ` [PATCH] git-gui: search 4 directories to improve statistic of gc hint Clemens Buchacher
2009-09-14  3:39           ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Shawn O. Pearce

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.