All of lore.kernel.org
 help / color / mirror / Atom feed
From: Clemens Buchacher <drizzd@aon.at>
To: git@vger.kernel.org
Cc: msysgit@googlegroups.com, Junio C Hamano <gitster@pobox.com>,
	"Shawn O. Pearce" <spearce@spearce.org>
Subject: [PATCH] preserve mtime of local clone
Date: Wed, 9 Sep 2009 21:51:58 +0200	[thread overview]
Message-ID: <20090909195158.GA12968@localhost> (raw)

A local clone without hardlinks copies all objects, including dangling
ones, to the new repository. Since the mtimes are renewed, those
dangling objects cannot be pruned by "git gc --prune", even if they
would have been old enough for pruning in the original repository.

Instead, preserve mtime during copy. "git gc --prune" will then work
in the clone just like it would have in the original.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

I noticed this problem when I cloned a repo with lots of old dangling
objects onto a Windows machine. git-gui immediately recommended running
git-gc, and I did. But each time I restarted git-gui, it recommended git-gc
again, because there were still plenty of dangling objects lying around
which could not be removed due to their recent mtimes.

So there is actually a problem with git-gui's recommendation. Especially on
Windows, where it only checks for 1 or more files in .git/objects/42 (as
opposed to 8 files on other platforms). The probability of that happening if
the repo contains about 100 loose objects is 1-(254/255)^100 = 32%. The
probability for the same to happen with at least 2 files is only 6% [*].
Maybe that would be a good compromise?

Alternatively, git-gc could remember the number of dangling objects, and
git-gui can adjust its recommendation accordingly, taking that number and
the date of the latest repack into account.

Clemens

[*] The following octave script shows the probability for m or more objects
to be in .git/objects/42 for a total of n objects.

% Probability that m or more loose objects fall into one particular
% object directory (e.g. .git/objects/42) out of n objects in total.
m = [1 2 8];       % thresholds: 1 (git-gui on Windows), 2 (proposed), 8 (other platforms)
n = 100:100:3000;  % total number of loose objects

% One row per entry of n, one column per entry of m.
P = zeros(length(n), length(m));
for k = 1:length(n)
	% Index the row by k, not by n(k): writing to row n(k) would grow P to
	% 3000 mostly-zero rows and make plot(n, P) fail on mismatched sizes.
	% NOTE(review): 1/255 is kept to match the figures quoted in the text;
	% confirm whether 1/256 was intended. The difference is negligible.
	P(k, :) = 1 - binocdf(m-1, n(k), 1/255);
end
plot(n, P);

n \ m	1	2	8
100	32%	6%	0%
500	86%	58%	0%
1000	98%	90%	5%
2000	100%	100%	55%

---
 builtin-clone.c   |    2 +-
 builtin-init-db.c |    2 +-
 cache.h           |    6 ++++--
 copy.c            |   25 ++++++++++++++++++++++---
 lockfile.c        |    2 +-
 rerere.c          |    2 +-
 6 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..cb3c895 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file(dest->buf, src->buf, 0666, 1))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/builtin-init-db.c b/builtin-init-db.c
index dd84cae..5deb81d 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -100,7 +100,7 @@ static void copy_templates_1(char *path, int baselen,
 				die_errno("cannot symlink '%s' '%s'", lnk, path);
 		}
 		else if (S_ISREG(st_template.st_mode)) {
-			if (copy_file(path, template, st_template.st_mode))
+			if (copy_file(path, template, st_template.st_mode, 0))
 				die_errno("cannot copy '%s' to '%s'", template,
 					  path);
 		}
diff --git a/cache.h b/cache.h
index 5fad24c..1875c97 100644
--- a/cache.h
+++ b/cache.h
@@ -921,8 +921,10 @@ extern const char *git_mailmap_file;
 
 /* IO helper functions */
 extern void maybe_flush_or_die(FILE *, const char *);
-extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_fd(int ifd, int ofd, int preserve_times);
+extern int copy_file(const char *dst, const char *src, int mode, int
+		preserve_times);
+extern int copy_times(int ofd, int ifd);
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern void write_or_die(int fd, const void *buf, size_t count);
diff --git a/copy.c b/copy.c
index e54d15a..fe0380e 100644
--- a/copy.c
+++ b/copy.c
@@ -1,6 +1,6 @@
 #include "cache.h"
 
-int copy_fd(int ifd, int ofd)
+int copy_fd(int ifd, int ofd, int preserve_times)
 {
 	while (1) {
 		char buffer[8192];
@@ -31,11 +31,18 @@ int copy_fd(int ifd, int ofd)
 			}
 		}
 	}
+	if (preserve_times && copy_times(ofd, ifd)) {
+		int time_error = errno;
+		close(ifd);
+		return error("copy-fd: failed to preserve times: %s",
+				strerror(time_error));
+	}
 	close(ifd);
 	return 0;
 }
 
-int copy_file(const char *dst, const char *src, int mode)
+int copy_file(const char *dst, const char *src, int mode,
+		int preserve_times)
 {
 	int fdi, fdo, status;
 
@@ -46,7 +53,7 @@ int copy_file(const char *dst, const char *src, int mode)
 		close(fdi);
 		return fdo;
 	}
-	status = copy_fd(fdi, fdo);
+	status = copy_fd(fdi, fdo, preserve_times);
 	if (close(fdo) != 0)
 		return error("%s: close error: %s", dst, strerror(errno));
 
@@ -55,3 +62,15 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	return status;
 }
+
+int copy_times(int ofd, int ifd)
+{
+	struct stat st;
+	struct timespec times[2];
+	if (fstat(ifd, &st))
+		return -1;
+	times[0].tv_nsec = UTIME_OMIT;
+	times[1].tv_sec = st.st_mtime;
+	times[1].tv_nsec = ST_MTIME_NSEC(st);
+	return futimens(ofd, times);
+}
diff --git a/lockfile.c b/lockfile.c
index eb931ed..c7bbd4d 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -196,7 +196,7 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
 			close(fd);
 			return error("cannot open '%s' for copying", path);
 		}
-	} else if (copy_fd(orig_fd, fd)) {
+	} else if (copy_fd(orig_fd, fd, 0)) {
 		if (flags & LOCK_DIE_ON_ERROR)
 			exit(128);
 		close(fd);
diff --git a/rerere.c b/rerere.c
index 87360dc..d25f5f1 100644
--- a/rerere.c
+++ b/rerere.c
@@ -326,7 +326,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 			continue;
 
 		fprintf(stderr, "Recorded resolution for '%s'.\n", path);
-		copy_file(rerere_path(name, "postimage"), path, 0666);
+		copy_file(rerere_path(name, "postimage"), path, 0666, 0);
 	mark_resolved:
 		rr->items[i].util = NULL;
 	}
-- 
1.6.4.2.266.gbaa17

             reply	other threads:[~2009-09-09 19:52 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-09 19:51 Clemens Buchacher [this message]
2009-09-12  5:09 ` [PATCH] preserve mtime of local clone Junio C Hamano
2009-09-12  8:26   ` Clemens Buchacher
2009-09-12  9:03     ` Clemens Buchacher
2009-09-13  3:06       ` Junio C Hamano
2009-09-13 10:49         ` [PATCH v3] " Clemens Buchacher
2009-09-13 16:06 ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Clemens Buchacher
2009-09-13 17:58   ` Junio C Hamano
2009-09-13 18:41     ` Clemens Buchacher
2009-09-13 20:44       ` Jeff King
2009-09-13 21:19         ` Clemens Buchacher
2009-09-13 22:20           ` [PATCH] git-gui: search 4 directories to improve statistic of gc hint Clemens Buchacher
2009-09-14  3:39           ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Shawn O. Pearce

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090909195158.GA12968@localhost \
    --to=drizzd@aon.at \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=msysgit@googlegroups.com \
    --cc=spearce@spearce.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.