git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Clemens Buchacher <drizzd@aon.at>
To: git@vger.kernel.org
Cc: msysgit@googlegroups.com, Junio C Hamano <gitster@pobox.com>,
	"Shawn O. Pearce" <spearce@spearce.org>
Subject: [PATCH] preserve mtime of local clone
Date: Wed, 9 Sep 2009 21:51:58 +0200	[thread overview]
Message-ID: <20090909195158.GA12968@localhost> (raw)

A local clone without hardlinks copies all objects, including dangling
ones, to the new repository. Since the mtimes are renewed, those
dangling objects cannot be pruned by "git gc --prune", even if they
would have been old enough for pruning in the original repository.

Instead, preserve mtime during copy. "git gc --prune" will then work
in the clone just like it would have in the original.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
---

I noticed this problem when I cloned a repo with lots of old dangling
objects onto a windows machine. git-gui immediately recommended running
git-gc, and I did. But each time I restarted git-gui, it recommended git-gc
again, because there were still plenty of dangling objects lying around
which could not be removed due to their recent mtimes.

So there is actually a problem with git-gui's recommendation. Especially on
Windows, where it only checks for 1 or more files in .git/objects/42 (as
opposed to 8 files on other platforms). The probability of that happening if
the repo contains about 100 loose objects is 1-(254/255)^100 = 32%. The
probability for the same to happen with at least 2 files is only 6% [*].
Maybe that would be a good compromise?

Alternatively, git-gc could remember the number of dangling objects, and
git-gui can adjust its recommendation accordingly, taking that number and
the date of the latest repack into account.

Clemens

[*] The following octave script shows the probability for m or more objects
to be in .git/objects/42 for a total of n objects.

% Probability that m or more out of n objects fall into one particular
% directory, for every threshold in m and every object count in n.
% P has one row per entry of n and one column per entry of m.
m = [1 2 8];
n = 100:100:3000;

P = zeros(length(n), length(m));
for k = 1:length(n)
	% Store into row k, not row n(k): indexing by the object count would
	% grow P far beyond length(n) rows and leave plot(n, P) without a
	% dimension matching n.
	% P(X >= m) = 1 - P(X <= m-1) for X ~ Binomial(n(k), 1/255).
	% NOTE(review): 1/255 mirrors the figure used in the message text;
	% confirm whether 1/256 was intended.
	P(k, :) = 1-binocdf(m-1, n(k), 1/255);
end
plot(n, P);

n \ m	1	2	8
100	32%	6%	0%
500	86%	58%	0%
1000	98%	90%	5%
2000	100%	100%	55%

---
 builtin-clone.c   |    2 +-
 builtin-init-db.c |    2 +-
 cache.h           |    6 ++++--
 copy.c            |   25 ++++++++++++++++++++++---
 lockfile.c        |    2 +-
 rerere.c          |    2 +-
 6 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..cb3c895 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file(dest->buf, src->buf, 0666, 1))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/builtin-init-db.c b/builtin-init-db.c
index dd84cae..5deb81d 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -100,7 +100,7 @@ static void copy_templates_1(char *path, int baselen,
 				die_errno("cannot symlink '%s' '%s'", lnk, path);
 		}
 		else if (S_ISREG(st_template.st_mode)) {
-			if (copy_file(path, template, st_template.st_mode))
+			if (copy_file(path, template, st_template.st_mode, 0))
 				die_errno("cannot copy '%s' to '%s'", template,
 					  path);
 		}
diff --git a/cache.h b/cache.h
index 5fad24c..1875c97 100644
--- a/cache.h
+++ b/cache.h
@@ -921,8 +921,10 @@ extern const char *git_mailmap_file;
 
 /* IO helper functions */
 extern void maybe_flush_or_die(FILE *, const char *);
-extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_fd(int ifd, int ofd, int preserve_times);
+extern int copy_file(const char *dst, const char *src, int mode, int
+		preserve_times);
+extern int copy_times(int ofd, int ifd);
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern void write_or_die(int fd, const void *buf, size_t count);
diff --git a/copy.c b/copy.c
index e54d15a..fe0380e 100644
--- a/copy.c
+++ b/copy.c
@@ -1,6 +1,6 @@
 #include "cache.h"
 
-int copy_fd(int ifd, int ofd)
+int copy_fd(int ifd, int ofd, int preserve_times)
 {
 	while (1) {
 		char buffer[8192];
@@ -31,11 +31,18 @@ int copy_fd(int ifd, int ofd)
 			}
 		}
 	}
+	if (preserve_times && copy_times(ofd, ifd)) {
+		int time_error = errno;
+		close(ifd);
+		return error("copy-fd: failed to preserve times: %s",
+				strerror(time_error));
+	}
 	close(ifd);
 	return 0;
 }
 
-int copy_file(const char *dst, const char *src, int mode)
+int copy_file(const char *dst, const char *src, int mode,
+		int preserve_times)
 {
 	int fdi, fdo, status;
 
@@ -46,7 +53,7 @@ int copy_file(const char *dst, const char *src, int mode)
 		close(fdi);
 		return fdo;
 	}
-	status = copy_fd(fdi, fdo);
+	status = copy_fd(fdi, fdo, preserve_times);
 	if (close(fdo) != 0)
 		return error("%s: close error: %s", dst, strerror(errno));
 
@@ -55,3 +62,15 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	return status;
 }
+
+int copy_times(int ofd, int ifd)
+{
+	struct stat st;
+	struct timespec times[2];
+	if (fstat(ifd, &st))
+		return -1;
+	times[0].tv_nsec = UTIME_OMIT;
+	times[1].tv_sec = st.st_mtime;
+	times[1].tv_nsec = ST_MTIME_NSEC(st);
+	return futimens(ofd, times);
+}
diff --git a/lockfile.c b/lockfile.c
index eb931ed..c7bbd4d 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -196,7 +196,7 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
 			close(fd);
 			return error("cannot open '%s' for copying", path);
 		}
-	} else if (copy_fd(orig_fd, fd)) {
+	} else if (copy_fd(orig_fd, fd, 0)) {
 		if (flags & LOCK_DIE_ON_ERROR)
 			exit(128);
 		close(fd);
diff --git a/rerere.c b/rerere.c
index 87360dc..d25f5f1 100644
--- a/rerere.c
+++ b/rerere.c
@@ -326,7 +326,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 			continue;
 
 		fprintf(stderr, "Recorded resolution for '%s'.\n", path);
-		copy_file(rerere_path(name, "postimage"), path, 0666);
+		copy_file(rerere_path(name, "postimage"), path, 0666, 0);
 	mark_resolved:
 		rr->items[i].util = NULL;
 	}
-- 
1.6.4.2.266.gbaa17

             reply	other threads:[~2009-09-09 19:52 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-09 19:51 Clemens Buchacher [this message]
2009-09-12  5:09 ` [PATCH] preserve mtime of local clone Junio C Hamano
2009-09-12  8:26   ` Clemens Buchacher
2009-09-12  9:03     ` Clemens Buchacher
2009-09-13  3:06       ` Junio C Hamano
2009-09-13 10:49         ` [PATCH v3] " Clemens Buchacher
2009-09-13 16:06 ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Clemens Buchacher
2009-09-13 17:58   ` Junio C Hamano
2009-09-13 18:41     ` Clemens Buchacher
2009-09-13 20:44       ` Jeff King
2009-09-13 21:19         ` Clemens Buchacher
2009-09-13 22:20           ` [PATCH] git-gui: search 4 directories to improve statistic of gc hint Clemens Buchacher
2009-09-14  3:39           ` [PATCH] git-gui: suggest gc only when counting at least 2 objects Shawn O. Pearce

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090909195158.GA12968@localhost \
    --to=drizzd@aon.at \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=msysgit@googlegroups.com \
    --cc=spearce@spearce.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).