git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Ryan Anderson <ryan@michonline.com>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org, Junio C Hamano <junkio@cox.net>
Subject: [PATCH] Add git-relink-script to fix up missing hardlinks
Date: Sun, 26 Jun 2005 14:15:16 -0400	[thread overview]
Message-ID: <20050626181516.GC20369@mythryan2.michonline.com> (raw)


Add git-relink-script

This will scan 2 or more object repositories and look for common objects, check
if they are hardlinked, and replace one with a hardlink to the other if not.

This version warns when skipping files because of size differences, and
handle more than 2 repositories automatically.

Signed-off-by: Ryan Anderson <ryan@michonline.com>

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ SCRIPTS=git git-apply-patch-script git-m
 	git-deltafy-script git-fetch-script git-status-script git-commit-script \
 	git-log-script git-shortlog git-cvsimport-script git-diff-script \
 	git-reset-script git-add-script git-checkout-script git-clone-script \
-	gitk git-cherry git-rebase-script
+	gitk git-cherry git-rebase-script git-relink-script
 
 PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
 	git-read-tree git-commit-tree git-cat-file git-fsck-cache \
diff --git a/git-relink-script b/git-relink-script
new file mode 100644
--- /dev/null
+++ b/git-relink-script
@@ -0,0 +1,173 @@
+#!/usr/bin/env perl
+# Copyright 2005, Ryan Anderson <ryan@michonline.com>
+# Distribution permitted under the GPL v2, as distributed
+# by the Free Software Foundation.
+# Later versions of the GPL at the discretion of Linus Torvalds
+#
+# Scan two git object-trees, and hardlink any common objects between them.
+
+use 5.006;
+use strict;
+use warnings;
+use Getopt::Long;
+
+sub get_canonical_form($);
+sub do_scan_directory($$$);
+sub compare_two_files($$);
+sub usage();
+sub link_two_files($$);
+
+# stats
+my $total_linked = 0;
+my $total_already = 0;
+my ($linked,$already);
+
+my $fail_on_different_sizes = 0;
+my $help = 0;
+GetOptions("safe" => \$fail_on_different_sizes,
+	   "help" => \$help);
+
+usage() if $help;
+
+my (@dirs) = @ARGV;
+
+usage() if (!defined $dirs[0] || !defined $dirs[1]);
+
+$_ = get_canonical_form($_) foreach (@dirs);
+
+my $master_dir = pop @dirs;
+
+opendir(D,$master_dir . "objects/")
+	or die "Failed to open $master_dir/objects/ : $!";
+
+my @hashdirs = grep !/^\.{1,2}$/, readdir(D);
+
+foreach my $repo (@dirs) {
+	$linked = 0;
+	$already = 0;
+	printf("Searching '%s' and '%s' for common objects and hardlinking them...\n",
+		$master_dir,$repo);
+
+	foreach my $hashdir (@hashdirs) {
+		do_scan_directory($master_dir, $hashdir, $repo);
+	}
+
+	printf("Linked %d files, %d were already linked.\n",$linked, $already);
+
+	$total_linked += $linked;
+	$total_already += $already;
+}
+
+printf("Totals: Linked %d files, %d were already linked.\n",
+	$total_linked, $total_already);
+
+
+sub do_scan_directory($$$) {
+	my ($srcdir, $subdir, $dstdir) = @_;
+
+	my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir);
+	my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir);
+
+	opendir(S,$sfulldir)
+		or die "Failed to opendir $sfulldir: $!";
+
+	foreach my $file (grep(!/\.{1,2}$/, readdir(S))) {
+		my $sfilename = $sfulldir . $file;
+		my $dfilename = $dfulldir . $file;
+
+		compare_two_files($sfilename,$dfilename);
+
+	}
+	closedir(S);
+}
+
+sub compare_two_files($$) {
+	my ($sfilename, $dfilename) = @_;
+
+	# Perl's stat returns relevant information as follows:
+	# 0 = dev number
+	# 1 = inode number
+	# 7 = size
+	my @sstatinfo = stat($sfilename);
+	my @dstatinfo = stat($dfilename);
+
+	if (@sstatinfo == 0 && @dstatinfo == 0) {
+		die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!);
+
+	} elsif (@dstatinfo == 0) {
+		return;
+	}
+
+	if ( ($sstatinfo[0] == $dstatinfo[0]) &&
+	     ($sstatinfo[1] != $dstatinfo[1])) {
+		if ($sstatinfo[7] == $dstatinfo[7]) {
+			link_two_files($sfilename, $dfilename);
+
+		} else {
+			my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n",
+				$sfilename, $dfilename);
+			if ($fail_on_different_sizes) {
+				die $err;
+			} else {
+				warn $err;
+			}
+		}
+
+	} elsif ( ($sstatinfo[0] == $dstatinfo[0]) &&
+	     ($sstatinfo[1] == $dstatinfo[1])) {
+		$already++;
+	}
+}
+
+sub get_canonical_form($) {
+	my $dir = shift;
+	my $original = $dir;
+
+	die "$dir is not a directory." unless -d $dir;
+
+	$dir .= "/" unless $dir =~ m#/$#;
+	$dir .= ".git/" unless $dir =~ m#\.git/$#;
+
+	die "$original does not have a .git/ subdirectory.\n" unless -d $dir;
+
+	return $dir;
+}
+
+sub link_two_files($$) {
+	my ($sfilename, $dfilename) = @_;
+	my $tmpdname = sprintf("%s.old",$dfilename);
+	rename($dfilename,$tmpdname)
+		or die sprintf("Failure renaming %s to %s: %s",
+			$dfilename, $tmpdname, $!);
+
+	if (! link($sfilename,$dfilename)) {
+		my $failtxt = "";
+		unless (rename($tmpdname,$dfilename)) {
+			$failtxt = sprintf(
+				"Git Repository containing %s is probably corrupted, " .
+				"please copy '%s' to '%s' to fix.\n",
+				$tmpdname, $dfilename);
+		}
+
+		die sprintf("Failed to link %s to %s: %s\n%s" .
+			$sfilename, $dfilename,
+			$!, $dfilename, $failtxt);
+	}
+
+	unlink($tmpdname)
+		or die sprintf("Unlink of %s failed: %s\n",
+			$dfilename, $!);
+
+	$linked++;
+}
+
+
+sub usage() {
+	print("Usage: $0 [--safe] <dir> [<dir> ...] <master_dir> \n");
+	print("All directories should contain a .git/objects/ subdirectory.\n");
+	print("Options\n");
+	print("\t--safe\t" .
+		"Stops if two objects with the same hash exist but " .
+		"have different sizes.  Default is to warn and continue.\n");
+	exit(1);
+}
-- 

Ryan Anderson
  sometimes Pug Majere

             reply	other threads:[~2005-06-26 18:15 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-06-26 18:15 Ryan Anderson [this message]
2005-06-26 18:36 ` [PATCH] Add git-relink-script to fix up missing hardlinks Jeff Garzik
2005-06-26 19:07 ` Junio C Hamano
2005-06-26 19:31   ` Junio C Hamano
2005-06-26 19:44     ` Jeff Garzik
2005-06-27  1:11       ` Jan Harkes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050626181516.GC20369@mythryan2.michonline.com \
    --to=ryan@michonline.com \
    --cc=git@vger.kernel.org \
    --cc=junkio@cox.net \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).