git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] Add git-annotate, a tool for assigning blame.
@ 2006-02-20 10:46 Ryan Anderson
  2006-02-20 22:54 ` Junio C Hamano
  2006-02-20 23:40 ` Fredrik Kuivinen
  0 siblings, 2 replies; 15+ messages in thread
From: Ryan Anderson @ 2006-02-20 10:46 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Ryan Anderson

Signed-off-by: Ryan Anderson <ryan@michonline.com>

---

(Pull from http://h4x0r5.com/~ryan/git/ryan.git/ annotate-upstream )

I'm pretty sure this version (finally) gets the edge cases correct.

I would appreciate some other testing on this, as I can't find a case
where it falls down, but the files with a lot of history tend to have a
lot of lines, making them hard to spotcheck without having been an
intimate part of that history.

Oh, this is the "functional" version, but it might not qualify as "nice
looking" yet; please feel free to complain.

 Makefile          |    1 
 git-annotate.perl |  321 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 322 insertions(+), 0 deletions(-)
 create mode 100755 git-annotate.perl

107045e8abb674a66ee7c682dd85a3d303f26e3c
diff --git a/Makefile b/Makefile
index 317be3c..86ffcf4 100644
--- a/Makefile
+++ b/Makefile
@@ -119,6 +119,7 @@ SCRIPT_SH = \
 SCRIPT_PERL = \
 	git-archimport.perl git-cvsimport.perl git-relink.perl \
 	git-shortlog.perl git-fmt-merge-msg.perl git-rerere.perl \
+	git-annotate.perl \
 	git-svnimport.perl git-mv.perl git-cvsexportcommit.perl
 
 SCRIPT_PYTHON = \
diff --git a/git-annotate.perl b/git-annotate.perl
new file mode 100755
index 0000000..8f98431
--- /dev/null
+++ b/git-annotate.perl
@@ -0,0 +1,321 @@
+#!/usr/bin/perl
+# Copyright 2006, Ryan Anderson <ryan@michonline.com>
+#
+# GPL v2 (See COPYING)
+#
+# This file is licensed under the GPL v2, or a later version
+# at the discretion of Linus Torvalds.
+
+use warnings;
+use strict;
+
+my $filename = shift @ARGV;
+
+
+my @stack = (
+	{
+		'rev' => "HEAD",
+		'filename' => $filename,
+	},
+);
+
+our (@lineoffsets, @pendinglineoffsets);
+our @filelines = ();
+open(F,"<",$filename)
+	or die "Failed to open filename: $!";
+
+while(<F>) {
+	chomp;
+	push @filelines, $_;
+}
+close(F);
+our $leftover_lines = @filelines;
+our %revs;
+our @revqueue;
+our $head;
+
+my $revsprocessed = 0;
+while (my $bound = pop @stack) {
+	my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
+	foreach my $revinst (@revisions) {
+		my ($rev, @parents) = @$revinst;
+		$head ||= $rev;
+
+		$revs{$rev}{'filename'} = $bound->{'filename'};
+		if (scalar @parents > 0) {
+			$revs{$rev}{'parents'} = \@parents;
+			next;
+		}
+
+		my $newbound = find_parent_renames($rev, $bound->{'filename'});
+		if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
+			push @stack, $newbound;
+			$revs{$rev}{'parents'} = [$newbound->{'rev'}];
+		}
+	}
+}
+push @revqueue, $head;
+init_claim($head);
+$revs{$head}{'lineoffsets'} = {};
+handle_rev();
+
+
+my $i = 0;
+foreach my $l (@filelines) {
+	my ($output, $rev, $committer, $date);
+	if (ref $l eq 'ARRAY') {
+		($output, $rev, $committer, $date) = @$l;
+		if (length($rev) > 8) {
+			$rev = substr($rev,0,8);
+		}
+	} else {
+		$output = $l;
+		($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
+	}
+
+	printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output);
+}
+
+sub init_claim {
+	my ($rev) = @_;
+	my %revinfo = git_commit_info($rev);
+	for (my $i = 0; $i < @filelines; $i++) {
+		$filelines[$i] = [ $filelines[$i], '', '', '', 1];
+			# line,
+			# rev,
+			# author,
+			# date,
+			# 1 <-- belongs to the original file.
+	}
+	$revs{$rev}{'lines'} = \@filelines;
+}
+
+
+sub handle_rev {
+	my $i = 0;
+	while (my $rev = shift @revqueue) {
+
+		my %revinfo = git_commit_info($rev);
+
+		foreach my $p (@{$revs{$rev}{'parents'}}) {
+
+			git_diff_parse($p, $rev, %revinfo);
+			push @revqueue, $p;
+		}
+
+
+		if (scalar @{$revs{$rev}{parents}} == 0) {
+			# We must be at the initial rev here, so claim everything that is left.
+			for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
+				if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
+					claim_line($i, $rev, $revs{$rev}{lines}, %revinfo);
+				}
+			}
+		}
+	}
+}
+
+
+sub git_rev_list {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+		or die "Failed to exec git-rev-list: $!";
+
+	my @revs;
+	while(my $line = <P>) {
+		chomp $line;
+		my ($rev, @parents) = split /\s+/, $line;
+		push @revs, [ $rev, @parents ];
+	}
+	close(P);
+
+	printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
+	return @revs;
+}
+
+sub find_parent_renames {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
+		or die "Failed to exec git-diff: $!";
+
+	local $/ = "\0";
+	my %bound;
+	my $junk = <P>;
+	while (my $change = <P>) {
+		chomp $change;
+		my $filename = <P>;
+		chomp $filename;
+
+		if ($change =~ m/^[AMD]$/ ) {
+			next;
+		} elsif ($change =~ m/^R/ ) {
+			my $oldfilename = $filename;
+			$filename = <P>;
+			chomp $filename;
+			if ( $file eq $filename ) {
+				my $parent = git_find_parent($rev, $oldfilename);
+				@bound{'rev','filename'} = ($parent, $oldfilename);
+				last;
+			}
+		}
+	}
+	close(P);
+
+	return \%bound;
+}
+
+
+sub git_find_parent {
+	my ($rev, $filename) = @_;
+
+	open(REVPARENT,"-|","git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
+		or die "Failed to open git-rev-list to find a single parent: $!";
+
+	my $parentline = <REVPARENT>;
+	chomp $parentline;
+	my ($revfound,$parent) = split m/\s+/, $parentline;
+
+	close(REVPARENT);
+
+	return $parent;
+}
+
+
+# Get a diff between the current revision and a parent.
+# Record the commit information that results.
+sub git_diff_parse {
+	my ($parent, $rev, %revinfo) = @_;
+
+	my ($ri, $pi) = (0,0);
+	open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--",
+			$revs{$rev}{'filename'}, $revs{$parent}{'filename'})
+		or die "Failed to call git-diff for annotation: $!";
+
+	my $slines = $revs{$rev}{'lines'};
+	my @plines;
+
+	my $gotheader = 0;
+	my ($remstart, $remlength, $addstart, $addlength);
+	my ($hunk_start, $hunk_index, $hunk_adds);
+	while(<DIFF>) {
+		chomp;
+		if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
+			($remstart, $remlength, $addstart, $addlength) = ($1, $2, $3, $4);
+			# Adjust for 0-based arrays
+			$remstart--;
+			$addstart--;
+			# Reinit hunk tracking.
+			$hunk_start = $remstart;
+			$hunk_index = 0;
+			$gotheader = 1;
+
+			for (my $i = $ri; $i < $remstart; $i++) {
+				$plines[$pi++] = $slines->[$i];
+				$ri++;
+			}
+			next;
+		} elsif (!$gotheader) {
+			next;
+		}
+
+		if (m/^\+(.*)$/) {
+			my $line = $1;
+			$plines[$pi++] = [ $line, '', '', '', 0 ];
+			next;
+
+		} elsif (m/^-(.*)$/) {
+			my $line = $1;
+			if (get_line($slines, $ri) eq $line) {
+				# Found a match, claim
+				claim_line($ri, $rev, $slines, %revinfo);
+			} else {
+				die sprintf("Sync error: %d/%d\n|%s\n|%s\n%s => %s\n",
+						$ri, $hunk_start + $hunk_index,
+						$line,
+						get_line($slines, $ri),
+						$rev, $parent);
+			}
+			$ri++;
+
+		} else {
+			if (substr($_,1) ne get_line($slines,$ri) ) {
+				die sprintf("Line %d (%d) does not match:\n|%s\n|%s\n%s => %s\n",
+						$hunk_start + $hunk_index, $ri,
+						substr($_,1),
+						get_line($slines,$ri),
+						$rev, $parent);
+			}
+			$plines[$pi++] = $slines->[$ri++];
+		}
+		$hunk_index++;
+	}
+	close(DIFF);
+	for (my $i = $ri; $i < @{$slines} ; $i++) {
+		push @plines, $slines->[$ri++];
+	}
+
+	$revs{$parent}{lines} = \@plines;
+	return;
+}
+
+sub get_line {
+	my ($lines, $index) = @_;
+
+	return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index];
+}
+
+sub git_cat_file {
+	my ($parent, $filename) = @_;
+	return () unless defined $parent && defined $filename;
+	my $blobline = `git-ls-tree $parent $filename`;
+	my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4);
+
+	open(C,"-|","git-cat-file", "blob", $blob)
+		or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!;
+
+	my @lines;
+	while(<C>) {
+		chomp;
+		push @lines, $_;
+	}
+	close(C);
+
+	return @lines;
+}
+
+
+sub claim_line {
+	my ($floffset, $rev, $lines, %revinfo) = @_;
+	my $oline = get_line($lines, $floffset);
+	@{$lines->[$floffset]} = ( $oline, $rev,
+		$revinfo{'author'}, $revinfo{'author_date'} );
+	#printf("Claiming line %d with rev %s: '%s'\n",
+	#		$floffset, $rev, $oline) if 1;
+}
+
+sub git_commit_info {
+	my ($rev) = @_;
+	open(COMMIT, "-|","git-cat-file", "commit", $rev)
+		or die "Failed to call git-cat-file: $!";
+
+	my %info;
+	while(<COMMIT>) {
+		chomp;
+		last if (length $_ == 0);
+
+		if (m/^author (.*) <(.*)> (.*)$/) {
+			$info{'author'} = $1;
+			$info{'author_email'} = $2;
+			$info{'author_date'} = $3;
+		} elsif (m/^committer (.*) <(.*)> (.*)$/) {
+			$info{'committer'} = $1;
+			$info{'committer_email'} = $2;
+			$info{'committer_date'} = $3;
+		}
+	}
+	close(COMMIT);
+
+	return %info;
+}
-- 
1.2.2.gb342

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-20 10:46 [PATCH] Add git-annotate, a tool for assigning blame Ryan Anderson
@ 2006-02-20 22:54 ` Junio C Hamano
  2006-02-20 23:40 ` Fredrik Kuivinen
  1 sibling, 0 replies; 15+ messages in thread
From: Junio C Hamano @ 2006-02-20 22:54 UTC (permalink / raw
  To: Ryan Anderson; +Cc: git

Ryan Anderson <ryan@michonline.com> writes:

> I would appreciate some other testing on this, as I can't find a case
> where it falls down, but the files with a lot of history tend to have a
> lot of lines, making them hard to spotcheck without having been an
> intimate part of that history.

I looked at a couple of files, including pack-objects.c,
rev-list.c (in "pu"), and svnimport.perl; what I saw made sense.
I think however we would want to check things with a lot of
merges, so git repository may not be a good guinea pig.

> Oh, this is the "functional" version, but it might not qualify as "nice
> looking" yet, pleaes, feel free to complain.

Nice.

Two design glitches and one implementation issue:

 - You seem to rely on the working tree file to be clean relative to
   HEAD.

 - You do not take anything other than HEAD as the starting point.

Maybe an additional -r<this-version> option which defaults to
HEAD, plus reading the blob always from that tree as the
starting point would be helpful.

> +	open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)

Johannes noticed that I had let this form of open(), which Perl 5.6
does not grok, slip into another program.  Eric pointed out that he
uses a backward-compatible idiom in his code.

We would need to make this one safe; something like this, perhaps:

	sub open_read_pipe {
        	my ($fh, @cmd) = @_;
                my $pid = open($fh, '-|');
		die "$!" unless defined($pid);
		if (!$pid) {
			exec(@cmd) or die "$!";
		}
		return $fh;
	}
	...
        open_read_pipe(\*P, qw(git-rev-list --parents --remove-empty),
			$rev, '--', $file);


> +	open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
> +		or die "Failed to exec git-diff: $!";

If you did not mean "I want -M50" but rather "I want whatever
the default is", I'd leave that 50 out.

It is probably premature to talk about issues for UI that can be
built on top of this, but what I found interesting was this.
While looking at the annotate output, whenever I got curious
about one line ("Oh, this is an ancient change and by somebody
who does not feed patches to git regularly -- what was the
change about???"), I grabbed the SHA1 of the commit on the line
and threw it at "git show".  It was a good way to see how the
change to the line was done in context.  Maybe a two-pane UI
that shows annotate on the top pane, and activating one line
from it shows "git show" output on the bottom pane to show the
commit log plus changes the commit introduced to the file and
other files.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-20 10:46 [PATCH] Add git-annotate, a tool for assigning blame Ryan Anderson
  2006-02-20 22:54 ` Junio C Hamano
@ 2006-02-20 23:40 ` Fredrik Kuivinen
  2006-02-21  0:01   ` Junio C Hamano
  2006-02-23 22:10   ` Ryan Anderson
  1 sibling, 2 replies; 15+ messages in thread
From: Fredrik Kuivinen @ 2006-02-20 23:40 UTC (permalink / raw
  To: Ryan Anderson; +Cc: Junio C Hamano, git

On Mon, Feb 20, 2006 at 05:46:09AM -0500, Ryan Anderson wrote:
> Signed-off-by: Ryan Anderson <ryan@michonline.com>
> 
> ---
> 
> (Pull from http://h4x0r5.com/~ryan/git/ryan.git/ annotate-upstream )
> 
> I'm pretty sure this version (finally) gets the edge cases correct.
> 
> I would appreciate some other testing on this, as I can't find a case
> where it falls down, but the files with a lot of history tend to have a
> lot of lines, making them hard to spotcheck without having been an
> intimate part of that history.
> 
> Oh, this is the "functional" version, but it might not qualify as "nice
> looking" yet, pleaes, feel free to complain.
> 

Nice work!


I have also been working on a blame program. The algorithm is pretty
much the one described by Junio in his blame.perl. My variant doesn't
handle renames, but it shouldn't be too hard to add that. The output
is minimal, just the line number followed by the commit SHA1.

An interesting observation is that the output from my git-blame and
your git-annotate doesn't match on all files in the git
repository. One example where several lines differ is read-cache.c. I
haven't investigated it further to find out which one is correct.


The code should be considered as a work in progress. It certainly has
a couple of rough edges. The output looks fairly sane on the few files
I have tested it on, but it wouldn't be too surprising if it gets some
cases wrong.


Signed-off-by: Fredrik Kuivinen <freku045@student.liu.se>


---

 Makefile |    2 
 blame.c  |  444 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 445 insertions(+), 1 deletions(-)
 create mode 100644 blame.c

afe1466a4f94bc72b42dce5fd1fed480c5aa4c49
diff --git a/Makefile b/Makefile
index 317be3c..4a3ffe9 100644
--- a/Makefile
+++ b/Makefile
@@ -153,7 +153,7 @@ PROGRAMS = \
 	git-upload-pack$X git-verify-pack$X git-write-tree$X \
 	git-update-ref$X git-symbolic-ref$X git-check-ref-format$X \
 	git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \
-	git-describe$X
+	git-describe$X git-blame$X
 
 # what 'all' will build and 'install' will install, in gitexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS)
diff --git a/blame.c b/blame.c
new file mode 100644
index 0000000..d4a2fad
--- /dev/null
+++ b/blame.c
@@ -0,0 +1,444 @@
+#include <assert.h>
+
+#include "cache.h"
+#include "refs.h"
+#include "tag.h"
+#include "commit.h"
+#include "tree.h"
+#include "blob.h"
+#include "epoch.h"
+#include "diff.h"
+
+#define DEBUG 0
+
+struct commit** blame_lines;
+int num_blame_lines;
+
+struct util_info
+{
+    int* line_map;
+    int num_lines;
+    unsigned char sha1[20]; /* blob sha, not commit! */
+    char* buf;
+    unsigned long size;
+//    const char* path;
+};
+
+struct chunk
+{
+    int off1, len1; // ---
+    int off2, len2; // +++
+};
+
+struct patch
+{
+    struct chunk* chunks;
+    int num;
+};
+
+static void get_blob(struct commit* commit);
+
+int num_get_patch = 0;
+int num_commits = 0;
+
+struct patch* get_patch(struct commit* commit, struct commit* other)
+{
+    struct patch* ret = xmalloc(sizeof(struct patch));
+    ret->chunks = NULL;
+    ret->num = 0;
+
+    struct util_info* info_c = (struct util_info*) commit->object.util;
+    struct util_info* info_o = (struct util_info*) other->object.util;
+
+    if(!memcmp(info_c->sha1, info_o->sha1, 20))
+        return ret;
+
+    get_blob(commit);
+    get_blob(other);
+
+    FILE* fout = fopen("/tmp/git-blame-tmp1", "w");
+    if(!fout)
+        die("fopen tmp1 failed: %s", strerror(errno));
+
+    if(fwrite(info_c->buf, info_c->size, 1, fout) != 1)
+        die("fwrite 1 failed: %s", strerror(errno));
+    fclose(fout);
+
+    fout = fopen("/tmp/git-blame-tmp2", "w");
+    if(!fout)
+        die("fopen tmp2 failed: %s", strerror(errno));
+
+    if(fwrite(info_o->buf, info_o->size, 1, fout) != 1)
+        die("fwrite 2 failed: %s", strerror(errno));
+    fclose(fout);
+
+    FILE* fin = popen("diff -u0 /tmp/git-blame-tmp1 /tmp/git-blame-tmp2", "r");
+    if(!fin)
+        die("popen failed: %s", strerror(errno));
+
+    
+    char buf[1024];
+    while(fgets(buf, sizeof(buf), fin)) {
+        if(buf[0] != '@' || buf[1] != '@')
+            continue;
+
+        if(DEBUG)
+            printf("chunk line: %s", buf);
+        ret->num++;
+        ret->chunks = xrealloc(ret->chunks, sizeof(struct chunk)*ret->num);
+        struct chunk* chunk = &ret->chunks[ret->num-1];
+        
+        assert(!strncmp(buf, "@@ -", 4));
+
+        char* start = buf+4;
+        char* sp = index(start, ' ');
+        *sp = '\0';
+        if(index(start, ',')) {
+            int ret = sscanf(start, "%d,%d", &chunk->off1, &chunk->len1);
+            assert(ret == 2);  
+        } else {
+            int ret = sscanf(start, "%d", &chunk->off1);
+            assert(ret == 1);
+            chunk->len1 = 1;
+        }
+        *sp = ' ';
+
+        start = sp+1;
+        sp = index(start, ' ');
+        *sp = '\0';
+        if(index(start, ',')) {
+            int ret = sscanf(start, "%d,%d", &chunk->off2, &chunk->len2);
+            assert(ret == 2);
+        } else {
+            int ret = sscanf(start, "%d", &chunk->off2);
+            assert(ret == 1);
+            chunk->len2 = 1;
+        }
+        *sp = ' ';
+
+        if(chunk->off1 > 0)
+            chunk->off1 -= 1;
+        if(chunk->off2 > 0)
+            chunk->off2 -= 1;
+
+        assert(chunk->off1 >= 0);
+        assert(chunk->off2 >= 0);
+    }
+    fclose(fin);
+
+    num_get_patch++;
+    return ret;
+}
+
+void free_patch(struct patch* p)
+{
+    free(p->chunks);
+    free(p);
+}
+
+static int get_blob_sha1_internal(unsigned char *sha1, const char *base, int baselen,
+                                  const char *pathname, unsigned mode, int stage);
+
+
+static unsigned char blob_sha1[20];
+static int get_blob_sha1(struct tree* t, const char* pathname, unsigned char* sha1)
+{
+    const char *pathspec[2];
+    pathspec[0] = pathname;
+    pathspec[1] = NULL;
+    memset(blob_sha1, 0, sizeof(blob_sha1));
+    read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal);
+
+    int i;
+    for(i = 0; i < 20; i++) {
+        if(blob_sha1[i] != 0)
+            break;
+    }
+
+    if(i == 20)
+        return -1;
+    
+    memcpy(sha1, blob_sha1, 20);
+    return 0;
+}
+
+static int get_blob_sha1_internal(unsigned char *sha1, const char *base, int baselen,
+                                  const char *pathname, unsigned mode, int stage)
+{
+//    printf("Got blob: %s base: '%s' baselen: %d pathname: '%s' mode: %o stage: %d\n",
+//           sha1_to_hex(sha1), base, baselen, pathname, mode, stage);
+
+    if(S_ISDIR(mode))
+        return READ_TREE_RECURSIVE;
+    
+    memcpy(blob_sha1, sha1, 20);
+    return -1;
+}
+
+static void get_blob(struct commit* commit)
+{
+    struct util_info* info = commit->object.util;
+    char type[20];    
+
+    if(info->buf)
+        return;
+    
+    info->buf = read_sha1_file(info->sha1, type, &info->size);
+    assert(!strcmp(type, "blob"));
+}
+
+void print_patch(struct patch* p)
+{
+    printf("Num chunks: %d\n", p->num);
+    int i;
+    for(i = 0; i < p->num; i++) {
+        printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1, p->chunks[i].off2, p->chunks[i].len2);
+    }
+}
+
+
+// p is a patch from commit to other.
+void fill_line_map(struct commit* commit, struct commit* other, struct patch* p)
+{
+    int num_lines = ((struct util_info*) commit->object.util)->num_lines;
+    int* line_map = ((struct util_info*) commit->object.util)->line_map;
+    int num_lines2 = ((struct util_info*) other->object.util)->num_lines;
+    int* line_map2 = ((struct util_info*) other->object.util)->line_map;
+    int cur_chunk = 0;
+    int i1, i2;
+
+    if(p->num && DEBUG)
+        print_patch(p);
+    
+    for(i1 = 0; i1 < num_lines; i1++)
+        line_map[i1] = -1;
+
+    if(DEBUG)
+        printf("num lines 1: %d num lines 2: %d\n", num_lines, num_lines2);
+    
+    for(i1 = 0, i2 = 0; i1 < num_lines; i1++, i2++) {
+        if(DEBUG > 1)
+            printf("%d %d\n", i1, i2);
+
+        if(i2 >= num_lines2)
+            break;
+        
+        line_map[i1] = line_map2[i2];
+
+        struct chunk* chunk = NULL;
+        if(cur_chunk < p->num)
+            chunk = &p->chunks[cur_chunk];
+        
+        if(chunk && chunk->off1 == i1) {
+            i2 = chunk->off2;
+
+            if(chunk->len1 > 0)
+                i1 += chunk->len1-1;
+            if(chunk->len2 > 0)
+                i2 += chunk->len2-1;
+            cur_chunk++;
+        }
+    }
+}
+
+int map_line(struct commit* commit, int line)
+{
+    struct util_info* info = commit->object.util;
+    assert(line >= 0 && line < info->num_lines);
+    return info->line_map[line];
+}
+
+int fill_util_info(struct commit* commit, const char* path)
+{
+    if(commit->object.util)
+        return 0;
+    
+    struct util_info* util = xmalloc(sizeof(struct util_info));
+    util->buf = NULL;
+    util->size = 0;
+    util->num_lines = -1;
+    util->line_map = NULL;
+
+    commit->object.util = util;
+    
+    if(get_blob_sha1(commit->tree, path, util->sha1))
+        return -1;
+
+    return 0;
+}
+
+void alloc_line_map(struct commit* commit)
+{
+    struct util_info* util = commit->object.util;
+
+    if(util->line_map)
+        return;
+
+    get_blob(commit);
+    
+    int i;
+    util->num_lines = 0;
+    for(i = 0; i < util->size; i++) {
+        if(util->buf[i] == '\n')
+            util->num_lines++;
+    }
+    util->line_map = xmalloc(sizeof(int)*util->num_lines);
+}
+
+void copy_line_map(struct commit* dst, struct commit* src)
+{
+    struct util_info* u_dst = dst->object.util;
+    struct util_info* u_src = src->object.util;
+
+    u_dst->line_map = u_src->line_map;
+    u_dst->num_lines = u_src->num_lines;
+    u_dst->buf = u_src->buf;
+    u_dst->size = u_src->size;
+}
+    
+void process_commits(struct commit_list* list, const char* path)
+{
+    int i;
+    
+    while(list) {
+        struct commit* commit = pop_commit(&list);
+        struct commit_list* parents;
+        struct util_info* info;
+
+        info = commit->object.util;
+        num_commits++;
+        if(DEBUG)
+            printf("\nProcessing commit: %d %s\n", num_commits, sha1_to_hex(commit->object.sha1));
+        for(parents = commit->parents;
+            parents != NULL; parents = parents->next) {
+            struct commit* parent = parents->item;
+            
+            if(parse_commit(parent) < 0)
+                die("parse_commit error");
+
+            if(DEBUG)
+                printf("parent: %s\n", sha1_to_hex(parent->object.sha1));
+
+            if(fill_util_info(parent, path))
+                continue;
+
+            // Temporarily assign everything to the parent.
+            int num_blame = 0;
+            for(i = 0; i < num_blame_lines; i++) {
+                if(blame_lines[i] == commit) {
+                    num_blame++;
+                    blame_lines[i] = parent;
+                }
+            }
+
+            if(num_blame == 0)
+                continue;
+            
+            struct patch* patch = get_patch(parent, commit);
+            if(patch->num == 0) {
+                copy_line_map(parent, commit);
+            } else {
+                alloc_line_map(parent);
+                fill_line_map(parent, commit, patch);
+            }
+
+            for(i = 0; i < patch->num; i++) {
+                int l;
+                for(l = 0; l < patch->chunks[i].len2; l++) {
+                    int mapped_line = map_line(commit, patch->chunks[i].off2 + l);
+                    if(mapped_line != -1 && blame_lines[mapped_line] == parent)
+                        blame_lines[mapped_line] = commit;
+                }
+            }
+            free_patch(patch);
+        }
+    }
+}
+
+#define SEEN 1
+struct commit_list* get_commit_list(struct commit* commit, const char* pathname)
+{
+    struct commit_list* ret = NULL;
+    struct commit_list* process = NULL;
+    unsigned char sha1[20];
+    
+    commit_list_insert(commit, &process);
+
+    while(process) {
+        struct commit* com = pop_commit(&process);
+        if(com->object.flags & SEEN)
+            continue;
+
+        com->object.flags |= SEEN;
+        commit_list_insert(com, &ret);
+        struct commit_list* parents;
+
+        parse_commit(com);
+        
+        for(parents = com->parents;
+            parents != NULL; parents = parents->next) {
+            struct commit* parent = parents->item;
+
+            parse_commit(parent);
+            
+            if(!get_blob_sha1(parent->tree, pathname, sha1))
+                commit_list_insert(parent, &process);
+        }
+    }
+    
+    return ret;
+}
+
+int main(int argc, const char **argv)
+{
+    unsigned char sha1[20];
+    struct commit *commit;
+    const char* filename;
+    int i;
+    
+    setup_git_directory();
+
+    if (argc != 3)
+        die("Usage: blame commit-ish file");
+        
+    if (get_sha1(argv[1], sha1))
+        die("get_sha1 failed");
+
+    commit = lookup_commit_reference(sha1);
+
+    filename = argv[2];
+
+    struct commit_list* list = get_commit_list(commit, filename);
+    sort_in_topological_order(&list, 1);
+
+    if(fill_util_info(commit, filename)) {
+        printf("%s not found in %s\n", filename, argv[1]);
+        return 0;
+    }
+    alloc_line_map(commit);
+
+    struct util_info* util = commit->object.util;
+    num_blame_lines = util->num_lines;
+    blame_lines = xmalloc(sizeof(struct commit*)*num_blame_lines);
+
+
+    for(i = 0; i < num_blame_lines; i++) {
+        blame_lines[i] = commit;
+
+        ((struct util_info*) commit->object.util)->line_map[i] = i;
+    }    
+    
+    process_commits(list, filename);
+
+    for(i = 0; i < num_blame_lines; i++) {
+        printf("%d %s\n", i+1-1, sha1_to_hex(blame_lines[i]->object.sha1));
+//        printf("%d %s\n", i+1-1, find_unique_abbrev(blame_lines[i]->object.sha1, 6));
+    }
+    
+    if(DEBUG) {
+        printf("num get patch: %d\n", num_get_patch);
+        printf("num commits: %d\n", num_commits);
+    }
+
+    return 0;
+}
-- 
1.2.1.g62a4-dirty

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-20 23:40 ` Fredrik Kuivinen
@ 2006-02-21  0:01   ` Junio C Hamano
  2006-02-28  8:27     ` Fredrik Kuivinen
  2006-02-23 22:10   ` Ryan Anderson
  1 sibling, 1 reply; 15+ messages in thread
From: Junio C Hamano @ 2006-02-21  0:01 UTC (permalink / raw
  To: Fredrik Kuivinen; +Cc: git, Ryan Anderson

Fredrik Kuivinen <freku045@student.liu.se> writes:

> I have also been working on a blame program.

Very nice to see these two.

Obviously I prefer this one for its performance ;-).

Its interface is probably friendlier when used as a preprocessor
by other tools.  I can imagine GUI source viewer that fontifies
the source code and prefers to get just the origin information
from a "blame backend".

BTW, these days I always compile things with 

	-Wall -Wdeclaration-after-statement

which caught quite a few problems.

Also I have my templates/hooks--pre-commit enabled so you might
want to lindent it before inclusion.

I'll play with both a bit.  Thanks for nice toys, both of you!

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-20 23:40 ` Fredrik Kuivinen
  2006-02-21  0:01   ` Junio C Hamano
@ 2006-02-23 22:10   ` Ryan Anderson
  2006-02-23 22:55     ` Fredrik Kuivinen
  1 sibling, 1 reply; 15+ messages in thread
From: Ryan Anderson @ 2006-02-23 22:10 UTC (permalink / raw
  To: Fredrik Kuivinen; +Cc: Junio C Hamano, git

(Biased critique since I have the other tool in the tree, but still...)

On Tue, Feb 21, 2006 at 12:40:54AM +0100, Fredrik Kuivinen wrote:
> diff --git a/blame.c b/blame.c
> new file mode 100644
> index 0000000..d4a2fad
> --- /dev/null
> +++ b/blame.c
> @@ -0,0 +1,444 @@
> +#include <assert.h>
> +
> +#include "cache.h"
> +#include "refs.h"
> +#include "tag.h"
> +#include "commit.h"
> +#include "tree.h"
> +#include "blob.h"
> +#include "epoch.h"
> +#include "diff.h"
> +
> +#define DEBUG 0
> +
> +struct commit** blame_lines;
> +int num_blame_lines;
> +
> +struct util_info
> +{
> +    int* line_map;
> +    int num_lines;
> +    unsigned char sha1[20]; /* blob sha, not commit! */
> +    char* buf;
> +    unsigned long size;
> +//    const char* path;
> +};
> +
> +struct chunk
> +{
> +    int off1, len1; // ---
> +    int off2, len2; // +++
> +};
> +
> +struct patch
> +{
> +    struct chunk* chunks;
> +    int num;
> +};
> +
> +static void get_blob(struct commit* commit);
> +
> +int num_get_patch = 0;
> +int num_commits = 0;
> +
> +struct patch* get_patch(struct commit* commit, struct commit* other)
> +{
> +    struct patch* ret = xmalloc(sizeof(struct patch));
> +    ret->chunks = NULL;
> +    ret->num = 0;
> +
> +    struct util_info* info_c = (struct util_info*) commit->object.util;
> +    struct util_info* info_o = (struct util_info*) other->object.util;
> +
> +    if(!memcmp(info_c->sha1, info_o->sha1, 20))
> +        return ret;
> +
> +    get_blob(commit);
> +    get_blob(other);
> +
> +    FILE* fout = fopen("/tmp/git-blame-tmp1", "w");

Probably should be using something like mkstemp (mkstmp?) here, so if
someone is running things as root or with a malicious user around, things
don't collide.

Hell, so on a multi-user machine this doesn't blow up on you.

But, read down for a related comment.

> +    if(!fout)
> +        die("fopen tmp1 failed: %s", strerror(errno));
> +
> +    if(fwrite(info_c->buf, info_c->size, 1, fout) != 1)
> +        die("fwrite 1 failed: %s", strerror(errno));
> +    fclose(fout);
> +
> +    fout = fopen("/tmp/git-blame-tmp2", "w");
> +    if(!fout)
> +        die("fopen tmp2 failed: %s", strerror(errno));
> +
> +    if(fwrite(info_o->buf, info_o->size, 1, fout) != 1)
> +        die("fwrite 2 failed: %s", strerror(errno));
> +    fclose(fout);
> +
> +    FILE* fin = popen("diff -u0 /tmp/git-blame-tmp1 /tmp/git-blame-tmp2", "r");
> +    if(!fin)
> +        die("popen failed: %s", strerror(errno));

Can't git-diff-tree do this sufficiently, anyway?  See my Perl script
for an example, you just need both commit IDs and both filenames and the
appropriate -M and you get the right results.

(It's possible that's part of where the performance differences are,
though, not really sure at the moment.)

I'm going to stop there for the moment; I'm not really confident enough in
my understanding of git internals to say much more just yet.

This could probably benefit a *LOT* from the libification project, I
think, though.


-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-23 22:10   ` Ryan Anderson
@ 2006-02-23 22:55     ` Fredrik Kuivinen
  2006-02-24  0:00       ` Johannes Schindelin
  0 siblings, 1 reply; 15+ messages in thread
From: Fredrik Kuivinen @ 2006-02-23 22:55 UTC (permalink / raw
  To: Ryan Anderson; +Cc: Fredrik Kuivinen, Junio C Hamano, git

On Thu, Feb 23, 2006 at 05:10:49PM -0500, Ryan Anderson wrote:
> (Biased critique since I have the other tool in the tree, but still...)
> 
> > +    FILE* fout = fopen("/tmp/git-blame-tmp1", "w");
> 
> Probably should be using something like mkstemp (mkstmp?) here, so if
> someone is running things as root or with a malicious user around, things
> don't collide.
> 
> Hell, so on a multi-user machine this doesn't blow up on you.

Yep, I know. The code is mostly a proof of concept. I didn't submit it
for inclusion.


> 
> But, read down for a related comment.
> 
> > +    if(!fout)
> > +        die("fopen tmp1 failed: %s", strerror(errno));
> > +
> > +    if(fwrite(info_c->buf, info_c->size, 1, fout) != 1)
> > +        die("fwrite 1 failed: %s", strerror(errno));
> > +    fclose(fout);
> > +
> > +    fout = fopen("/tmp/git-blame-tmp2", "w");
> > +    if(!fout)
> > +        die("fopen tmp2 failed: %s", strerror(errno));
> > +
> > +    if(fwrite(info_o->buf, info_o->size, 1, fout) != 1)
> > +        die("fwrite 2 failed: %s", strerror(errno));
> > +    fclose(fout);
> > +
> > +    FILE* fin = popen("diff -u0 /tmp/git-blame-tmp1 /tmp/git-blame-tmp2", "r");
> > +    if(!fin)
> > +        die("popen failed: %s", strerror(errno));
> 
> Can't git-diff-tree do this sufficiently, anyway?  See my Perl script
> for an example, you just need both commit IDs and both filenames and the
> appropriate -M and you get the right results.
> 
> (It's possible that's part of where the performance differences are,
> though, not really sure at the moment.)
> 

Yeah.. maybe. My first thought was to avoid forking and execing diff
and use some C library for doing the diffing instead (libxdiff). But
then I just wanted to get some code working and the simplest solution
I could think of was to fork and exec diff.

> I'm going to stop there for the moment, I'm not really confident in my
> understanding of git-internals to say much more just yet.
> 
> This could probably benefit a *LOT* from the libification project, I
> think, though.

Yes, perhaps. Some of the git-rev-list bits might simplify a couple of
things.

I have found some severe problems with the code I posted, in
particular it doesn't handle parallel development tracks at all. I am
working on fixing it, but it isn't finished yet.

Thanks for the comments.

- Fredrik

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-23 22:55     ` Fredrik Kuivinen
@ 2006-02-24  0:00       ` Johannes Schindelin
  2006-02-24  0:17         ` Junio C Hamano
  0 siblings, 1 reply; 15+ messages in thread
From: Johannes Schindelin @ 2006-02-24  0:00 UTC (permalink / raw
  To: Fredrik Kuivinen; +Cc: Ryan Anderson, Junio C Hamano, git

Hi,

On Thu, 23 Feb 2006, Fredrik Kuivinen wrote:

> On Thu, Feb 23, 2006 at 05:10:49PM -0500, Ryan Anderson wrote:
> > (Biased critique since I have the other tool in the tree, but still...)
> > 
> > > +    FILE* fout = fopen("/tmp/git-blame-tmp1", "w");
> > 
> > Probably should be using something like mkstemp (mkstmp?) here, so if
> > someone is running things as root or with a malicious user around, things
> > don't collide.
> > 
> > Hell, so on a multi-user machine this doesn't blow up on you.
> 
> Yep, I know. The code is mostly a proof of concept. I didn't submit it
> for inclusion.

Ha ha, famous last words!

> > But, read down for a related comment.
> > 
> > > +    if(!fout)
> > > +        die("fopen tmp1 failed: %s", strerror(errno));
> > > +
> > > +    if(fwrite(info_c->buf, info_c->size, 1, fout) != 1)
> > > +        die("fwrite 1 failed: %s", strerror(errno));
> > > +    fclose(fout);
> > > +
> > > +    fout = fopen("/tmp/git-blame-tmp2", "w");
> > > +    if(!fout)
> > > +        die("fopen tmp2 failed: %s", strerror(errno));
> > > +
> > > +    if(fwrite(info_o->buf, info_o->size, 1, fout) != 1)
> > > +        die("fwrite 2 failed: %s", strerror(errno));
> > > +    fclose(fout);
> > > +
> > > +    FILE* fin = popen("diff -u0 /tmp/git-blame-tmp1 /tmp/git-blame-tmp2", "r");
> > > +    if(!fin)
> > > +        die("popen failed: %s", strerror(errno));
> > 
> > Can't git-diff-tree do this sufficiently, anyway?  See my Perl script
> > for an example, you just need both commit IDs and both filenames and the
> > appropriate -M and you get the right results.
> > 
> > (It's possible that's part of where the performance differences are,
> > though, not really sure at the moment.)
> > 
> 
> Yeah.. maybe. My first thought was to avoid forking and execing diff
> and use some C library for doing the diffing instead (libxdiff). But
> then I just wanted to get some code working and the simplest solution
> I could think of was to fork and exec diff.

git-diff-tree fork()s a diff. So, by fork()ing git-diff-tree you get two 
fork()s (and no knife...).

> > I'm going to stop there for the moment, I'm not really confident in my
> > understanding of git-internals to say much more just yet.
> > 
> > This could probably benefit a *LOT* from the libification project, I
> > think, though.
> 
> Yes, perhaps. Some of the git-rev-list bits might simplify a couple of
> things.

The major problem is probably not solved: What Linus calls a "stream 
interface".

I.e. if you pipe the output of git-rev-list to another program, you 
*need* to execute the two semi-simultaneously. The "alternative" would be 
to use buffers, which can get huge (and are sometimes not needed: think 
git-whatchanged, which starts outputting before it has finished reading 
its input).

> I have found some severe problems with the code I posted, in
> particular it doesn't handle parallel development tracks at all. I am
> working on fixing it, but it isn't finished yet.

Looking forward to them!

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-24  0:00       ` Johannes Schindelin
@ 2006-02-24  0:17         ` Junio C Hamano
  2006-02-24  0:52           ` Johannes Schindelin
  0 siblings, 1 reply; 15+ messages in thread
From: Junio C Hamano @ 2006-02-24  0:17 UTC (permalink / raw
  To: Johannes Schindelin; +Cc: git, Fredrik Kuivinen

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> > This could probably benefit a *LOT* from the libification project, I
>> > think, though.
>> 
>> Yes, perhaps. Some of the git-rev-list bits might simplify a couple of
>> things.
>
> The major problem is probably not solved: What Linus calls a "stream 
> interface".
>
> I.e. if you pipe the output of git-rev-list to another program, you 
> *need* to execute the two semi-simultaneously. The "alternative" would be 
> to use buffers, which can get huge (and are sometimes not needed: think 
> git-whatchanged, which starts outputting before it's getting no more 
> input).

You need limited coroutine support, something like generator
functions in Python ;-).  In C, the traditional way of doing it is
to make your application-specific function a callback of
rev-list or whatever the generator is, which is very unpleasant to
code.

>> I have found some severe problems with the code I posted, in
>> particular it doesn't handle parallel development tracks at all. I am
>> working on fixing it, but it isn't finished yet.
>
> Looking forward to them!

Likewise.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-24  0:17         ` Junio C Hamano
@ 2006-02-24  0:52           ` Johannes Schindelin
  0 siblings, 0 replies; 15+ messages in thread
From: Johannes Schindelin @ 2006-02-24  0:52 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Fredrik Kuivinen

Hi,

On Thu, 23 Feb 2006, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> >> > This could probably benefit a *LOT* from the libification project, I
> >> > think, though.
> >> 
> >> Yes, perhaps. Some of the git-rev-list bits might simplify a couple of
> >> things.
> >
> > The major problem is probably not solved: What Linus calls a "stream 
> > interface".
> >
> > I.e. if you pipe the output of git-rev-list to another program, you 
> > *need* to execute the two semi-simultaneously. The "alternative" would be 
> > to use buffers, which can get huge (and are sometimes not needed: think 
> > git-whatchanged, which starts outputting before it's getting no more 
> > input).
> 
> You need a limited coroutine support, something like generator
> functions in Python ;-).  In C, traditional way of doing it is
> to make your application specific function a callback of
> rev-list or whatever generator is, which is very unpleasant to
> code.

The most unpleasant aspect is that you usually need something like "this" 
in C++: a pointer to an object (which you have to pass around all the 
time). Without it you can not use the function in a nested way.

However, I can also see benefits of this when compared to the traditional 
UNIX approach. It should be faster, for one, since you don't need to pass 
data through pipes all the time. (This might not be as true for Linux as 
for other OSes.)

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-21  0:01   ` Junio C Hamano
@ 2006-02-28  8:27     ` Fredrik Kuivinen
  2006-02-28  8:38       ` Junio C Hamano
  2006-02-28  8:47       ` Ryan Anderson
  0 siblings, 2 replies; 15+ messages in thread
From: Fredrik Kuivinen @ 2006-02-28  8:27 UTC (permalink / raw
  To: Junio C Hamano; +Cc: Fredrik Kuivinen, git, Ryan Anderson

On Mon, Feb 20, 2006 at 04:01:56PM -0800, Junio C Hamano wrote:
> Fredrik Kuivinen <freku045@student.liu.se> writes:
> 
> > I have also been working on a blame program.

...

> BTW, these days I always compile things with 
> 
> 	-Wall -Wdeclaration-after-statement
> 
> which caught quite a many.

Just out of curiosity, why do you prefer declarations before
statements?

- Fredrik

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-28  8:27     ` Fredrik Kuivinen
@ 2006-02-28  8:38       ` Junio C Hamano
  2006-02-28  8:47       ` Ryan Anderson
  1 sibling, 0 replies; 15+ messages in thread
From: Junio C Hamano @ 2006-02-28  8:38 UTC (permalink / raw
  To: Fredrik Kuivinen; +Cc: git

Fredrik Kuivinen <freku045@student.liu.se> writes:

> On Mon, Feb 20, 2006 at 04:01:56PM -0800, Junio C Hamano wrote:
>> 
>> BTW, these days I always compile things with 
>> 
>> 	-Wall -Wdeclaration-after-statement
>> 
>> which caught quite a many.
>
> Just out of curiosity, why do you prefer declarations before
> statements?

Inertia, IOW, mostly being used to read code written that way.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-28  8:27     ` Fredrik Kuivinen
  2006-02-28  8:38       ` Junio C Hamano
@ 2006-02-28  8:47       ` Ryan Anderson
  2006-02-28  9:08         ` Andreas Ericsson
  1 sibling, 1 reply; 15+ messages in thread
From: Ryan Anderson @ 2006-02-28  8:47 UTC (permalink / raw
  To: Fredrik Kuivinen; +Cc: Junio C Hamano, git

On Tue, Feb 28, 2006 at 09:27:36AM +0100, Fredrik Kuivinen wrote:
> On Mon, Feb 20, 2006 at 04:01:56PM -0800, Junio C Hamano wrote:
> > Fredrik Kuivinen <freku045@student.liu.se> writes:
> > 
> > > I have also been working on a blame program.
> 
> ...
> 
> > BTW, these days I always compile things with 
> > 
> > 	-Wall -Wdeclaration-after-statement
> > 
> > which caught quite a many.
> 
> Just out of curiosity, why do you prefer declarations before
> statements?

I won't speak for Junio, but the explanations I've heard in the past are
basically:

1) It keeps all declarations in one spot.
2) If your function is complicated enough to not need a variable until
fairly far into the function, it probably should be two (or more) functions.

Basically, I think that there's nothing wrong with doing it that way, per
se, just that it's sometimes a symptom of other problems, so if you look
for the symptom, the problem sometimes is more obvious.


-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-02-28  8:47       ` Ryan Anderson
@ 2006-02-28  9:08         ` Andreas Ericsson
  0 siblings, 0 replies; 15+ messages in thread
From: Andreas Ericsson @ 2006-02-28  9:08 UTC (permalink / raw
  To: Ryan Anderson; +Cc: Fredrik Kuivinen, Junio C Hamano, git

Ryan Anderson wrote:
> On Tue, Feb 28, 2006 at 09:27:36AM +0100, Fredrik Kuivinen wrote:
> 
>>On Mon, Feb 20, 2006 at 04:01:56PM -0800, Junio C Hamano wrote:
>>
>>>Fredrik Kuivinen <freku045@student.liu.se> writes:
>>>
>>>
>>>>I have also been working on a blame program.
>>
>>...
>>
>>
>>>BTW, these days I always compile things with 
>>>
>>>	-Wall -Wdeclaration-after-statement
>>>
>>>which caught quite a many.
>>
>>Just out of curiosity, why do you prefer declarations before
>>statements?
> 
> 
> I won't speak for Junio, but the explanations I've heard in the past are
> basically:
> 
> 1) It keeps all declarations in one spot.
> 2) If your function is complicated enough to not need a variable until
> fairly far into the function, it probably should be two (or more) functions.
> 

3) Not all compilers support it.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] Add git-annotate, a tool for assigning blame.
  2006-03-02  5:16 [PATCH 0/3] Annotate updates Ryan Anderson
@ 2006-03-02  5:16 ` Ryan Anderson
  2006-03-02  5:20   ` Ryan Anderson
  0 siblings, 1 reply; 15+ messages in thread
From: Ryan Anderson @ 2006-03-02  5:16 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Ryan Anderson

Signed-off-by: Ryan Anderson <ryan@michonline.com>

---

(Pull from http://h4x0r5.com/~ryan/git/ryan.git/ annotate-upstream )

I'm pretty sure this version (finally) gets the edge cases correct.

I would appreciate some other testing on this, as I can't find a case
where it falls down, but the files with a lot of history tend to have a
lot of lines, making them hard to spotcheck without having been an
intimate part of that history.

Oh, this is the "functional" version, but it might not qualify as "nice
looking" yet; please feel free to complain.

 Makefile          |    1 
 git-annotate.perl |  321 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 322 insertions(+), 0 deletions(-)
 create mode 100755 git-annotate.perl

107045e8abb674a66ee7c682dd85a3d303f26e3c
diff --git a/Makefile b/Makefile
index 317be3c..86ffcf4 100644
--- a/Makefile
+++ b/Makefile
@@ -119,6 +119,7 @@ SCRIPT_SH = \
 SCRIPT_PERL = \
 	git-archimport.perl git-cvsimport.perl git-relink.perl \
 	git-shortlog.perl git-fmt-merge-msg.perl git-rerere.perl \
+	git-annotate.perl \
 	git-svnimport.perl git-mv.perl git-cvsexportcommit.perl
 
 SCRIPT_PYTHON = \
diff --git a/git-annotate.perl b/git-annotate.perl
new file mode 100755
index 0000000..8f98431
--- /dev/null
+++ b/git-annotate.perl
@@ -0,0 +1,321 @@
+#!/usr/bin/perl
+# Copyright 2006, Ryan Anderson <ryan@michonline.com>
+#
+# GPL v2 (See COPYING)
+#
+# This file is licensed under the GPL v2, or a later version
+# at the discretion of Linus Torvalds.
+
+use warnings;
+use strict;
+
+my $filename = shift @ARGV;
+
+
+my @stack = (
+	{
+		'rev' => "HEAD",
+		'filename' => $filename,
+	},
+);
+
+our (@lineoffsets, @pendinglineoffsets);
+our @filelines = ();
+open(F,"<",$filename)
+	or die "Failed to open filename: $!";
+
+while(<F>) {
+	chomp;
+	push @filelines, $_;
+}
+close(F);
+our $leftover_lines = @filelines;
+our %revs;
+our @revqueue;
+our $head;
+
+my $revsprocessed = 0;
+while (my $bound = pop @stack) {
+	my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
+	foreach my $revinst (@revisions) {
+		my ($rev, @parents) = @$revinst;
+		$head ||= $rev;
+
+		$revs{$rev}{'filename'} = $bound->{'filename'};
+		if (scalar @parents > 0) {
+			$revs{$rev}{'parents'} = \@parents;
+			next;
+		}
+
+		my $newbound = find_parent_renames($rev, $bound->{'filename'});
+		if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
+			push @stack, $newbound;
+			$revs{$rev}{'parents'} = [$newbound->{'rev'}];
+		}
+	}
+}
+push @revqueue, $head;
+init_claim($head);
+$revs{$head}{'lineoffsets'} = {};
+handle_rev();
+
+
+my $i = 0;
+foreach my $l (@filelines) {
+	my ($output, $rev, $committer, $date);
+	if (ref $l eq 'ARRAY') {
+		($output, $rev, $committer, $date) = @$l;
+		if (length($rev) > 8) {
+			$rev = substr($rev,0,8);
+		}
+	} else {
+		$output = $l;
+		($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
+	}
+
+	printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output);
+}
+
+sub init_claim {
+	my ($rev) = @_;
+	my %revinfo = git_commit_info($rev);
+	for (my $i = 0; $i < @filelines; $i++) {
+		$filelines[$i] = [ $filelines[$i], '', '', '', 1];
+			# line,
+			# rev,
+			# author,
+			# date,
+			# 1 <-- belongs to the original file.
+	}
+	$revs{$rev}{'lines'} = \@filelines;
+}
+
+
+sub handle_rev {
+	my $i = 0;
+	while (my $rev = shift @revqueue) {
+
+		my %revinfo = git_commit_info($rev);
+
+		foreach my $p (@{$revs{$rev}{'parents'}}) {
+
+			git_diff_parse($p, $rev, %revinfo);
+			push @revqueue, $p;
+		}
+
+
+		if (scalar @{$revs{$rev}{parents}} == 0) {
+			# We must be at the initial rev here, so claim everything that is left.
+			for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
+				if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
+					claim_line($i, $rev, $revs{$rev}{lines}, %revinfo);
+				}
+			}
+		}
+	}
+}
+
+
+sub git_rev_list {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+		or die "Failed to exec git-rev-list: $!";
+
+	my @revs;
+	while(my $line = <P>) {
+		chomp $line;
+		my ($rev, @parents) = split /\s+/, $line;
+		push @revs, [ $rev, @parents ];
+	}
+	close(P);
+
+	printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
+	return @revs;
+}
+
+sub find_parent_renames {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
+		or die "Failed to exec git-diff: $!";
+
+	local $/ = "\0";
+	my %bound;
+	my $junk = <P>;
+	while (my $change = <P>) {
+		chomp $change;
+		my $filename = <P>;
+		chomp $filename;
+
+		if ($change =~ m/^[AMD]$/ ) {
+			next;
+		} elsif ($change =~ m/^R/ ) {
+			my $oldfilename = $filename;
+			$filename = <P>;
+			chomp $filename;
+			if ( $file eq $filename ) {
+				my $parent = git_find_parent($rev, $oldfilename);
+				@bound{'rev','filename'} = ($parent, $oldfilename);
+				last;
+			}
+		}
+	}
+	close(P);
+
+	return \%bound;
+}
+
+
+sub git_find_parent {
+	my ($rev, $filename) = @_;
+
+	open(REVPARENT,"-|","git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
+		or die "Failed to open git-rev-list to find a single parent: $!";
+
+	my $parentline = <REVPARENT>;
+	chomp $parentline;
+	my ($revfound,$parent) = split m/\s+/, $parentline;
+
+	close(REVPARENT);
+
+	return $parent;
+}
+
+
+# Get a diff between the current revision and a parent.
+# Record the commit information that results.
+sub git_diff_parse {
+	my ($parent, $rev, %revinfo) = @_;
+
+	my ($ri, $pi) = (0,0);
+	open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--",
+			$revs{$rev}{'filename'}, $revs{$parent}{'filename'})
+		or die "Failed to call git-diff for annotation: $!";
+
+	my $slines = $revs{$rev}{'lines'};
+	my @plines;
+
+	my $gotheader = 0;
+	my ($remstart, $remlength, $addstart, $addlength);
+	my ($hunk_start, $hunk_index, $hunk_adds);
+	while(<DIFF>) {
+		chomp;
+		if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
+			($remstart, $remlength, $addstart, $addlength) = ($1, $2, $3, $4);
+			# Adjust for 0-based arrays
+			$remstart--;
+			$addstart--;
+			# Reinit hunk tracking.
+			$hunk_start = $remstart;
+			$hunk_index = 0;
+			$gotheader = 1;
+
+			for (my $i = $ri; $i < $remstart; $i++) {
+				$plines[$pi++] = $slines->[$i];
+				$ri++;
+			}
+			next;
+		} elsif (!$gotheader) {
+			next;
+		}
+
+		if (m/^\+(.*)$/) {
+			my $line = $1;
+			$plines[$pi++] = [ $line, '', '', '', 0 ];
+			next;
+
+		} elsif (m/^-(.*)$/) {
+			my $line = $1;
+			if (get_line($slines, $ri) eq $line) {
+				# Found a match, claim
+				claim_line($ri, $rev, $slines, %revinfo);
+			} else {
+				die sprintf("Sync error: %d/%d\n|%s\n|%s\n%s => %s\n",
+						$ri, $hunk_start + $hunk_index,
+						$line,
+						get_line($slines, $ri),
+						$rev, $parent);
+			}
+			$ri++;
+
+		} else {
+			if (substr($_,1) ne get_line($slines,$ri) ) {
+				die sprintf("Line %d (%d) does not match:\n|%s\n|%s\n%s => %s\n",
+						$hunk_start + $hunk_index, $ri,
+						substr($_,1),
+						get_line($slines,$ri),
+						$rev, $parent);
+			}
+			$plines[$pi++] = $slines->[$ri++];
+		}
+		$hunk_index++;
+	}
+	close(DIFF);
+	for (my $i = $ri; $i < @{$slines} ; $i++) {
+		push @plines, $slines->[$ri++];
+	}
+
+	$revs{$parent}{lines} = \@plines;
+	return;
+}
+
+sub get_line {
+	my ($lines, $index) = @_;
+
+	return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index];
+}
+
+sub git_cat_file {
+	my ($parent, $filename) = @_;
+	return () unless defined $parent && defined $filename;
+	my $blobline = `git-ls-tree $parent $filename`;
+	my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4);
+
+	open(C,"-|","git-cat-file", "blob", $blob)
+		or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!;
+
+	my @lines;
+	while(<C>) {
+		chomp;
+		push @lines, $_;
+	}
+	close(C);
+
+	return @lines;
+}
+
+
+sub claim_line {
+	my ($floffset, $rev, $lines, %revinfo) = @_;
+	my $oline = get_line($lines, $floffset);
+	@{$lines->[$floffset]} = ( $oline, $rev,
+		$revinfo{'author'}, $revinfo{'author_date'} );
+	#printf("Claiming line %d with rev %s: '%s'\n",
+	#		$floffset, $rev, $oline) if 1;
+}
+
+sub git_commit_info {
+	my ($rev) = @_;
+	open(COMMIT, "-|","git-cat-file", "commit", $rev)
+		or die "Failed to call git-cat-file: $!";
+
+	my %info;
+	while(<COMMIT>) {
+		chomp;
+		last if (length $_ == 0);
+
+		if (m/^author (.*) <(.*)> (.*)$/) {
+			$info{'author'} = $1;
+			$info{'author_email'} = $2;
+			$info{'author_date'} = $3;
+		} elsif (m/^committer (.*) <(.*)> (.*)$/) {
+			$info{'committer'} = $1;
+			$info{'committer_email'} = $2;
+			$info{'committer_date'} = $3;
+		}
+	}
+	close(COMMIT);
+
+	return %info;
+}
-- 
1.2.2.gb342

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Add git-annotate, a tool for assigning blame.
  2006-03-02  5:16 ` [PATCH] Add git-annotate, a tool for assigning blame Ryan Anderson
@ 2006-03-02  5:20   ` Ryan Anderson
  0 siblings, 0 replies; 15+ messages in thread
From: Ryan Anderson @ 2006-03-02  5:20 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

Sorry about this - I was in the wrong directory (apparently) when
running git-send-email.  Actual patches in a second.

-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2006-03-02  5:21 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-20 10:46 [PATCH] Add git-annotate, a tool for assigning blame Ryan Anderson
2006-02-20 22:54 ` Junio C Hamano
2006-02-20 23:40 ` Fredrik Kuivinen
2006-02-21  0:01   ` Junio C Hamano
2006-02-28  8:27     ` Fredrik Kuivinen
2006-02-28  8:38       ` Junio C Hamano
2006-02-28  8:47       ` Ryan Anderson
2006-02-28  9:08         ` Andreas Ericsson
2006-02-23 22:10   ` Ryan Anderson
2006-02-23 22:55     ` Fredrik Kuivinen
2006-02-24  0:00       ` Johannes Schindelin
2006-02-24  0:17         ` Junio C Hamano
2006-02-24  0:52           ` Johannes Schindelin
  -- strict thread matches above, loose matches on Subject: below --
2006-03-02  5:16 [PATCH 0/3] Annotate updates Ryan Anderson
2006-03-02  5:16 ` [PATCH] Add git-annotate, a tool for assigning blame Ryan Anderson
2006-03-02  5:20   ` Ryan Anderson

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).