about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-04-01 23:15:04 +0000
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-04-01 23:25:04 +0000
commit c34a83286234ea1e876ebdf92a33744272bb6f4e (patch)
tree a04f43151284e6893e23780438773f9ea07c9fba /lib/PublicInbox
parent 0321a1a9e7ae9c9d878d547ee67659ef8aa95689 (diff)
download public-inbox-c34a83286234ea1e876ebdf92a33744272bb6f4e.tar.gz
We need to ensure there is only one file in the top-level tree
at any commit so the "add; remove; add;" sequence on the same
message is detected properly.

Otherwise, git will not detect the second "add" unless
a second message is added to history.

Deletes are now stored in "d" (and not "D" or "_/D") at the
top-level.  There's no need to have a "_" to reduce churn
as "m" and "d" should never co-exist.  It's now lowercased to
make it easier to distinguish from "D" in git-log output.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/Import.pm 61
-rw-r--r-- lib/PublicInbox/V2Writable.pm 12
2 files changed, 50 insertions, 23 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 463b44e2..b2aae9a7 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -49,7 +49,14 @@ sub gfi_start {
         $self->lock_acquire;
 
         local $/ = "\n";
-        chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $self->{ref}));
+        my $ref = $self->{ref};
+        chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
+        if ($self->{path_type} ne '2/38' && $self->{tip}) {
+                local $/ = "\0";
+                my @tree = $git->qx(qw(ls-tree -r -z --name-only), $ref);
+                chomp @tree;
+                $self->{-tree} = { map { $_ => 1 } @tree };
+        }
 
         my $git_dir = $git->{git_dir};
         my @cmd = ('git', "--git-dir=$git_dir", qw(fast-import
@@ -238,7 +245,8 @@ sub remove {
         if (defined $path) {
                 print $w "D $path\n\n" or wfail;
         } else {
-                print $w "M 100644 :$blob _/D\n\n" or wfail;
+                clean_tree_v2($self, $w, 'd');
+                print $w "M 100644 :$blob d\n\n" or wfail;
         }
         $self->{nchg}++;
         (($self->{tip} = ":$commit"), $cur);
@@ -317,6 +325,15 @@ sub v1_mid0 ($) {
         }
         $mids->[0];
 }
+sub clean_tree_v2 ($$$) {
+        my ($self, $w, $keep) = @_;
+        my $tree = $self->{-tree} or return; #v2 only
+        delete $tree->{$keep};
+        foreach (keys %$tree) {
+                print $w "D $_\n" or wfail;
+        }
+        %$tree = ($keep => 1);
+}
 
 # returns undef on duplicate
 # returns the :MARK of the most recent commit
@@ -382,6 +399,7 @@ sub add {
         if ($tip ne '') {
                 print $w 'from ', ($parent ? $parent : $tip), "\n" or wfail;
         }
+        clean_tree_v2($self, $w, $path);
         print $w "M 100644 :$blob $path\n\n" or wfail;
         $self->{nchg}++;
         $self->{tip} = ":$commit";
@@ -431,8 +449,9 @@ sub digest2mid ($) {
 }
 
 sub clean_purge_buffer {
-        my ($oid, $buf) = @_;
-        my $cmt_msg = "purged $oid\n";
+        my ($oids, $buf) = @_;
+        my $cmt_msg = 'purged '.join(' ',@$oids)."\n";
+        @$oids = ();
 
         foreach my $i (0..$#$buf) {
                 my $l = $buf->[$i];
@@ -456,6 +475,8 @@ sub purge_oids {
         my ($r, $w) = $self->gfi_start;
         my @buf;
         my $npurge = 0;
+        my @oids;
+        my $tree = $self->{-tree};
         while (<$rd>) {
                 if (/^reset (?:.+)/) {
                         push @buf, "reset $tmp\n";
@@ -472,25 +493,27 @@ sub purge_oids {
                         my $n = read($rd, my $buf, $len) or die "read: $!";
                         $len == $n or die "short read ($n < $len)";
                         push @buf, $buf;
-                } elsif (/^M 100644 ([a-f0-9]+) /) {
-                        my $oid = $1;
+                } elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
+                        my ($oid, $path) = ($1, $2);
                         if ($purge->{$oid}) {
-                                my $lf = <$rd>;
-                                if ($lf eq "\n") {
-                                        my $out = join('', @buf);
-                                        $out =~ s/^/# /sgm;
-                                        warn "purge rewriting\n", $out, "\n";
-                                        clean_purge_buffer($oid, \@buf);
-                                        $out = join('', @buf);
-                                        $w->print(@buf, "\n") or wfail;
-                                        @buf = ();
-                                        $npurge++;
-                                } else {
-                                        die "expected LF: $lf\n";
-                                }
+                                push @oids, $oid;
+                                delete $tree->{$path};
                         } else {
+                                $tree->{$path} = 1;
                                 push @buf, $_;
                         }
+                } elsif (/^D (\w+)/) {
+                        my $path = $1;
+                        push @buf, $_ if $tree->{$path};
+                } elsif ($_ eq "\n") {
+                        my $out = join('', @buf);
+                        $out =~ s/^/# /sgm;
+                        warn "purge rewriting\n", $out, "\n";
+                        clean_purge_buffer(\@oids, \@buf);
+                        $out = join('', @buf);
+                        $w->print(@buf, "\n") or wfail;
+                        @buf = ();
+                        $npurge++;
                 } else {
                         push @buf, $_;
                 }
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index c4368ccc..c8869bda 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -634,15 +634,19 @@ sub reindex {
                         -d $git->{git_dir} or next; # missing parts are fine
                         chomp($tip = $git->qx('rev-parse', $head)) unless $tip;
                         my $h = $cur == $max_git ? $tip : $head;
-                        my @count = ('rev-list', '--count', $h, '--', 'm');
-                        $regen_max += $git->qx(@count);
+
+                        # can't use 'rev-list --count' if we use --diff-filter
+                        my $fh = $git->popen(qw(log --pretty=tformat:%h
+                                        --no-notes --no-color --no-renames
+                                        --diff-filter=AM), $h, '--', 'm');
+                        ++$regen_max while <$fh>;
                 }
                 die "No messages found in $pfx/*.git, bug?\n" unless $regen_max;
                 $regen = \$regen_max;
         }
         my $D = {};
         my @cmd = qw(log --raw -r --pretty=tformat:%h
-                        --no-notes --no-color --no-abbrev);
+                        --no-notes --no-color --no-abbrev --no-renames);
 
         # if we are regenerating, we must not use a newer tip commit than what
         # the regeneration counter used:
@@ -663,7 +667,7 @@ sub reindex {
                         } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) {
                                 $self->reindex_oid($mm_tmp, $D, $git, $1,
                                                 $regen);
-                        } elsif (m!\A:\d{6} 100644 $x40 ($x40) [AM]\t_/D$!o) {
+                        } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) {
                                 $self->mark_deleted($D, $git, $1);
                         }
                 }