From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id CF6841FAE2 for ; Wed, 4 Apr 2018 21:25:00 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 2/4] import: rewrite less history during purge Date: Wed, 4 Apr 2018 21:24:58 +0000 Message-Id: <20180404212500.1859-3-e@80x24.org> In-Reply-To: <20180404212500.1859-1-e@80x24.org> References: <20180404212500.1859-1-e@80x24.org> List-Id: We do not need to rewrite old commits unaffected by the object_id purge, only newer commits. This was a state management bug :x We will also return the new commit ID of rewritten history to aid in incremental indexing of mirrors for the next change. --- lib/PublicInbox/Import.pm | 25 ++++++++++++++++++------- lib/PublicInbox/V2Writable.pm | 6 ++++-- t/v2writable.t | 3 ++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index b2aae9a..73290ee 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -476,6 +476,7 @@ sub purge_oids { my @buf; my $npurge = 0; my @oids; + my ($done, $mark); my $tree = $self->{-tree}; while (<$rd>) { if (/^reset (?:.+)/) { @@ -506,14 +507,20 @@ sub purge_oids { my $path = $1; push @buf, $_ if $tree->{$path}; } elsif ($_ eq "\n") { - my $out = join('', @buf); - $out =~ s/^/# /sgm; - warn "purge rewriting\n", $out, "\n"; - clean_purge_buffer(\@oids, \@buf); - $out = join('', @buf); + if (@oids) { + my $out = join('', @buf); + $out =~ s/^/# /sgm; + warn "purge rewriting\n", $out, "\n"; + clean_purge_buffer(\@oids, \@buf); + $npurge++; + } $w->print(@buf, "\n") or wfail; @buf = (); - $npurge++; + } elsif ($_ eq "done\n") { + $done = 1; + } elsif (/^mark :(\d+)$/) { + push @buf, $_; + $mark = $1; } else { push @buf, $_; } @@ -521,7 +528,9 @@ sub purge_oids { if (@buf) { $w->print(@buf) or wfail; } - $w = $r = undef; + die 'done\n not seen from fast-export' unless $done; + chomp(my $cmt = $self->get_mark(":$mark")) if $npurge; + $self->{nchg} = 0; # prevent _update_git_info until update-ref: $self->done; my @git = ('git', "--git-dir=$git->{git_dir}"); @@ -540,7 +549,9 @@ sub purge_oids { $err++; } } + _update_git_info($self, 0); die "Failed to purge $err object(s)\n" if $err; + $cmt; } 1; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 479e2b5..b6532ac 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -224,11 +224,13 @@ sub purge_oids { my ($self, $purge) = @_; # $purge = { $object_id => 1, ... } $self->done; my $pfx = "$self->{-inbox}->{mainrepo}/git"; + my $purges = []; foreach my $i (0..$self->{max_git}) { my $git = PublicInbox::Git->new("$pfx/$i.git"); my $im = $self->import_init($git, 0); - $im->purge_oids($purge); + $purges->[$i] = $im->purge_oids($purge); } + $purges; } sub remove_internal { @@ -285,7 +287,7 @@ sub remove_internal { $self->barrier; } if ($purge && scalar keys %$purge) { - purge_oids($self, $purge); + return purge_oids($self, $purge); } $removed; } diff --git a/t/v2writable.t b/t/v2writable.t index 2f83977..e49c06b 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -248,7 +248,8 @@ EOF { ok($im->add($mime), 'add message to be purged'); local $SIG{__WARN__} = sub {}; - ok($im->purge($mime), 'purged message'); + ok(my $cmts = $im->purge($mime), 'purged message'); + like($cmts->[0], qr/\A[a-f0-9]{40}\z/, 'purge returned current commit'); $im->done; } -- EW