From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0C3461FAE4 for ; Wed, 4 Apr 2018 21:25:01 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 3/4] v2: support incremental indexing + purge Date: Wed, 4 Apr 2018 21:24:59 +0000 Message-Id: <20180404212500.1859-4-e@80x24.org> In-Reply-To: <20180404212500.1859-1-e@80x24.org> References: <20180404212500.1859-1-e@80x24.org> List-Id: This is important for people running mirrors via "git fetch", as they need to be kept up-to-date. Purging is also now supported in mirrors. The short-lived "--regenerate" option is gone and is now implicitly enabled as a result. It's still cheap when article number regeneration is unnecessary, as we track the range for each git repository. 
--- MANIFEST | 1 + lib/PublicInbox/Import.pm | 3 +- lib/PublicInbox/Msgmap.pm | 5 + lib/PublicInbox/V2Writable.pm | 275 ++++++++++++++++++++++++++++++++---------- script/public-inbox-convert | 2 +- script/public-inbox-index | 15 +-- t/v2mirror.t | 176 +++++++++++++++++++++++++++ t/v2reindex.t | 10 +- 8 files changed, 406 insertions(+), 81 deletions(-) create mode 100644 t/v2mirror.t diff --git a/MANIFEST b/MANIFEST index 2dad988..b17f1be 100644 --- a/MANIFEST +++ b/MANIFEST @@ -193,6 +193,7 @@ t/time.t t/utf8.mbox t/v2-add-remove-add.t t/v2mda.t +t/v2mirror.t t/v2reindex.t t/v2writable.t t/view.t diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 73290ee..2529798 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -192,6 +192,7 @@ sub get_mark { my ($r, $w) = $self->gfi_start; print $w "get-mark $mark\n" or wfail; defined(my $oid = <$r>) or die "get-mark failed, need git 2.6.0+\n"; + chomp($oid); $oid; } @@ -379,7 +380,7 @@ sub add { # v2: we need this for Xapian if ($self->{want_object_info}) { - chomp(my $oid = $self->get_mark(":$blob")); + my $oid = $self->get_mark(":$blob"); $self->{last_object} = [ $oid, $n, \$str ]; } my $ref = $self->{ref}; diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index c6a7315..5c37e16 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -92,6 +92,11 @@ sub last_commit { $self->meta_accessor('last_commit', $commit); } +sub last_commit_n { + my ($self, $i, $commit) = @_; + $self->meta_accessor('last_commit'.$i, $commit); +} + sub created_at { my ($self, $second) = @_; $self->meta_accessor('created_at', $second); diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index b6532ac..5b4d9c0 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -15,6 +15,7 @@ use PublicInbox::ContentId qw(content_id content_digest); use PublicInbox::Inbox; use PublicInbox::OverIdxFork; use PublicInbox::Msgmap; +use 
PublicInbox::Spawn; use IO::Handle; # an estimate of the post-packed size to the raw uncompressed size @@ -63,6 +64,7 @@ sub new { lock_path => "$dir/inbox.lock", # limit each repo to 1GB or so rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR), + last_commit => [], }; bless $self, $class; } @@ -105,6 +107,7 @@ sub add { my $nparts = $self->{partitions}; my $part = $num % $nparts; + $self->{last_commit}->[$self->{max_git}] = $cmt; my $idx = $self->idx_part($part); $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime); my $n = $self->{transact_bytes} += $len; @@ -112,7 +115,7 @@ sub add { $self->checkpoint; } - $mime; + $cmt; } sub num_for { @@ -227,7 +230,7 @@ sub purge_oids { my $purges = []; foreach my $i (0..$self->{max_git}) { my $git = PublicInbox::Git->new("$pfx/$i.git"); - my $im = $self->import_init($git, 0); + my $im = $self->import_init($git, 0, 1); $purges->[$i] = $im->purge_oids($purge); } $purges; @@ -250,6 +253,7 @@ sub remove_internal { # can be slightly different, so we do not need the user-supplied # message now that we have the mids and content_id $mime = undef; + my $mark; foreach my $mid (@$mids) { $srch->reopen->each_smsg_by_mid($mid, sub { @@ -272,7 +276,8 @@ sub remove_internal { if ($purge) { $purge->{$oid} = 1; } else { - $im->remove(\$orig, $cmt_msg); + ($mark, undef) = + $im->remove(\$orig, $cmt_msg); } $orig = undef; $removed->num; # memoize this for callers @@ -286,6 +291,11 @@ sub remove_internal { }); $self->barrier; } + + if (defined $mark) { + my $cmt = $im->get_mark($mark); + $self->{last_commit}->[$self->{max_git}] = $cmt; + } if ($purge && scalar keys %$purge) { return purge_oids($self, $purge); } @@ -299,18 +309,34 @@ sub remove { sub purge { my ($self, $mime) = @_; - remove_internal($self, $mime, undef, {}); + my $purges = remove_internal($self, $mime, undef, {}); + $self->idx_init if @$purges; # ->done is called on purges + for my $i (0..$#$purges) { + defined(my $cmt = $purges->[$i]) or next; + 
$self->{last_commit}->[$i] = $cmt; + } + $purges; } +sub set_last_commits ($) { + my ($self) = @_; + defined(my $max_git = $self->{max_git}) or return; + my $mm = $self->{mm}; + my $last_commit = $self->{last_commit}; + foreach my $i (0..$max_git) { + defined(my $cmt = $last_commit->[$i]) or next; + $last_commit->[$i] = undef; + $mm->last_commit_n($i, $cmt); + } +} sub done { my ($self) = @_; my $im = delete $self->{im}; $im->done if $im; # PublicInbox::Import::done - if (my $mm = delete $self->{mm}) { - $mm->{dbh}->commit; - } + my $mm = $self->{mm}; + $mm->{dbh}->commit if $mm; # order matters, we can only close {over} after all partitions # are done because the partitions also write to {over} @@ -323,6 +349,14 @@ sub done { my $over = $self->{over}; $over->remote_commit; $over->remote_close; + + if ($mm) { + $mm->{dbh}->begin_work; + set_last_commits($self); + $mm->{dbh}->commit; + delete $self->{mm}; + } + $self->{transact_bytes} = 0; $self->lock_release if $parts; } @@ -358,6 +392,12 @@ sub barrier { $over->barrier_wait; # wait for each Xapian partition $over->commit_fsync if $fsync; + # last_commit is special, don't commit these until + # remote partitions are done: + $dbh->begin_work; + set_last_commits($self); + $dbh->commit; + $dbh->begin_work; } $self->{transact_bytes} = 0; @@ -449,13 +489,14 @@ sub importer { } sub import_init { - my ($self, $git, $packed_bytes) = @_; + my ($self, $git, $packed_bytes, $tmp) = @_; my $im = PublicInbox::Import->new($git, undef, undef, $self->{-inbox}); $im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR); $im->{want_object_info} = 1; $im->{lock_path} = undef; $im->{path_type} = 'v2'; - $self->{im} = $im; + $self->{im} = $im unless $tmp; + $im; } # XXX experimental @@ -608,63 +649,181 @@ sub reindex_oid { } } -sub reindex { - my ($self, $regen) = @_; +# only update last_commit for $i on reindex iff newer than current +sub update_last_commit { + my ($self, $git, $i, $cmt) = @_; + my $last = 
$self->{mm}->last_commit_n($i); + if (defined $last && is_ancestor($git, $last, $cmt)) { + my @cmd = (qw(rev-list --count), "$last..$cmt"); + chomp(my $n = $git->qx(@cmd)); + return if $n ne '' && $n == 0; + } + $self->{mm}->last_commit_n($i, $cmt); +} + +sub git_dir_n ($$) { "$_[0]->{-inbox}->{mainrepo}/git/$_[1].git" } + +sub last_commits { + my ($self, $max_git) = @_; + my $heads = []; + for (my $i = $max_git; $i >= 0; $i--) { + $heads->[$i] = $self->{mm}->last_commit_n($i); + } + $heads; +} + +sub is_ancestor ($$$) { + my ($git, $cur, $tip) = @_; + return 0 unless $git->check($cur); + my $cmd = [ 'git', "--git-dir=$git->{git_dir}", + qw(merge-base --is-ancestor), $cur, $tip ]; + my $pid = spawn($cmd); + defined $pid or die "spawning ".join(' ', @$cmd)." failed: $!"; + waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish'; + $? == 0; +} + +sub index_prepare { + my ($self, $opts, $max_git, $ranges) = @_; + my $regen_max = 0; + my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master'; + for (my $i = $max_git; $i >= 0; $i--) { + die "already indexing!\n" if $self->{index_pipe}; + my $git_dir = git_dir_n($self, $i); + -d $git_dir or next; # missing parts are fine + my $git = PublicInbox::Git->new($git_dir); + chomp(my $tip = $git->qx('rev-parse', $head)); + my $range; + if (defined(my $cur = $ranges->[$i])) { + $range = "$cur..$tip"; + if (is_ancestor($git, $cur, $tip)) { # common case + my $n = $git->qx(qw(rev-list --count), $range); + chomp($n); + if ($n == 0) { + $ranges->[$i] = undef; + next; + } + } else { + warn <<""; +discontiguous range: $range +Rewritten history? 
(in $git_dir) + + my $base = $git->qx('merge-base', $tip, $cur); + chomp $base; + if ($base) { + $range = "$base..$tip"; + warn "found merge-base: $base\n" + } else { + $range = $tip; + warn <<""; +discarding history at $cur + + } + warn <<""; +reindexing $git_dir starting at +$range + + $self->{"unindex-range.$i"} = "$base..$cur"; + } + } else { + $range = $tip; # all of it + } + $ranges->[$i] = $range; + + # can't use 'rev-list --count' if we use --diff-filter + my $fh = $git->popen(qw(log --pretty=tformat:%h + --no-notes --no-color --no-renames + --diff-filter=AM), $range, '--', 'm'); + ++$regen_max while <$fh>; + } + \$regen_max; +} + +sub unindex_oid { + my ($self, $git, $oid) = @_; + my $msgref = $git->cat_file($oid); + my $mime = PublicInbox::MIME->new($msgref); + my $mids = mids($mime->header_obj); + $mime = $msgref = undef; + + foreach my $mid (@$mids) { + my %gone; + $self->{-inbox}->search->reopen->each_smsg_by_mid($mid, sub { + my ($smsg) = @_; + $smsg->load_expand; + $gone{$smsg->num} = 1 if $oid eq $smsg->{blob}; + 1; # continue + }); + my $n = scalar keys %gone; + next unless $n; + if ($n > 1) { + warn "BUG: multiple articles linked to $oid\n", + join(',',sort keys %gone), "\n"; + } + $self->{unindexed}->{$_}++ foreach keys %gone; + $_->remote_remove($oid, $mid) foreach @{$self->{idx_parts}}; + $self->{over}->remove_oid($oid, $mid); + $self->barrier; + } +} + +my $x40 = qr/[a-f0-9]{40}/; +sub unindex { + my ($self, $opts, $git, $unindex_range) = @_; + my $un = $self->{unindexed} ||= {}; # num => removal count + $self->barrier; + my $before = scalar keys %$un; + my @cmd = qw(log --raw -r + --no-notes --no-color --no-abbrev --no-renames); + my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $unindex_range); + while (<$fh>) { + /\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o or next; + $self->unindex_oid($git, $1); + } + delete $self->{reindex_pipe}; + $fh = undef; + + return unless $opts->{prune}; + my $after = scalar keys %$un; + return if $before == $after; 
+ + # ensure any blob can no longer be accessed via dumb HTTP + PublicInbox::Import::run_die(['git', "--git-dir=$git->{git_dir}", + qw(-c gc.reflogExpire=now gc --prune=all)]); +} + +sub index_sync { + my ($self, $opts) = @_; + $opts ||= {}; my $ibx = $self->{-inbox}; - my $pfx = "$ibx->{mainrepo}/git"; my $max_git; my $latest = git_dir_latest($self, \$max_git); return unless defined $latest; - my $head = $ibx->{ref_head} || 'refs/heads/master'; $self->idx_init; # acquire lock - my $x40 = qr/[a-f0-9]{40}/; my $mm_tmp = $self->{mm}->tmp_clone; - if (!$regen) { - my (undef, $max) = $mm_tmp->minmax; - unless (defined $max) { - $regen = 1; - warn -"empty msgmap.sqlite3, regenerating article numbers\n"; - } - } - my $tip; # latest commit out of all git repos - if ($regen) { - my $regen_max = 0; - for (my $cur = $max_git; $cur >= 0; $cur--) { - die "already reindexing!\n" if $self->{reindex_pipe}; - my $git = PublicInbox::Git->new("$pfx/$cur.git"); - -d $git->{git_dir} or next; # missing parts are fine - chomp($tip = $git->qx('rev-parse', $head)) unless $tip; - my $h = $cur == $max_git ? $tip : $head; - - # can't use 'rev-list --count' if we use --diff-filter - my $fh = $git->popen(qw(log --pretty=tformat:%h - --no-notes --no-color --no-renames - --diff-filter=AM), $h, '--', 'm'); - ++$regen_max while <$fh>; - } - die "No messages found in $pfx/*.git, bug?\n" unless $regen_max; - $regen = \$regen_max; - } + my $ranges = $opts->{reindex} ? 
[] : $self->last_commits($max_git); + + my ($min, $max) = $mm_tmp->minmax; + my $regen = $self->index_prepare($opts, $max_git, $ranges); + $$regen += $max if $max; my $D = {}; my @cmd = qw(log --raw -r --pretty=tformat:%h --no-notes --no-color --no-abbrev --no-renames); - # if we are regenerating, we must not use a newer tip commit than what - # the regeneration counter used: - $tip ||= $head; - # work backwards through history - for (my $cur = $max_git; $cur >= 0; $cur--) { + my $last_commit = []; + for (my $i = $max_git; $i >= 0; $i--) { + my $git_dir = git_dir_n($self, $i); die "already reindexing!\n" if delete $self->{reindex_pipe}; - my $cmt; - my $git_dir = "$pfx/$cur.git"; -d $git_dir or next; # missing parts are fine my $git = PublicInbox::Git->new($git_dir); - my $h = $cur == $max_git ? $tip : $head; - my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $h); + my $unindex = delete $self->{"unindex-range.$i"}; + $self->unindex($opts, $git, $unindex) if $unindex; + defined(my $range = $ranges->[$i]) or next; + my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $range); + my $cmt; while (<$fh>) { - if (/\A$x40$/o) { + if (/\A$x40$/o && !defined($cmt)) { chomp($cmt = $_); } elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) { $self->reindex_oid($mm_tmp, $D, $git, $1, @@ -673,19 +832,10 @@ sub reindex { $self->mark_deleted($D, $git, $1); } } + $fh = undef; delete $self->{reindex_pipe}; + $self->update_last_commit($git, $i, $cmt) if defined $cmt; } - my $gaps; - if ($regen && $$regen != 0) { - warn "W: leftover article number ($$regen)\n"; - $gaps = 1; - } - my ($min, $max) = $mm_tmp->minmax; - if (defined $max) { - warn "W: leftover article numbers at $min..$max\n"; - $gaps = 1; - } - warn "W: were old git partitions deleted?\n" if $gaps; my @d = sort keys %$D; if (@d) { warn "BUG: ", scalar(@d)," unseen deleted messages marked\n"; @@ -694,6 +844,7 @@ sub reindex { warn "<$mid>\n"; } } + $self->done; } 1; diff --git a/script/public-inbox-convert 
b/script/public-inbox-convert index 56ac44f..9aa2781 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -125,6 +125,6 @@ my $mm = $old->mm; $mm->{dbh}->sqlite_backup_to_file("$new_dir/msgmap.sqlite3") if $mm; $v2w->done; if ($index) { - $v2w->reindex; + $v2w->index_sync; $v2w->done; } diff --git a/script/public-inbox-index b/script/public-inbox-index index 52d6ba7..73f88ac 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -23,12 +23,12 @@ if ($@) { } my $reindex; -my $regen; +my $prune; my $jobs = undef; my %opts = ( '--reindex' => \$reindex, - '--regenerate' => \$regen, '--jobs|j=i' => \$jobs, + '--prune' => \$prune, ); GetOptions(%opts) or die "bad command-line args\n$usage"; die "--jobs must be positive\n" if defined $jobs && $jobs <= 0; @@ -115,16 +115,7 @@ sub index_dir { } } } - my $mm = $repo->mm; - my (undef, $max) = $mm->minmax if $mm; - if (defined($max) && !$reindex && !$regen) { - die -"v2 inboxes may only use --reindex and/or --regenerate once\n". 
-"msgmap.sqlite3 is initialized\n"; - } - - $v2w->reindex($regen); - $v2w->done; + $v2w->index_sync({ reindex => $reindex, prune => $prune }); } else { my $s = PublicInbox::SearchIdx->new($repo, 1); $s->index_sync({ reindex => $reindex }); diff --git a/t/v2mirror.t b/t/v2mirror.t new file mode 100644 index 0000000..0c66aef --- /dev/null +++ b/t/v2mirror.t @@ -0,0 +1,176 @@ +# Copyright (C) 2018 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; + +# Integration tests for HTTP cloning + mirroring +foreach my $mod (qw(Plack::Util Plack::Builder Danga::Socket + HTTP::Date HTTP::Status Search::Xapian DBD::SQLite)) { + eval "require $mod"; + plan skip_all => "$mod missing for v2mirror.t" if $@; +} +use File::Temp qw/tempdir/; +use IO::Socket; +use POSIX qw(dup2); +use PublicInbox::V2Writable; +use PublicInbox::MIME; +use PublicInbox::Config; +use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD); +# FIXME: too much setup +my $tmpdir = tempdir('pi-v2mirror-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $script = 'blib/script/public-inbox'; +my $pi_config = "$tmpdir/config"; +{ + open my $fh, '>', $pi_config or die "open($pi_config): $!"; + print $fh <<"" or die "print $pi_config: $!"; +[publicinbox "v2"] + mainrepo = $tmpdir/in + address = test\@example.com + + close $fh or die "close($pi_config): $!"; +} +local $ENV{PI_CONFIG} = $pi_config; + +my $cfg = PublicInbox::Config->new($pi_config); +my $ibx = $cfg->lookup('test@example.com'); +ok($ibx, 'inbox found'); +$ibx->{version} = 2; +my $v2w = PublicInbox::V2Writable->new($ibx, 1); +ok $v2w, 'v2w loaded'; +$v2w->{parallel} = 0; +my $mime = PublicInbox::MIME->new(<<''); +From: Me +To: You +Subject: a +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +for my $i (1..9) { + $mime->header_set('Message-ID', "<$i\@example.com>"); + $mime->header_set('Subject', "subject = $i"); + ok($v2w->add($mime), "add msg $i OK"); +} +$v2w->barrier; + +my %opts = ( + LocalAddr => '127.0.0.1', + ReuseAddr => 1, + Proto => 'tcp', + Listen 
=> 1024, +); +my ($sock, $pid); +END { kill 'TERM', $pid if defined $pid }; + +$! = 0; +$sock = IO::Socket::INET->new(%opts); +ok($sock, 'sock created'); +my $fl = fcntl($sock, F_GETFD, 0); +$pid = fork; +if ($pid == 0) { + # pretend to be systemd + fcntl($sock, F_SETFD, $fl &= ~FD_CLOEXEC); + dup2(fileno($sock), 3) or die "dup2 failed: $!\n"; + $ENV{LISTEN_PID} = $$; + $ENV{LISTEN_FDS} = 1; + exec "$script-httpd", "--stdout=$tmpdir/out", "--stderr=$tmpdir/err"; + die "FAIL: $!\n"; +} +ok(defined $pid, 'forked httpd process successfully'); +my ($host, $port) = ($sock->sockhost, $sock->sockport); +$sock = undef; + +my @cmd = (qw(git clone --mirror -q), "http://$host:$port/v2/0", + "$tmpdir/m/git/0.git"); + +is(system(@cmd), 0, 'cloned OK'); +ok(-d "$tmpdir/m/git/0.git", 'mirror OK');; + +@cmd = ("$script-init", '-V2', 'm', "$tmpdir/m", 'http://example.com/m', + 'alt@example.com'); +is(system(@cmd), 0, 'initialized public-inbox -V2'); +is(system("$script-index", "$tmpdir/m"), 0, 'indexed'); + +my $mibx = { mainrepo => "$tmpdir/m", address => 'alt@example.com' }; +$mibx = PublicInbox::Inbox->new($mibx); +is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); + +for my $i (10..15) { + $mime->header_set('Message-ID', "<$i\@example.com>"); + $mime->header_set('Subject', "subject = $i"); + ok($v2w->add($mime), "add msg $i OK"); +} +$v2w->barrier; +is(system('git', "--git-dir=$tmpdir/m/git/0.git", 'fetch', '-q'), 0, + 'fetch successful'); + +my $mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1}); +is(scalar($mset->items), 0, 'new message not found in mirror, yet'); +is(system("$script-index", "$tmpdir/m"), 0, 'index updated'); +is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); +$mset = $mibx->search->reopen->query('m:15@example.com', {mset => 1}); +is(scalar($mset->items), 1, 'found message in mirror'); + +# purge: +$mime->header_set('Message-ID', '<10@example.com>'); +$mime->header_set('Subject', 'subject = 
10'); +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + ok($v2w->purge($mime), 'purge a message'); + my $warn = join('', @warn); + like($warn, qr/purge rewriting/); + my @subj = ($warn =~ m/^# subject .*$/mg); + is_deeply(\@subj, ["# subject = 10"], "only rewrote one"); +} + +$v2w->barrier; + +my $msgs = $mibx->search->{over_ro}->get_thread('10@example.com'); +my $to_purge = $msgs->[0]->{blob}; +like($to_purge, qr/\A[a-f0-9]{40,}\z/, 'read blob to be purged'); +$mset = $ibx->search->reopen->query('m:10@example.com', {mset => 1}); +is(scalar($mset->items), 0, 'purged message gone from origin'); + +is(system('git', "--git-dir=$tmpdir/m/git/0.git", 'fetch', '-q'), 0, + 'fetch successful'); +{ + open my $err, '+>', "$tmpdir/index-err" or die "open: $!"; + my $ipid = fork; + if ($ipid == 0) { + dup2(fileno($err), 2) or die "dup2 failed: $!"; + exec("$script-index", '--prune', "$tmpdir/m"); + die "exec fail: $!"; + } + ok($ipid, 'running index..'); + is(waitpid($ipid, 0), $ipid, 'index --prune done'); + is($?, 0, 'no error from index'); + ok(seek($err, 0, 0), 'rewound stderr'); + $err = eval { local $/; <$err> }; + like($err, qr/discontiguous range/, 'warned about discontiguous range'); + unlike($err, qr/fatal/, 'no scary fatal error shown'); +} + +$mset = $mibx->search->reopen->query('m:10@example.com', {mset => 1}); +is(scalar($mset->items), 0, 'purged message not found in mirror'); +is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'minmax still synced'); +for my $i ((1..9),(11..15)) { + $mset = $mibx->search->query("m:$i\@example.com", {mset => 1}); + is(scalar($mset->items), 1, "$i\@example.com remains visible"); +} +is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); + +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + $v2w->index_sync; + is_deeply(\@warn, [], 'no warnings from index_sync after purge'); +} + +$v2w->done; +ok(kill('TERM', $pid), 'killed httpd'); +$pid = undef; +waitpid(-1, 0); + 
+done_testing(); + +1; diff --git a/t/v2reindex.t b/t/v2reindex.t index bf44fa0..9bc271f 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -49,14 +49,14 @@ $im->done; my $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); -eval { $im->reindex }; +eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; my $xap = "$mainrepo/xap".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); -eval { $im->reindex }; +eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); @@ -70,9 +70,9 @@ ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; - eval { $im->reindex }; + eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); - like(join(' ', @warn), qr/regenerat/, 'warned about regenerating'); + is(scalar(@warn), 0, 'no warnings from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; @@ -85,7 +85,7 @@ ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; - eval { $im->reindex(my $regen = 1) }; + eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; -- EW