From 1b7e935ab1690e28d790d8db5af2714f13c258cc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 14 May 2019 02:04:43 +0000 Subject: searchidx: fix incremental index with indexlevel=basic on v1 We were reindexing the full history every invocation of -index when Xapian was not used because we were incorrectly relying on 'last_commit' metadata stored in Xapian. Rewrite the indexing logic to be less confusing while we're at it, since we rely on `git merge-base --is-ancestor' nowadays. Furthermore, we need to handle message removals from the overview index correctly when Xapian is not in use. Co-authored-by: Eric W. Biederman --- t/indexlevels-mirror.t | 125 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 t/indexlevels-mirror.t (limited to 't') diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t new file mode 100644 index 00000000..e25b827f --- /dev/null +++ b/t/indexlevels-mirror.t @@ -0,0 +1,125 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use PublicInbox::MIME; +use PublicInbox::Inbox; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); +my $this = (split('/', __FILE__))[-1]; + +# TODO: remove Search::Xapian as a requirement for basic +foreach my $mod (qw(DBD::SQLite Search::Xapian)) { + eval "require $mod"; + plan skip_all => "$mod missing for $this" if $@; +} + +my $path = 'blib/script'; +my $index = "$path/public-inbox-index"; + +my $mime = PublicInbox::MIME->create( + header => [ + From => 'a@example.com', + To => 'test@example.com', + Subject => 'this is a subject', + Date => 'Fri, 02 Oct 1993 00:00:00 +0000', + ], + body => "hello world\n", +); + +sub import_index_incremental { + my ($v, $level) = @_; + my $tmpdir = tempdir("pi-$this-tmp-XXXXXX", TMPDIR => 1, CLEANUP => 1); + my $ibx = PublicInbox::Inbox->new({ + mainrepo => "$tmpdir/testbox", + name => "$this-$v", + version => $v, + -primary_address => 'test@example.com', + indexlevel => $level, + }); + my $cls = "PublicInbox::V${v}Writable"; + use_ok $cls; + my $im = $cls->new($ibx, {nproc=>1}); + $mime->header_set('Message-ID', ''); + ok($im->add($mime), 'first message added'); + $im->done; + + # index master (required for v1) + is(system($index, $ibx->{mainrepo}), 0, 'index master OK'); + my $ro_master = PublicInbox::Inbox->new({mainrepo => $ibx->{mainrepo}}); + my ($nr, $msgs) = $ro_master->recent; + is($nr, 1, 'only one message in master, so far'); + is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed'); + + # clone + my @cmd = (qw(git clone --mirror -q)); + my $mirror = "$tmpdir/mirror-$v"; + if ($v == 1) { + push @cmd, $ibx->{mainrepo}, $mirror; + } else { + push @cmd, "$ibx->{mainrepo}/git/0.git", "$mirror/git/0.git"; + } + my $fetch_dir = $cmd[-1]; + is(system(@cmd), 0, "v$v clone OK"); + + # inbox init + local $ENV{PI_CONFIG} = "$tmpdir/.picfg"; + @cmd = ("$path/public-inbox-init", '-L', $level, + 'mirror', $mirror, '//example.com/test', 'test@example.com'); + push @cmd, '-V2' if $v == 2; + is(system(@cmd), 0, "v$v init OK"); + + # index mirror + is(system($index, $mirror), 0, "v$v index mirror OK"); + + # read-only access + my $ro_mirror = PublicInbox::Inbox->new({mainrepo => $mirror}); + ($nr, $msgs) = $ro_mirror->recent; + is($nr, 1, 'only one message, so far'); + is($msgs->[0]->{mid}, 'm@1', 'read first message'); + + # update master + $mime->header_set('Message-ID', ''); + ok($im->add($mime), '2nd message added'); + $im->done; + + # mirror updates + is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(system($index, $mirror), 0, "v$v index mirror again OK"); + ($nr, $msgs) = $ro_mirror->recent; + is($nr, 2, '2nd message seen in mirror'); + is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], + ['m@1','m@2'], 'got both messages in mirror'); + + # incremental index master (required for v1) + is(system($index, $ibx->{mainrepo}), 0, 'index master OK'); + ($nr, $msgs) = $ro_master->recent; + is($nr, 2, '2nd message seen in master'); + is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], + ['m@1','m@2'], 'got both messages in master'); + + # remove message from master + ok($im->remove($mime), '2nd message removed'); + $im->done; + + # sync the mirror + is(system('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); + is(system($index, $mirror), 0, "v$v index mirror again OK"); + ($nr, $msgs) = $ro_mirror->recent; + is($nr, 1, '2nd message gone from mirror'); + is_deeply([map { $_->{mid} } @$msgs], ['m@1'], + 'message unavailable in mirror'); +} + +# we can probably cull some other tests and put full/medium tests, here +for my $level (qw(basic)) { + for my $v (1..2) { + subtest("v$v indexlevel=$level" => sub { + import_index_incremental($v, $level); + }) + } +} + +done_testing(); -- cgit v1.2.3-24-ge0c7