From b02c7a346bd36f1325518ca110a781c619082da1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 24 Jul 2020 05:55:47 +0000 Subject: index: support --rethread switch to fix old indices Older versions of public-inbox < 1.3.0 had subtly different semantics around threading in some corner cases. This switch (when combined with --reindex) allows us to fix them by regenerating associations. --- t/v1reindex.t | 34 ++++++++++++++++++++++++++++++++++ t/v2reindex.t | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 't') diff --git a/t/v1reindex.t b/t/v1reindex.t index 9f23ef01..8cb75188 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -11,6 +11,7 @@ require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::SearchIdx'; use_ok 'PublicInbox::Import'; +use_ok 'PublicInbox::OverIdx'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx_config = { inboxdir => $inboxdir, @@ -427,5 +428,38 @@ ok(!-d $xap, 'Xapian directories removed again'); ], 'msgmap as expected' ); } +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my $ibx = PublicInbox::Inbox->new({ %$ibx_config }); + my $f = $ibx->over->{dbh}->sqlite_db_filename; + my $over = PublicInbox::OverIdx->new($f); + my $dbh = $over->connect; + my $non_ghost_tids = sub { + $dbh->selectall_arrayref(<<''); +SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC + + }; + my $before = $non_ghost_tids->(); + + # mess up threading: + my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread'); + my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid); + + my $rw = PublicInbox::SearchIdx->new($ibx, 1); + my @pr; + my $pr = sub { push @pr, @_ }; + $rw->index_sync({reindex => 1, rethread => 1, -progress => $pr }); + my @n = $dbh->selectrow_array(<<EOS, undef, $tid); +SELECT COUNT(tid) FROM over WHERE tid <= ? +EOS + is_deeply(\@n, [0], 'rethread dropped old threadids'); + my $after = $non_ghost_tids->(); + ok($after->[0]->[0] > $before->[-1]->[0], + 'all tids greater than before'); + is(scalar @$after, scalar @$before, 'thread count unchanged'); + is_deeply([], \@warn, 'no warnings'); + # diag "@pr"; # XXX do we 
care? +} done_testing(); diff --git a/t/v2reindex.t b/t/v2reindex.t index 77deffb4..ea2b24e5 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -10,6 +10,7 @@ use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; +use_ok 'PublicInbox::OverIdx'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx_config = { inboxdir => $inboxdir, @@ -423,6 +424,46 @@ ok(!-d $xap, 'Xapian directories removed again'); ], 'msgmap as expected' ); } +my $check_rethread = sub { + my ($desc) = @_; + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + my %config = %$ibx_config; + my $ibx = PublicInbox::Inbox->new(\%config); + my $f = $ibx->over->{dbh}->sqlite_db_filename; + my $over = PublicInbox::OverIdx->new($f); + my $dbh = $over->connect; + my $non_ghost_tids = sub { + $dbh->selectall_arrayref(<<''); +SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC + + }; + my $before = $non_ghost_tids->(); + + # mess up threading: + my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread'); + my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid); + diag "messing up all threads with tid=$tid"; + + my $v2w = PublicInbox::V2Writable->new($ibx); + my @pr; + my $pr = sub { push @pr, @_ }; + $v2w->index_sync({reindex => 1, rethread => 1, -progress => $pr}); + # diag "@pr"; # nobody cares + is_deeply(\@warn, [], 'no warnings on reindex + rethread'); + + my @n = $dbh->selectrow_array(<<EOS, undef, $tid); +SELECT COUNT(tid) FROM over WHERE tid <= ? +EOS + is_deeply(\@n, [0], 'rethread dropped old threadids'); + my $after = $non_ghost_tids->(); + ok($after->[0]->[0] > $before->[-1]->[0], + 'all tids greater than before'); + is(scalar @$after, scalar @$before, 'thread count unchanged'); +}; + +$check_rethread->('no-monster'); + # A real example from linux-renesas-soc on lore where a 3-headed monster # of a message has 3 sets of common headers. 
Another normal message # previously existed with a single Message-ID that conflicts with one @@ -497,4 +538,8 @@ EOF is_deeply([values %uniq], [3], 'search on different subjects'); } +# XXX: not deterministic when dealing with ambiguous messages, oh well +$check_rethread->('3-headed-monster once'); +$check_rethread->('3-headed-monster twice'); + done_testing(); -- cgit v1.2.3-24-ge0c7