From 017fed7bc4d33ac474a19356994be5bd0bfe68ba Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)"
Date: Wed, 18 Apr 2018 09:13:13 +0000
Subject: searchidx: regenerate and avoid article number gaps on full index

Some messages to git@vger went missing from Msgmap from old bugs
and became inaccessible via NNTP. Forcing NNTP article numbers
when the overview DB came about made the problem more visible
when reindexing old (v1) repositories as all removed spam
messages took up AUTOINCREMENT numbers again before they were
removed.

Having large gaps in NNTP article numbers is not good since it
throws off NNTP clients.

This does NOT prevent NNTP clients from seeing some messages
twice, but is better than having them miss several messages
entirely.

We also avoid depending on --reverse in git-log, as git requires
storing an entire commit list in memory for --reverse, so it's
cheaper to store only deleted blobs in the %D hash since they do
not live long.
---
 t/v1-add-remove-add.t | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 t/v1-add-remove-add.t

(limited to 't')

diff --git a/t/v1-add-remove-add.t b/t/v1-add-remove-add.t
new file mode 100644
index 00000000..cd6e2811
--- /dev/null
+++ b/t/v1-add-remove-add.t
@@ -0,0 +1,54 @@
+# Copyright (C) 2018 all contributors
+# License: AGPL-3.0+
+#
+# Regression test for v1 inboxes: a message that is added, removed and
+# then added again must show up exactly once after index_sync, and
+# Msgmap must report article number 2 for it.
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::MIME;
+use PublicInbox::Import;
+use PublicInbox::SearchIdx;
+use File::Temp qw/tempdir/;
+
+# Skip the whole test when one of these modules cannot be loaded.
+foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for v1-add-remove-add.t" if $@;
+}
+my $mainrepo = tempdir('pi-add-remove-add-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+is(system(qw(git init --bare), $mainrepo), 0, 'bare git repository created');
+my $ibx = {
+	mainrepo => $mainrepo,
+	name => 'test-add-remove-add',
+	-primary_address => 'test@example.com',
+};
+$ibx = PublicInbox::Inbox->new($ibx);
+my $mime = PublicInbox::MIME->create(
+	header => [
+		From => 'a@example.com',
+		To => 'test@example.com',
+		Subject => 'this is a subject',
+		Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+		'Message-ID' => '<a-mid@b>',
+	],
+	body => "hello world\n",
+);
+
+# Import the same message three times over: add, remove, add.
+my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+ok($im->add($mime), 'message added');
+ok($im->remove($mime), 'message removed');
+ok($im->add($mime), 'message added again');
+$im->done;
+
+# Run index_sync, then check what the inbox and its Msgmap report.
+my $rw = PublicInbox::SearchIdx->new($ibx, 1);
+$rw->index_sync;
+my $msgs = $ibx->recent({limit => 10});
+is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history');
+is(scalar @$msgs, 1, 'only one message in history');
+is($ibx->mm->num_for('a-mid@b'), 2, 'exists with second article number');
+
+done_testing();
-- 
cgit v1.2.3-24-ge0c7