user/dev discussion of public-inbox itself
 help / Atom feed
* [PATCH] SearchIdx: Decrement regen_down even for added messages that are later deleted.
@ 2018-07-17 22:06 ebiederm
  2018-07-18 10:15 ` Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: ebiederm @ 2018-07-17 22:06 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta


Decrement regen_down when visiting messages that appear in %D that we
know will later be deleted.  This ensures consistent message numbers are
generated no matter which commit number is on top.  This allows deletes to
propagate separately from the messages they delete without causing
problems.

The v2 trees already do this, and when the indexes are deleted and
rebuilt they maintain their commit numbers.

Add a v1 version of the v2reindex test to verify that reindexing is
working properly on v1 as well as v2.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 lib/PublicInbox/SearchIdx.pm |   7 ++-
 t/v1reindex.t                | 109 +++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 t/v1reindex.t

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 107cd3457133..0e0796c12c12 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -561,7 +561,12 @@ sub read_log {
 	while (defined($line = <$log>)) {
 		if ($line =~ /$addmsg/o) {
 			my $blob = $1;
-			delete $D{$blob} and next;
+			if (delete $D{$blob}) {
+				if (defined $self->{regen_down}) {
+					$self->{regen_down}--;
+				}
+				next;
+			}
 			my $mime = do_cat_mail($git, $blob, \$bytes) or next;
 			batch_adjust(\$max, $bytes, $batch_cb, $latest);
 			$add_cb->($self, $mime, $bytes, $blob);
diff --git a/t/v1reindex.t b/t/v1reindex.t
new file mode 100644
index 000000000000..7b8d883753ee
--- /dev/null
+++ b/t/v1reindex.t
@@ -0,0 +1,109 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::MIME;
+use PublicInbox::ContentId qw(content_digest);
+use File::Temp qw/tempdir/;
+use File::Path qw(remove_tree);
+
+foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for v1reindex.t" if $@;
+}
+use_ok 'PublicInbox::SearchIdx';
+use_ok 'PublicInbox::Import';
+my $mainrepo = tempdir('pi-v1reindex-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+is(system(qw(git init --bare), $mainrepo), 0);
+my $ibx_config = {
+	mainrepo => $mainrepo,
+	name => 'test-v1reindex',
+	-primary_address => 'test@example.com',
+};
+my $ibx = PublicInbox::Inbox->new($ibx_config);
+my $mime = PublicInbox::MIME->create(
+	header => [
+		From => 'a@example.com',
+		To => 'test@example.com',
+		Subject => 'this is a subject',
+		Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+	],
+	body => "hello world\n",
+);
+my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+foreach my $i (1..10) {
+	$mime->header_set('Message-Id', "<$i\@example.com>");
+	ok($im->add($mime), "message $i added");
+	if ($i == 4) {
+		$im->remove($mime);
+	}
+}
+
+if ('test remove later') {
+	$mime->header_set('Message-Id', "<5\@example.com>");
+	$im->remove($mime);
+}
+
+$im->done;
+my $rw = PublicInbox::SearchIdx->new($ibx, 1);
+eval { $rw->index_sync() };
+is($@, '', 'no error from indexing');
+
+my $minmax = [ $ibx->mm->minmax ];
+ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined');
+is_deeply($minmax, [ 1, 10 ], 'minmax as expected');
+
+$rw = PublicInbox::SearchIdx->new($ibx, 1);
+eval { $rw->index_sync({reindex => 1}) };
+is($@, '', 'no error from reindexing');
+$im->done;
+
+my $xap = "$mainrepo/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION();
+remove_tree($xap);
+ok(!-d $xap, 'Xapian directories removed');
+$rw = PublicInbox::SearchIdx->new($ibx, 1);
+
+eval { $rw->index_sync({reindex => 1}) };
+is($@, '', 'no error from reindexing');
+$im->done;
+ok(-d $xap, 'Xapian directories recreated');
+
+delete $ibx->{mm};
+is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
+
+ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap');
+remove_tree($xap);
+$rw = PublicInbox::SearchIdx->new($ibx, 1);
+
+ok(!-d $xap, 'Xapian directories removed again');
+{
+	my @warn;
+	#local $SIG{__WARN__} = sub { push @warn, @_ };
+	eval { $rw->index_sync({reindex => 1}) };
+	is($@, '', 'no error from reindexing without msgmap');
+	is(scalar(@warn), 0, 'no warnings from reindexing');
+	$im->done;
+	ok(-d $xap, 'Xapian directories recreated');
+	delete $ibx->{mm};
+	is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
+}
+
+ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap');
+remove_tree($xap);
+$rw = PublicInbox::SearchIdx->new($ibx, 1);
+
+ok(!-d $xap, 'Xapian directories removed again');
+{
+	my @warn;
+	local $SIG{__WARN__} = sub { push @warn, @_ };
+	eval { $rw->index_sync({reindex => 1}) };
+	is($@, '', 'no error from reindexing without msgmap');
+	is_deeply(\@warn, [], 'no warnings');
+	$im->done;
+	ok(-d $xap, 'Xapian directories recreated');
+	delete $ibx->{mm};
+	is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
+}
+
+done_testing();
-- 
2.17.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] SearchIdx: Decrement regen_down even for added messages that are later deleted.
  2018-07-17 22:06 [PATCH] SearchIdx: Decrement regen_down even for added messages that are later deleted ebiederm
@ 2018-07-18 10:15 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2018-07-18 10:15 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: meta

"Eric W. Biederman" <ebiederm@xmission.com> wrote:
> Decrement regen_down when visiting messages that appear in %D that we
> know will later be deleted.  This ensures consistent message numbers are
> generated no matter which commit number is on top.  This allows deletes to
> propagate separately from the messages they delete without causing
> problems.

Thanks!  I completely forgot I did regen_down for
017fed7bc4d33ac474a19356994be5bd0bfe68ba

> The v2 trees already do this, and when the indexes are deleted and
> rebuilt they maintain their commit numbers.
> 
> Add a v1 version of the v2reindex test to verify that reindexing is
> working properly on v1 as well as v2.

I noticed a few minor nits in the test, so I will squash the
following in to quiet down git-init and ensure warnings are
tested:

diff --git a/t/v1reindex.t b/t/v1reindex.t
index 7b8d883..0df36d3 100644
--- a/t/v1reindex.t
+++ b/t/v1reindex.t
@@ -15,7 +15,7 @@ foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
 use_ok 'PublicInbox::SearchIdx';
 use_ok 'PublicInbox::Import';
 my $mainrepo = tempdir('pi-v1reindex-XXXXXX', TMPDIR => 1, CLEANUP => 1);
-is(system(qw(git init --bare), $mainrepo), 0);
+is(system(qw(git init -q --bare), $mainrepo), 0);
 my $ibx_config = {
 	mainrepo => $mainrepo,
 	name => 'test-v1reindex',
@@ -79,7 +79,7 @@ $rw = PublicInbox::SearchIdx->new($ibx, 1);
 ok(!-d $xap, 'Xapian directories removed again');
 {
 	my @warn;
-	#local $SIG{__WARN__} = sub { push @warn, @_ };
+	local $SIG{__WARN__} = sub { push @warn, @_ };
 	eval { $rw->index_sync({reindex => 1}) };
 	is($@, '', 'no error from reindexing without msgmap');
 	is(scalar(@warn), 0, 'no warnings from reindexing');

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-17 22:06 [PATCH] SearchIdx: Decrement regen_down even for added messages that are later deleted ebiederm
2018-07-18 10:15 ` Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox