user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/2] fix v2 mirrors of reused Message-IDs
@ 2021-10-18  5:09 Eric Wong
  2021-10-18  5:09 ` [PATCH 1/2] extindex: show mismatches for messages deleted from inbox Eric Wong
  2021-10-18  5:09 ` [PATCH 2/2] v2: mirrors don't clobber msgs w/ reused Message-IDs Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2021-10-18  5:09 UTC (permalink / raw)
  To: meta

Eeep! :<

Eric Wong (2):
  extindex: show mismatches for messages deleted from inbox
  v2: mirrors don't clobber msgs w/ reused Message-IDs

 MANIFEST                        |  1 +
 lib/PublicInbox/ExtSearchIdx.pm | 14 ++++++++++---
 lib/PublicInbox/V2Writable.pm   |  7 ++++++-
 t/v2index-late-dupe.t           | 37 +++++++++++++++++++++++++++++++++
 4 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 t/v2index-late-dupe.t

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] extindex: show mismatches for messages deleted from inbox
  2021-10-18  5:09 [PATCH 0/2] fix v2 mirrors of reused Message-IDs Eric Wong
@ 2021-10-18  5:09 ` Eric Wong
  2021-10-18  5:09 ` [PATCH 2/2] v2: mirrors don't clobber msgs w/ reused Message-IDs Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2021-10-18  5:09 UTC (permalink / raw)
  To: meta

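There seems to be a bug in v2 inbox reindexing somewhere.
To help track it down, warn when a message appears to be deleted
from the inbox being indexed, and show the blob of every entry
found under the same Message-ID that did not match.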
---
 lib/PublicInbox/ExtSearchIdx.pm | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index f479cf9e1a3f..4b46fa1622ea 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -292,8 +292,8 @@ sub ck_existing { # git->cat_async callback
 
 # is the messages visible in the inbox currently being indexed?
 # return the number if so
-sub cur_ibx_xnum ($$) {
-	my ($req, $bref) = @_;
+sub cur_ibx_xnum ($$;$) {
+	my ($req, $bref, $mismatch) = @_;
 	my $ibx = $req->{ibx} or die 'BUG: current {ibx} missing';
 
 	$req->{eml} = PublicInbox::Eml->new($bref);
@@ -303,6 +303,7 @@ sub cur_ibx_xnum ($$) {
 		my ($id, $prev);
 		while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
 			return $x->{num} if $x->{blob} eq $req->{oid};
+			push @$mismatch, $x if $mismatch;
 		}
 	}
 	undef;
@@ -317,8 +318,15 @@ sub index_oid { # git->cat_async callback for 'm'
 		blob => $oid,
 	}, 'PublicInbox::Smsg';
 	$new_smsg->set_bytes($$bref, $size);
-	defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return;
 	++${$req->{nr}};
+	my $mismatch = [];
+	$req->{xnum} = cur_ibx_xnum($req, $bref, $mismatch) // do {
+		warn "# deleted\n";
+		warn "# mismatch $_->{blob}\n" for @$mismatch;
+		${$req->{latest_cmt}} = $req->{cur_cmt} //
+			die "BUG: {cur_cmt} unset ($oid)\n";
+		return;
+	};
 	do_step($req);
 }
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/2] v2: mirrors don't clobber msgs w/ reused Message-IDs
  2021-10-18  5:09 [PATCH 0/2] fix v2 mirrors of reused Message-IDs Eric Wong
  2021-10-18  5:09 ` [PATCH 1/2] extindex: show mismatches for messages deleted from inbox Eric Wong
@ 2021-10-18  5:09 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2021-10-18  5:09 UTC (permalink / raw)
  To: meta

For odd messages with reused Message-IDs, the second message
showing up in a mirror (via git-fetch + -index) should never
clobber an existing entry with a different blob in the overview
(over) index.

This is noticeable only if the messages arrive in-between
indexing runs.

Fixes: 4441a38481ed ("v2: index forwards (via `git log --reverse')")
---
 MANIFEST                      |  1 +
 lib/PublicInbox/V2Writable.pm |  7 ++++++-
 t/v2index-late-dupe.t         | 37 +++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 t/v2index-late-dupe.t

diff --git a/MANIFEST b/MANIFEST
index b5aae77747dd..af1522d71bd1 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -552,6 +552,7 @@ t/v1-add-remove-add.t
 t/v1reindex.t
 t/v2-add-remove-add.t
 t/v2dupindex.t
+t/v2index-late-dupe.t
 t/v2mda.t
 t/v2mirror.t
 t/v2reindex.t
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 3914383cc9d3..ed5182ae8460 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -813,8 +813,8 @@ sub index_oid { # cat_async callback
 			}
 		}
 	}
+	my $oidx = $self->{oidx};
 	if (!defined($num)) { # reuse if reindexing (or duplicates)
-		my $oidx = $self->{oidx};
 		for my $mid (@$mids) {
 			($num, $mid0) = $oidx->num_mid0_for_oid($oid, $mid);
 			last if defined $num;
@@ -822,6 +822,11 @@ sub index_oid { # cat_async callback
 	}
 	$mid0 //= do { # is this a number we got before?
 		$num = $arg->{mm_tmp}->num_for($mids->[0]);
+
+		# don't clobber existing if Message-ID is reused:
+		if (my $x = defined($num) ? $oidx->get_art($num) : undef) {
+			undef($num) if $x->{blob} ne $oid;
+		}
 		defined($num) ? $mids->[0] : undef;
 	};
 	if (!defined($num)) {
diff --git a/t/v2index-late-dupe.t b/t/v2index-late-dupe.t
new file mode 100644
index 000000000000..c83e3409044f
--- /dev/null
+++ b/t/v2index-late-dupe.t
@@ -0,0 +1,37 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# this simulates a mirror path: git fetch && -index
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use Test::More; # redundant, used for bisect
+require_mods 'v2';
+require PublicInbox::Import;
+require PublicInbox::Inbox;
+require PublicInbox::Git;
+my ($tmpdir, $for_destroy) = tmpdir();
+my $inboxdir = "$tmpdir/i";
+PublicInbox::Import::init_bare(my $e0 = "$inboxdir/git/0.git");
+open my $fh, '>', "$inboxdir/inbox.lock" or xbail $!;
+my $git = PublicInbox::Git->new($e0);
+my $im = PublicInbox::Import->new($git, qw(i i@example.com));
+$im->{lock_path} = undef;
+$im->{path_type} = 'v2';
+my $eml = eml_load('t/plack-qp.eml');
+ok($im->add($eml), 'add original');
+$im->done;
+run_script([qw(-index -Lbasic), $inboxdir]);
+is($?, 0, 'basic index');
+my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir });
+my $orig = $ibx->over->get_art(1);
+
+my @mid = $eml->header_raw('Message-ID');
+$eml->header_set('Message-ID', @mid, '<extra@z>');
+ok($im->add($eml), 'add another');
+$im->done;
+run_script([qw(-index -Lbasic), $inboxdir]);
+is($?, 0, 'basic index again');
+
+my $after = $ibx->over->get_art(1);
+is_deeply($after, $orig, 'original unchanged') or note explain([$orig,$after]);
+
+done_testing;

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-10-18  5:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-18  5:09 [PATCH 0/2] fix v2 mirrors of reused Message-IDs Eric Wong
2021-10-18  5:09 ` [PATCH 1/2] extindex: show mismatches for messages deleted from inbox Eric Wong
2021-10-18  5:09 ` [PATCH 2/2] v2: mirrors don't clobber msgs w/ reused Message-IDs Eric Wong

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).