From fc62e40e4e3d00a00377ba26aeca010880158caa Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Fri, 16 Feb 2018 02:59:11 +0000 Subject: search: stop assuming Message-ID is unique In general, they are, but there's no way for us or a general purpose mail server to enforce that. This is a step in allowing us to handle more corner cases which existing lists throw at us. --- lib/PublicInbox/ExtMsg.pm | 2 +- lib/PublicInbox/Search.pm | 14 ++++++++++++-- lib/PublicInbox/SearchIdx.pm | 10 ++++++---- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 4e31ef0f..90d68db8 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -46,7 +46,7 @@ sub ext_msg { } # try to find the URL with Xapian to avoid forking - my $doc_id = eval { $s->find_unique_doc_id('XMID' . $mid) }; + my $doc_id = eval { $s->find_first_doc_id('XMID' . $mid) }; if ($@) { # xapian not configured properly for this repo push @nox, $other; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 3ec96ca9..33a1f2d3 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -56,7 +56,7 @@ my %bool_pfx_internal = ( ); my %bool_pfx_external = ( - mid => 'XMID', # uniQue id (Message-ID) + mid => 'XMID', # Message-ID (full/exact) ); my %prob_prefix = ( @@ -285,7 +285,7 @@ sub lookup_message { my ($self, $mid) = @_; $mid = mid_clean($mid); - my $doc_id = $self->find_unique_doc_id('XMID' . $mid); + my $doc_id = $self->find_first_doc_id('XMID' . 
$mid); my $smsg; if (defined $doc_id) { # raises on error: @@ -327,6 +327,16 @@ sub find_doc_ids { ($db->postlist_begin($termval), $db->postlist_end($termval)); } +sub find_first_doc_id { + my ($self, $termval) = @_; + + my ($begin, $end) = $self->find_doc_ids($termval); + + return undef if $begin->equal($end); # not found + + $begin->get_docid; +} + # normalize subjects so they are suitable as pathnames for URLs # XXX: consider for removal sub subject_path { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index fa5057fd..265403a3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -366,12 +366,14 @@ sub remove_message { $mid = mid_clean($mid); eval { - $doc_id = $self->find_unique_doc_id('XMID' . $mid); - if (defined $doc_id) { - $db->delete_document($doc_id); - } else { + my ($head, $tail) = $self->find_doc_ids('XMID' . $mid); + if ($head->equal($tail)) { warn "cannot remove non-existent <$mid>\n"; } + for (; $head != $tail; $head->inc) { + my $docid = $head->get_docid; + $db->delete_document($docid); + } }; if ($@) { -- cgit v1.2.3-24-ge0c7