From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E66EF1FAEB; Thu, 29 Mar 2018 10:28:30 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Cc: "Eric Wong (Contractor, The Linux Foundation)" Subject: [PATCH 08/14] search: cleanup uniqueness checking Date: Thu, 29 Mar 2018 10:28:13 +0000 Message-Id: <20180329102819.15234-9-e@80x24.org> In-Reply-To: <20180329102819.15234-1-e@80x24.org> References: <20180329102819.15234-1-e@80x24.org> List-Id: The only Xapian term which should be unique is the NNTP article number; so we no longer need find_unique_doc_id. --- lib/PublicInbox/Search.pm | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index a4e2498..584a508 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -396,9 +396,16 @@ sub lookup_article { retry_reopen($self, sub { my $db = $self->{skel} || $self->{xdb}; my $head = $db->postlist_begin($term); - return if $head == $db->postlist_end($term); + my $tail = $db->postlist_end($term); + return if $head->equal($tail); my $doc_id = $head->get_docid; return unless defined $doc_id; + $head->inc; + if ($head->nequal($tail)) { + my $loc= $self->{mainrepo} . + ($self->{skel} ? 'skel' : 'xdb'); + warn "article #$num is not unique in $loc\n"; + } # raises on error: my $doc = $db->get_document($doc_id); $smsg = PublicInbox::SearchMsg->wrap($doc); @@ -432,21 +439,6 @@ sub each_smsg_by_mid { } } -sub find_unique_doc_id { - my ($self, $termval) = @_; - - my ($begin, $end) = $self->find_doc_ids($termval); - - return undef if $begin->equal($end); # not found - - my $rv = $begin->get_docid; - - # sanity check - $begin->inc; - $begin->equal($end) or die "Term '$termval' is not unique\n"; - $rv; -} - # returns begin and end PostingIterator sub find_doc_ids { my ($self, $termval) = @_; -- EW