From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net
X-Spam-Level:
X-Spam-ASN:
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0
Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 971551FAF7 for ; Tue, 6 Mar 2018 08:42:43 +0000 (UTC)
From: "Eric Wong (Contractor, The Linux Foundation)"
To: meta@public-inbox.org
Subject: [PATCH 20/34] searchidx: add NNTP article number as a searchable term
Date: Tue, 6 Mar 2018 08:42:28 +0000
Message-Id: <20180306084242.19988-21-e@80x24.org>
In-Reply-To: <20180306084242.19988-1-e@80x24.org>
References: <20180306084242.19988-1-e@80x24.org>
List-Id:

Since we support duplicate MIDs in v2, the NNTP article number
becomes the true unique identifier and we want a way to do fast
lookups on it.

While we're at it, stop putting XPATH in the term partitions
since we only need it in the skeleton DB.
---
 lib/PublicInbox/SearchIdx.pm         | 8 +++++---
 lib/PublicInbox/SearchIdxSkeleton.pm | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f63e072..3ef444d 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -322,7 +322,6 @@ sub add_message {
 		if ($subj ne '') {
 			$xpath = $self->subject_path($subj);
 			$xpath = id_compress($xpath);
-			$doc->add_boolean_term('XPATH' . $xpath);
 		}
 
 		my $lines = $mime->body_raw =~ tr!\n!\n!;
@@ -398,7 +397,8 @@ sub add_message {
 			$skel->index_skeleton(\@values);
 			$doc_id = $self->{xdb}->add_document($doc);
 		} else {
-			$doc_id = link_and_save($self, $doc, $mids, $refs);
+			$doc_id = link_and_save($self, $doc, $mids, $refs,
+						$num, $xpath);
 		}
 	};
 
@@ -504,10 +504,12 @@ sub link_doc {
 }
 
 sub link_and_save {
-	my ($self, $doc, $mids, $refs) = @_;
+	my ($self, $doc, $mids, $refs, $num, $xpath) = @_;
 	my $db = $self->{xdb};
 	my $old_tid;
 	my $doc_id;
+	$doc->add_boolean_term('XNUM' . $num) if defined $num;
+	$doc->add_boolean_term('XPATH' . $xpath) if defined $xpath;
 	my $vivified = 0;
 	foreach my $mid (@$mids) {
 		$self->each_smsg_by_mid($mid, sub {
diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm
index 3fe6a4a..4066b59 100644
--- a/lib/PublicInbox/SearchIdxSkeleton.pm
+++ b/lib/PublicInbox/SearchIdxSkeleton.pm
@@ -98,7 +98,6 @@ sub index_skeleton_real ($$) {
 	my $ts = $values->[PublicInbox::Search::TS];
 	my $smsg = PublicInbox::SearchMsg->new(undef);
 	my $doc = $smsg->{doc};
-	$doc->add_boolean_term('XPATH' . $xpath) if defined $xpath;
 	foreach my $mid (@$mids) {
 		$doc->add_term('Q' . $mid);
 	}
@@ -106,8 +105,9 @@ sub index_skeleton_real ($$) {
 	$doc->set_data($doc_data);
 	$smsg->{ts} = $ts;
 	$smsg->load_from_data($doc_data);
+	my $num = $values->[PublicInbox::Search::NUM];
 	my @refs = ($smsg->references =~ /<([^>]+)>/g);
-	$self->link_and_save($doc, $mids, \@refs);
+	$self->link_and_save($doc, $mids, \@refs, $num, $xpath);
 }
 
 1;
-- 
EW