diff options
author | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-04-07 03:41:53 +0000 |
---|---|---|
committer | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-04-07 03:42:29 +0000 |
commit | 3348ad4b3b1a0865ee58a902953165ea0f4aa4bd (patch) | |
tree | fd17dd1b4434cad0dc211c5e890e8c0d5a0d07ce /lib/PublicInbox/SearchIdx.pm | |
parent | 42c485400522c7c255f6da11391526cb1bc5931b (diff) | |
download | public-inbox-3348ad4b3b1a0865ee58a902953165ea0f4aa4bd.tar.gz |
Since we only query the SQLite over DB for OVER/XOVER, we do not need to waste space storing the To/Cc/:bytes/:lines fields or the XNUM term. We only use From/Subject/References/Message-ID/:blob in various places of the PSGI code. For reindexing, we will take advantage of docid stability in "xapian-compact --no-renumber" to ensure duplicates do not show up in search results. Since the PSGI interface is the only consumer of Xapian at the moment, it has no need to search based on NNTP article number.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 37 |
1 files changed, 4 insertions, 33 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 7cfa7452..f9b40b0d 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -273,18 +273,12 @@ sub add_message { my $smsg = PublicInbox::SearchMsg->new($mime); my $doc = $smsg->{doc}; my $subj = $smsg->subject; - - $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!; - defined $bytes or $bytes = length($mime->as_string); - $smsg->{bytes} = $bytes; - add_val($doc, PublicInbox::Search::TS(), $smsg->ts); my @ds = gmtime($smsg->ds); my $yyyymmdd = strftime('%Y%m%d', @ds); add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd); my $dt = strftime('%Y%m%d%H%M%S', @ds); add_val($doc, PublicInbox::Search::DT(), $dt); - my @vals = ($smsg->{ts}, $smsg->{ds}); my $tg = $self->term_generator; @@ -333,11 +327,11 @@ sub add_message { index_body($tg, \@orig, $doc) if @orig; }); - # populates smsg->references for smsg->to_doc_data - my $data = $smsg->to_doc_data($oid, $mid0); foreach my $mid (@$mids) { $tg->index_text($mid, 1, 'XM'); } + $smsg->{to} = $smsg->{cc} = ''; + my $data = $smsg->to_doc_data($oid, $mid0); $doc->set_data($data); if (my $altid = $self->{-altid}) { foreach my $alt (@$altid) { @@ -350,24 +344,11 @@ sub add_message { } } - $self->delete_article($num) if defined $num; # for reindexing - if (my $over = $self->{over}) { - utf8::encode($data); - $data = compress($data); - my $refs = $over->parse_references($smsg, $mid0, $mids); - my $xpath; - if ($subj ne '') { - $xpath = $self->subject_path($subj); - $xpath = id_compress($xpath); - } - - push @vals, $num, $mids, $refs, $xpath, $data; - $over->add_over(\@vals); + $over->add_overview($mime, $bytes, $num, $oid, $mid0); } $doc->add_boolean_term('Q' . $_) foreach @$mids; - $doc->add_boolean_term('XNUM' . 
$num) if defined $num; - $doc_id = $self->{xdb}->add_document($doc); + $self->{xdb}->replace_document($doc_id = $num, $doc); }; if ($@) { @@ -419,16 +400,6 @@ sub remove_message { } } -sub delete_article { - my ($self, $num) = @_; - my $ndel = 0; - batch_do($self, 'XNUM' . $num, sub { - my ($ids) = @_; - $ndel += scalar @$ids; - $self->{xdb}->delete_document($_) for @$ids; - }); -} - # MID is a hint in V2 sub remove_by_oid { my ($self, $oid, $mid) = @_; |