diff options
-rw-r--r-- | lib/PublicInbox/OverIdx.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 45 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdxShard.pm | 16 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 8 | ||||
-rw-r--r-- | t/search-thr-index.t | 17 |
5 files changed, 66 insertions, 34 deletions
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index f49dfa00..2d71956d 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -245,15 +245,9 @@ sub subject_path ($) { } sub add_overview { - my ($self, $mime, $bytes, $num, $oid, $mid0, $times) = @_; - my $lines = $mime->body_raw =~ tr!\n!\n!; - my $smsg = bless { - mime => $mime, - mid => $mid0, - bytes => $bytes, - lines => $lines, - blob => $oid, - }, 'PublicInbox::Smsg'; + my ($self, $mime, $smsg, $times) = @_; + $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!; + $smsg->{mime} = $mime; # XXX temporary? my $hdr = $mime->header_obj; my $mids = mids_for_index($hdr); my $refs = parse_references($smsg, $hdr, $mids); @@ -268,7 +262,7 @@ sub add_overview { $dd = compress($dd); my $ds = msg_timestamp($hdr, $times->{autime}); my $ts = msg_datestamp($hdr, $times->{cotime}); - my $values = [ $ts, $ds, $num, $mids, $refs, $xpath, $dd ]; + my $values = [ $ts, $ds, $smsg->{num}, $mids, $refs, $xpath, $dd ]; add_over($self, $values); } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 32be9c3f..5ca819c3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -306,9 +306,9 @@ sub index_xapian { # msg_iter callback index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; } -sub add_xapian ($$$$$$) { - my ($self, $mime, $num, $oid, $mids, $mid0) = @_; - my $smsg = PublicInbox::Smsg->new($mime); +sub add_xapian ($$$$) { + my ($self, $mime, $smsg, $mids) = @_; + $smsg->{mime} = $mime; # XXX dangerous my $hdr = $mime->header_obj; $smsg->{ds} = msg_datestamp($hdr, $self->{autime}); $smsg->{ts} = msg_timestamp($hdr, $self->{cotime}); @@ -338,9 +338,7 @@ sub add_xapian ($$$$$$) { index_text($self, join(' ', @long), 1, 'XM'); } } - $smsg->{to} = $smsg->{cc} = ''; - $smsg->{blob} = $oid; - $smsg->{mid} = $mid0; + $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids); my $data = $smsg->to_doc_data; $doc->set_data($data); @@ -355,7 +353,7 @@ sub add_xapian ($$$$$$) { } } $doc->add_boolean_term('Q' . $_) foreach @$mids; - $self->{xdb}->replace_document($num, $doc); + $self->{xdb}->replace_document($smsg->{num}, $doc); } sub _msgmap_init ($) { @@ -369,20 +367,25 @@ sub _msgmap_init ($) { sub add_message { # mime = Email::MIME object - my ($self, $mime, $bytes, $num, $oid, $mid0) = @_; + my ($self, $mime, $smsg) = @_; my $mids = mids_for_index($mime->header_obj); - $mid0 //= $mids->[0]; # v1 compatibility - $num //= do { # v1 + $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat + $smsg->{mid} //= $mids->[0]; # v1 compatibility + $smsg->{num} //= do { # v1 _msgmap_init($self); index_mm($self, $mime); }; eval { - if (need_xapian($self)) { - add_xapian($self, $mime, $num, $oid, $mids, $mid0); + # order matters, overview stores every possible piece of + # data in doc_data (deflated). Xapian only stores a subset + # of the fields which exist in over.sqlite3. We may stop + # storing doc_data in Xapian sometime after we get multi-inbox + # search working. + if (my $over = $self->{over}) { # v1 only + $over->add_overview($mime, $smsg, $self); } - if (my $over = $self->{over}) { - $over->add_overview($mime, $bytes, $num, $oid, $mid0, - $self); + if (need_xapian($self)) { + add_xapian($self, $mime, $smsg, $mids); } }; @@ -390,7 +393,7 @@ sub add_message { warn "failed to index message <".join('> <',@$mids).">: $@\n"; return undef; } - $num; + $smsg->{num}; } # returns begin and end PostingIterator @@ -530,9 +533,10 @@ sub unindex_mm { } sub index_both { - my ($self, $mime, $bytes, $blob) = @_; + my ($self, $mime, $smsg) = @_; my $num = index_mm($self, $mime); - add_message($self, $mime, $bytes, $num, $blob); + $smsg->{num} = $num; + add_message($self, $mime, $smsg); } sub unindex_both { @@ -595,8 +599,11 @@ sub read_log { next; } my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $smsg = bless {}, 'PublicInbox::Smsg'; batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); - $add_cb->($self, $mime, $bytes, $blob); + $smsg->{blob} = $blob; + $smsg->{bytes} = $bytes; + $add_cb->($self, $mime, $smsg); } elsif ($line =~ /$delmsg/o) { my $blob = $1; $D{$blob} = 1; diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 74c624a4..d29e6090 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -76,7 +76,13 @@ sub shard_worker_loop ($$$$$) { $artnum = int($artnum); $self->{autime} = $autime; $self->{cotime} = $cotime; - $self->add_message($mime, $n, $artnum, $oid, $mid0); + my $smsg = bless { + bytes => $len, + num => $artnum, + blob => $oid, + mid => $mid0, + }, 'PublicInbox::Smsg'; + $self->add_message($mime, $smsg); } } $self->worker_done; @@ -95,7 +101,13 @@ sub index_raw { $self->begin_txn_lazy; $self->{autime} = $at; $self->{cotime} = $ct; - $self->add_message($mime, $bytes, $artnum, $oid, $mid0); + my $smsg = bless { + bytes => $bytes, + num => $artnum, + blob => $oid, + mid => $mid0, + }, 'PublicInbox::Smsg'; + $self->add_message($mime, $smsg); } } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index d39a6f89..34dd139b 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -150,7 +150,13 @@ sub add { # indexes a message, returns true if checkpointing is needed sub do_idx ($$$$$$$) { my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_; - $self->{over}->add_overview($mime, $len, $num, $oid, $mid0, $self); + my $smsg = bless { + bytes => $len, + num => $num, + blob => $oid, + mid => $mid0, + }, 'PublicInbox::Smsg'; + $self->{over}->add_overview($mime, $smsg, $self); my $idx = idx_shard($self, $num % $self->{shards}); $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime, $self); my $n = $self->{transact_bytes} += $len; diff --git a/t/search-thr-index.t b/t/search-thr-index.t index 6a5fd919..f073304a 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -9,6 +9,7 @@ use PublicInbox::MID qw(mids); use Email::MIME; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; +require PublicInbox::Smsg; require PublicInbox::Inbox; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; @@ -45,7 +46,13 @@ foreach (reverse split(/\n\n/, $data)) { $mime->header_set('To' => 'git@vger.kernel.org'); my $bytes = bytes::length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; - my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored', $mid); + my $smsg = bless { + bytes => $bytes, + num => ++$num, + mid => $mid, + blob => '', + }, 'PublicInbox::Smsg'; + my $doc_id = $rw->add_message($mime, $smsg); push @mids, $mid; ok($doc_id, 'message added: '. $mid); } @@ -86,7 +93,13 @@ SELECT tid FROM over WHERE num = ? LIMIT 1 my $bytes = bytes::length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; - my $doc_id = $rw->add_message($mime, $bytes, $num, 'ignored', $mid); + my $smsg = bless { + bytes => $bytes, + num => $num, + mid => $mid, + blob => '', + }, 'PublicInbox::Smsg'; + my $doc_id = $rw->add_message($mime, $smsg); ok($doc_id, 'message reindexed'. $mid); is($doc_id, $num, "article number unchanged: $num"); |