about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-03-20 08:18:18 +0000
committer Eric Wong <e@yhbt.net> 2020-03-22 09:00:23 +0000
commit 1218a4126807951a0f47286338dc04d7f197bb78 (patch)
tree 5f97519d19a5fdfb92feecd8e64db43c036432d5 /lib
parent d34a4b80724e3f77a507ad08b91039427b0e09d5 (diff)
download public-inbox-1218a4126807951a0f47286338dc04d7f197bb78.tar.gz
We can pass blessed PublicInbox::Smsg objects to internal
indexing APIs instead of having long parameter lists in some
places.  The end goal is to avoid re-parsing the same information
at each step of the way and hopefully make things more
understandable.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/OverIdx.pm 14
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 45
-rw-r--r-- lib/PublicInbox/SearchIdxShard.pm 16
-rw-r--r-- lib/PublicInbox/V2Writable.pm 8
4 files changed, 51 insertions, 32 deletions
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index f49dfa00..2d71956d 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -245,15 +245,9 @@ sub subject_path ($) {
 }
 
 sub add_overview {
-        my ($self, $mime, $bytes, $num, $oid, $mid0, $times) = @_;
-        my $lines = $mime->body_raw =~ tr!\n!\n!;
-        my $smsg = bless {
-                mime => $mime,
-                mid => $mid0,
-                bytes => $bytes,
-                lines => $lines,
-                blob => $oid,
-        }, 'PublicInbox::Smsg';
+        my ($self, $mime, $smsg, $times) = @_;
+        $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!;
+        $smsg->{mime} = $mime; # XXX temporary?
         my $hdr = $mime->header_obj;
         my $mids = mids_for_index($hdr);
         my $refs = parse_references($smsg, $hdr, $mids);
@@ -268,7 +262,7 @@ sub add_overview {
         $dd = compress($dd);
         my $ds = msg_timestamp($hdr, $times->{autime});
         my $ts = msg_datestamp($hdr, $times->{cotime});
-        my $values = [ $ts, $ds, $num, $mids, $refs, $xpath, $dd ];
+        my $values = [ $ts, $ds, $smsg->{num}, $mids, $refs, $xpath, $dd ];
         add_over($self, $values);
 }
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 32be9c3f..5ca819c3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -306,9 +306,9 @@ sub index_xapian { # msg_iter callback
         index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
 }
 
-sub add_xapian ($$$$$$) {
-        my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
-        my $smsg = PublicInbox::Smsg->new($mime);
+sub add_xapian ($$$$) {
+        my ($self, $mime, $smsg, $mids) = @_;
+        $smsg->{mime} = $mime; # XXX dangerous
         my $hdr = $mime->header_obj;
         $smsg->{ds} = msg_datestamp($hdr, $self->{autime});
         $smsg->{ts} = msg_timestamp($hdr, $self->{cotime});
@@ -338,9 +338,7 @@ sub add_xapian ($$$$$$) {
                         index_text($self, join(' ', @long), 1, 'XM');
                 }
         }
-        $smsg->{to} = $smsg->{cc} = '';
-        $smsg->{blob} = $oid;
-        $smsg->{mid} = $mid0;
+        $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP
         PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids);
         my $data = $smsg->to_doc_data;
         $doc->set_data($data);
@@ -355,7 +353,7 @@ sub add_xapian ($$$$$$) {
                 }
         }
         $doc->add_boolean_term('Q' . $_) foreach @$mids;
-        $self->{xdb}->replace_document($num, $doc);
+        $self->{xdb}->replace_document($smsg->{num}, $doc);
 }
 
 sub _msgmap_init ($) {
@@ -369,20 +367,25 @@ sub _msgmap_init ($) {
 
 sub add_message {
         # mime = Email::MIME object
-        my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
+        my ($self, $mime, $smsg) = @_;
         my $mids = mids_for_index($mime->header_obj);
-        $mid0 //= $mids->[0]; # v1 compatibility
-        $num //= do { # v1
+        $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat
+        $smsg->{mid} //= $mids->[0]; # v1 compatibility
+        $smsg->{num} //= do { # v1
                 _msgmap_init($self);
                 index_mm($self, $mime);
         };
         eval {
-                if (need_xapian($self)) {
-                        add_xapian($self, $mime, $num, $oid, $mids, $mid0);
+                # order matters, overview stores every possible piece of
+                # data in doc_data (deflated).  Xapian only stores a subset
+                # of the fields which exist in over.sqlite3.  We may stop
+                # storing doc_data in Xapian sometime after we get multi-inbox
+                # search working.
+                if (my $over = $self->{over}) { # v1 only
+                        $over->add_overview($mime, $smsg, $self);
                 }
-                if (my $over = $self->{over}) {
-                        $over->add_overview($mime, $bytes, $num, $oid, $mid0,
-                                                $self);
+                if (need_xapian($self)) {
+                        add_xapian($self, $mime, $smsg, $mids);
                 }
         };
 
@@ -390,7 +393,7 @@ sub add_message {
                 warn "failed to index message <".join('> <',@$mids).">: $@\n";
                 return undef;
         }
-        $num;
+        $smsg->{num};
 }
 
 # returns begin and end PostingIterator
@@ -530,9 +533,10 @@ sub unindex_mm {
 }
 
 sub index_both {
-        my ($self, $mime, $bytes, $blob) = @_;
+        my ($self, $mime, $smsg) = @_;
         my $num = index_mm($self, $mime);
-        add_message($self, $mime, $bytes, $num, $blob);
+        $smsg->{num} = $num;
+        add_message($self, $mime, $smsg);
 }
 
 sub unindex_both {
@@ -595,8 +599,11 @@ sub read_log {
                                 next;
                         }
                         my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                        my $smsg = bless {}, 'PublicInbox::Smsg';
                         batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr);
-                        $add_cb->($self, $mime, $bytes, $blob);
+                        $smsg->{blob} = $blob;
+                        $smsg->{bytes} = $bytes;
+                        $add_cb->($self, $mime, $smsg);
                 } elsif ($line =~ /$delmsg/o) {
                         my $blob = $1;
                         $D{$blob} = 1;
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 74c624a4..d29e6090 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -76,7 +76,13 @@ sub shard_worker_loop ($$$$$) {
                         $artnum = int($artnum);
                         $self->{autime} = $autime;
                         $self->{cotime} = $cotime;
-                        $self->add_message($mime, $n, $artnum, $oid, $mid0);
+                        my $smsg = bless {
+                                bytes => $len,
+                                num => $artnum,
+                                blob => $oid,
+                                mid => $mid0,
+                        }, 'PublicInbox::Smsg';
+                        $self->add_message($mime, $smsg);
                 }
         }
         $self->worker_done;
@@ -95,7 +101,13 @@ sub index_raw {
                 $self->begin_txn_lazy;
                 $self->{autime} = $at;
                 $self->{cotime} = $ct;
-                $self->add_message($mime, $bytes, $artnum, $oid, $mid0);
+                my $smsg = bless {
+                        bytes => $bytes,
+                        num => $artnum,
+                        blob => $oid,
+                        mid => $mid0,
+                }, 'PublicInbox::Smsg';
+                $self->add_message($mime, $smsg);
         }
 }
 
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index d39a6f89..34dd139b 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -150,7 +150,13 @@ sub add {
 # indexes a message, returns true if checkpointing is needed
 sub do_idx ($$$$$$$) {
         my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
-        $self->{over}->add_overview($mime, $len, $num, $oid, $mid0, $self);
+        my $smsg = bless {
+                bytes => $len,
+                num => $num,
+                blob => $oid,
+                mid => $mid0,
+        }, 'PublicInbox::Smsg';
+        $self->{over}->add_overview($mime, $smsg, $self);
         my $idx = idx_shard($self, $num % $self->{shards});
         $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime, $self);
         my $n = $self->{transact_bytes} += $len;