about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
authorEric Wong (Contractor, The Linux Foundation) <e@80x24.org>2018-03-03 20:21:05 +0000
committerEric Wong (Contractor, The Linux Foundation) <e@80x24.org>2018-03-03 20:24:43 +0000
commit031dcde21cd8dab5494d9715ba50d6a539e3fb42 (patch)
tree172df048191ee4cb07388f8d0b2a1e7246552667 /lib/PublicInbox
parentb212aee7e13c460b73a3632458ae96c39d9eac97 (diff)
downloadpublic-inbox-031dcde21cd8dab5494d9715ba50d6a539e3fb42.tar.gz
We can't rely on header order for Message-ID after all
since we fall back to existing MIDs if they exist and
are unseen.  This lets us use SearchMsg->mid to get the
MID we associated with the NNTP article number to ensure
all NNTP article lookups roundtrip correctly.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/SearchIdx.pm6
-rw-r--r--lib/PublicInbox/SearchIdxPart.pm8
-rw-r--r--lib/PublicInbox/SearchMsg.pm9
-rw-r--r--lib/PublicInbox/V2Writable.pm34
4 files changed, 33 insertions, 24 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index a70e1ebf..71469a95 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -302,7 +302,8 @@ sub index_body ($$$) {
 }
 
 sub add_message {
-        my ($self, $mime, $bytes, $num, $blob) = @_; # mime = Email::MIME object
+        # mime = Email::MIME object
+        my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
         my $doc_id;
         my $mids = mids($mime->header_obj);
         my $skel = $self->{skeleton};
@@ -370,7 +371,8 @@ sub add_message {
 
                 # populates smsg->references for smsg->to_doc_data
                 my $refs = parse_references($smsg);
-                my $data = $smsg->to_doc_data($blob);
+                $mid0 = $mids->[0] unless defined $mid0;
+                my $data = $smsg->to_doc_data($oid, $mid0);
                 foreach my $mid (@$mids) {
                         $tg->index_text($mid, 1, 'XM');
                 }
diff --git a/lib/PublicInbox/SearchIdxPart.pm b/lib/PublicInbox/SearchIdxPart.pm
index 2f577ecf..6d8cb2a7 100644
--- a/lib/PublicInbox/SearchIdxPart.pm
+++ b/lib/PublicInbox/SearchIdxPart.pm
@@ -51,7 +51,7 @@ sub partition_worker_loop ($$$) {
                         $xdb = $txn = undef;
                 } else {
                         chomp $line;
-                        my ($len, $artnum, $object_id) = split(/ /, $line);
+                        my ($len, $artnum, $oid, $mid0) = split(/ /, $line);
                         $xdb ||= $self->_xdb_acquire;
                         if (!$txn) {
                                 $xdb->begin_transaction;
@@ -61,7 +61,7 @@ sub partition_worker_loop ($$$) {
                         $n == $len or die "short read: $n != $len\n";
                         my $mime = PublicInbox::MIME->new(\$msg);
                         $artnum = int($artnum);
-                        $self->add_message($mime, $n, $artnum, $object_id);
+                        $self->add_message($mime, $n, $artnum, $oid, $mid0);
                 }
         }
         warn "$$ still in transaction\n" if $txn;
@@ -70,9 +70,9 @@ sub partition_worker_loop ($$$) {
 
 # called by V2Writable
 sub index_raw {
-        my ($self, $len, $msgref, $artnum, $object_id) = @_;
+        my ($self, $len, $msgref, $artnum, $object_id, $mid0) = @_;
         my $w = $self->{w};
-        print $w "$len $artnum $object_id\n", $$msgref or die
+        print $w "$len $artnum $object_id $mid0\n", $$msgref or die
                 "failed to write partition $!\n";
         $w->flush or die "failed to flush: $!\n";
 }
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 93e6fd8b..a62a6490 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -31,13 +31,14 @@ sub get_val ($$) {
 
 sub load_from_data ($$) {
         my ($self) = $_[0]; # data = $_[1]
-        my ($subj, $from, $refs, $to, $cc, $blob) = split(/\n/, $_[1]);
+        my ($subj, $from, $refs, $to, $cc, $blob, $mid0) = split(/\n/, $_[1]);
         $self->{subject} = $subj;
         $self->{from} = $from;
         $self->{references} = $refs;
         $self->{to} = $to;
         $self->{cc} = $cc;
         $self->{blob} = $blob;
+        $self->{mid} = $mid0;
 }
 
 sub load_expand {
@@ -120,11 +121,11 @@ sub ts {
 }
 
 sub to_doc_data {
-        my ($self, $blob) = @_;
+        my ($self, $oid, $mid0) = @_;
         my @rows = ($self->subject, $self->from, $self->references,
                         $self->to, $self->cc);
-        push @rows, $blob if defined $blob;
-        join("\n", @rows);
+        $oid = '' unless defined $oid;
+        join("\n", @rows, $oid, $mid0);
 }
 
 sub references {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index c73d859b..caabc8e4 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -62,8 +62,10 @@ sub add {
         # leaking FDs to it...
         $self->idx_init;
 
-        my $num = num_for($self, $mime);
+        my $mid0;
+        my $num = num_for($self, $mime, \$mid0);
         defined $num or return; # duplicate
+        defined $mid0 or die "BUG: $mid0 undefined\n";
         my $im = $self->importer;
         my $cmt = $im->add($mime);
         $cmt = $im->get_mark($cmt);
@@ -73,7 +75,7 @@ sub add {
         my $nparts = $self->{partitions};
         my $part = $num % $nparts;
         my $idx = $self->idx_part($part);
-        $idx->index_raw($len, $msgref, $num, $oid);
+        $idx->index_raw($len, $msgref, $num, $oid, $mid0);
         my $n = $self->{transact_bytes} += $len;
         if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
                 $self->checkpoint;
@@ -83,12 +85,15 @@ sub add {
 }
 
 sub num_for {
-        my ($self, $mime) = @_;
+        my ($self, $mime, $mid0) = @_;
         my $mids = mids($mime->header_obj);
         if (@$mids) {
                 my $mid = $mids->[0];
                 my $num = $self->{skel}->{mm}->mid_insert($mid);
-                return $num if defined($num); # common case
+                if (defined $num) { # common case
+                        $$mid0 = $mid;
+                        return $num;
+                };
 
                 # crap, Message-ID is already known, hope somebody just resent:
                 $self->done; # write barrier, clears $self->{skel}
@@ -111,38 +116,39 @@ sub num_for {
                         $num = $self->{skel}->{mm}->mid_insert($m);
                         if (defined $num) {
                                 warn "alternative <$m> for <$mid> found\n";
+                                $$mid0 = $m;
                                 return $num;
                         }
                 }
         }
         # none of the existing Message-IDs are good, generate a new one:
-        num_for_harder($self, $mime);
+        num_for_harder($self, $mime, $mid0);
 }
 
 sub num_for_harder {
-        my ($self, $mime) = @_;
+        my ($self, $mime, $mid0) = @_;
 
         my $hdr = $mime->header_obj;
         my $dig = content_digest($mime);
-        my $mid = $dig->clone->hexdigest . '@localhost';
-        my $num = $self->{skel}->{mm}->mid_insert($mid);
+        $$mid0 = $dig->clone->hexdigest . '@localhost';
+        my $num = $self->{skel}->{mm}->mid_insert($$mid0);
         unless (defined $num) {
                 # it's hard to spoof the last Received: header
                 my @recvd = $hdr->header_raw('Received');
                 $dig->add("Received: $_") foreach (@recvd);
-                $mid = $dig->clone->hexdigest . '@localhost';
-                $num = $self->{skel}->{mm}->mid_insert($mid);
+                $$mid0 = $dig->clone->hexdigest . '@localhost';
+                $num = $self->{skel}->{mm}->mid_insert($$mid0);
 
                 # fall back to a random Message-ID and give up determinism:
                 until (defined($num)) {
                         $dig->add(rand);
-                        $mid = $dig->clone->hexdigest . '@localhost';
-                        warn "using random Message-ID <$mid> as fallback\n";
-                        $num = $self->{skel}->{mm}->mid_insert($mid);
+                        $$mid0 = $dig->clone->hexdigest . '@localhost';
+                        warn "using random Message-ID <$$mid0> as fallback\n";
+                        $num = $self->{skel}->{mm}->mid_insert($$mid0);
                 }
         }
         my @cur = $hdr->header_raw('Message-Id');
-        $hdr->header_set('Message-Id', "<$mid>", @cur);
+        $hdr->header_set('Message-Id', "<$$mid0>", @cur);
         $num;
 }