about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-23 01:54:16 +0000
committer: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-23 01:58:40 +0000
commit: 41654a8cd9372c0640c4ca5339e5881927965e41 (patch)
tree: 60f475bc00157cb124de54bfe63622e34219cb05
parent: f6285ab9d73a4eae490dda325096e61eadc415cd (diff)
download: public-inbox-41654a8cd9372c0640c4ca5339e5881927965e41.tar.gz
Since v2 supports duplicate messages, we need to support
looking up different messages with the same Message-Id.
Fortunately, our "raw" endpoint has always been mboxrd,
so users won't need to change their parsing tools.
-rw-r--r-- MANIFEST 1
-rw-r--r-- lib/PublicInbox/Mbox.pm 71
-rw-r--r-- lib/PublicInbox/Search.pm 1
-rw-r--r-- lib/PublicInbox/WWW.pm 3
-rw-r--r-- t/psgi_v2.t 110
5 files changed, 175 insertions, 11 deletions
diff --git a/MANIFEST b/MANIFEST
index 0f889959..8b2b10bd 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -171,6 +171,7 @@ t/psgi_attach.t
 t/psgi_mount.t
 t/psgi_search.t
 t/psgi_text.t
+t/psgi_v2.t
 t/qspawn.t
 t/reply.t
 t/search-thr-index.t
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 84cc3845..79e09a70 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -26,12 +26,68 @@ sub subject_fn ($) {
         $fn eq '' ? 'no-subject' : $fn;
 }
 
-sub emit1 {
-        my ($ctx, $msg) = @_;
-        $msg = Email::Simple->new($msg);
-        my $fn = subject_fn($msg);
+sub smsg_for ($$$) {
+        my ($head, $db, $mid) = @_;
+        my $doc_id = $head->get_docid;
+        my $doc = $db->get_document($doc_id);
+        PublicInbox::SearchMsg->wrap($doc, $mid)->load_expand;
+}
+
+sub mb_stream {
+        my ($more) = @_;
+        bless $more, 'PublicInbox::Mbox';
+}
+
+# called by PSGI server as body response
+sub getline {
+        my ($more) = @_; # self
+        my ($ctx, $head, $tail, $db, $cur) = @$more;
+        if ($cur) {
+                pop @$more;
+                return msg_str($ctx, $cur);
+        }
+        for (; !defined($cur) && $head != $tail; $head++) {
+                my $smsg = smsg_for($head, $db, $ctx->{mid});
+                next if $smsg->type ne 'mail';
+                my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
+                $cur = Email::Simple->new($mref);
+                $cur = msg_str($ctx, $cur);
+        }
+        $more->[1] = $head;
+        $cur;
+}
+
+sub close {} # noop
+
+sub emit_raw {
+        my ($ctx) = @_;
+        my $mid = $ctx->{mid};
+        my $ibx = $ctx->{-inbox};
+        my $first;
+        my $more;
+        my ($head, $tail, $db);
+        my %seen;
+        if (my $srch = $ibx->search) {
+                $srch->retry_reopen(sub {
+                        ($head, $tail, $db) = $srch->each_smsg_by_mid($mid);
+                        for (; !defined($first) && $head != $tail; $head++) {
+                                my $smsg = smsg_for($head, $db, $mid);
+                                next if $smsg->type ne 'mail';
+                                my $mref = $ibx->msg_by_smsg($smsg) or next;
+                                $first = Email::Simple->new($mref);
+                        }
+                        if ($head != $tail) {
+                                $more = [ $ctx, $head, $tail, $db, $first ];
+                        }
+                });
+        } else {
+                my $mref = $ibx->msg_by_mid($mid) or return;
+                $first = Email::Simple->new($mref);
+        }
+        return unless defined $first;
+        my $fn = subject_fn($first);
         my @hdr = ('Content-Type');
-        if ($ctx->{-inbox}->{obfuscate}) {
+        if ($ibx->{obfuscate}) {
                 # obfuscation is stupid, but maybe scrapers are, too...
                 push @hdr, 'application/mbox';
                 $fn .= '.mbox';
@@ -40,10 +96,7 @@ sub emit1 {
                 $fn .= '.txt';
         }
         push @hdr, 'Content-Disposition', "inline; filename=$fn";
-
-        # single message should be easily renderable in browsers,
-        # unless obfuscation is enabled :<
-        [ 200, \@hdr, [ msg_str($ctx, $msg) ] ]
+        [ 200, \@hdr, $more ? mb_stream($more) : [ msg_str($ctx, $first) ] ];
 }
 
 sub msg_str {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index f08b9870..24600ee7 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -417,6 +417,7 @@ sub each_smsg_by_mid {
         my $term = 'Q' . $mid;
         my $head = $db->postlist_begin($term);
         my $tail = $db->postlist_end($term);
+        return ($head, $tail, $db) if wantarray;
         for (; $head->nequal($tail); $head->inc) {
                 my $doc_id = $head->get_docid;
                 my $doc = $db->get_document($doc_id);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e95fba08..f86363c6 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -218,9 +218,8 @@ sub mid2blob {
 # /$INBOX/$MESSAGE_ID/raw                    -> raw mbox
 sub get_mid_txt {
         my ($ctx) = @_;
-        my $x = mid2blob($ctx) or return r404($ctx);
         require PublicInbox::Mbox;
-        PublicInbox::Mbox::emit1($ctx, $x);
+        PublicInbox::Mbox::emit_raw($ctx) || r404($ctx);
 }
 
 # /$INBOX/$MESSAGE_ID/                   -> HTML content (short quotes)
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
new file mode 100644
index 00000000..5d089dbe
--- /dev/null
+++ b/t/psgi_v2.t
@@ -0,0 +1,110 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+                URI::Escape Plack::Builder);
+foreach my $mod (@mods) {
+        eval "require $mod";
+        plan skip_all => "$mod missing for psgi_v2_dupes.t" if $@;
+}
+use_ok($_) for @mods;
+use_ok 'PublicInbox::V2Writable';
+my $mainrepo = tempdir('pi-v2_dupes-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $ibx = {
+        mainrepo => $mainrepo,
+        name => 'test-v2writable',
+        version => 2,
+        -primary_address => 'test@example.com',
+};
+$ibx = PublicInbox::Inbox->new($ibx);
+my $new_mid;
+
+my $im = PublicInbox::V2Writable->new($ibx, 1);
+$im->{parallel} = 0;
+
+my $mime = PublicInbox::MIME->create(
+        header => [
+                From => 'a@example.com',
+                To => 'test@example.com',
+                Subject => 'this is a subject',
+                'Message-ID' => '<a-mid@b>',
+                Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+        ],
+        body => "hello world\n",
+);
+ok($im->add($mime), 'added one message');
+$mime->body_set("hello world!\n");
+
+my @warn;
+local $SIG{__WARN__} = sub { push @warn, @_ };
+ok($im->add($mime), 'added duplicate-but-different message');
+is(scalar(@warn), 1, 'got one warning');
+my @mids = $mime->header_obj->header_raw('Message-Id');
+$new_mid = PublicInbox::MID::mid_clean($mids[0]);
+$im->done;
+
+my $cfgpfx = "publicinbox.v2test";
+my %cfg = (
+        "$cfgpfx.address" => $ibx->{-primary_address},
+        "$cfgpfx.mainrepo" => $mainrepo,
+);
+
+my $config = PublicInbox::Config->new({ %cfg });
+my $www = PublicInbox::WWW->new($config);
+my ($res, $raw, @from_);
+test_psgi(sub { $www->call(@_) }, sub {
+        my ($cb) = @_;
+        $res = $cb->(GET('/v2test/a-mid@b/raw'));
+        $raw = $res->content;
+        like($raw, qr/^hello world$/m, 'got first message');
+        like($raw, qr/^hello world!$/m, 'got second message');
+        @from_ = ($raw =~ m/^From /mg);
+        is(scalar(@from_), 2, 'two From_ lines');
+
+        $res = $cb->(GET("/v2test/$new_mid/raw"));
+        $raw = $res->content;
+        like($raw, qr/^hello world!$/m, 'second message with new Message-Id');
+        @from_ = ($raw =~ m/^From /mg);
+        is(scalar(@from_), 1, 'only one From_ line');
+});
+
+$mime->header_set('Message-Id', 'a-mid@b');
+$mime->body_set("hello ghosts\n");
+ok($im->add($mime), 'added 3rd duplicate-but-different message');
+is(scalar(@warn), 2, 'got another warning');
+like($warn[0], qr/mismatched/, 'warned about mismatched messages');
+is($warn[0], $warn[1], 'both warnings are the same');
+
+@mids = $mime->header_obj->header_raw('Message-Id');
+my $third = PublicInbox::MID::mid_clean($mids[0]);
+$im->done;
+
+# need to reload...
+$config = PublicInbox::Config->new({ %cfg });
+$www = PublicInbox::WWW->new($config);
+test_psgi(sub { $www->call(@_) }, sub {
+        my ($cb) = @_;
+        $res = $cb->(GET("/v2test/$third/raw"));
+        $raw = $res->content;
+        like($raw, qr/^hello ghosts$/m, 'got third message');
+        @from_ = ($raw =~ m/^From /mg);
+        is(scalar(@from_), 1, 'one From_ line');
+
+        $res = $cb->(GET('/v2test/a-mid@b/raw'));
+        $raw = $res->content;
+        like($raw, qr/^hello world$/m, 'got first message');
+        like($raw, qr/^hello world!$/m, 'got second message');
+        like($raw, qr/^hello ghosts$/m, 'got third message');
+        @from_ = ($raw =~ m/^From /mg);
+        is(scalar(@from_), 3, 'three From_ lines');
+});
+
+done_testing();
+
+1;