about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--MANIFEST1
-rw-r--r--lib/PublicInbox/LeiInput.pm76
-rw-r--r--t/lei-import-http.t43
3 files changed, 120 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index 79d393c5..ce824fcf 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -394,6 +394,7 @@ t/kqnotify.t
 t/lei-convert.t
 t/lei-daemon.t
 t/lei-externals.t
+t/lei-import-http.t
 t/lei-import-imap.t
 t/lei-import-maildir.t
 t/lei-import-nntp.t
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index d11d23d4..785e607d 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -6,6 +6,7 @@ package PublicInbox::LeiInput;
 use strict;
 use v5.10.1;
 use PublicInbox::DS;
+use PublicInbox::Spawn qw(which popen_rd);
 
 # JMAP RFC 8621 4.1.1
 # https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
@@ -77,6 +78,32 @@ error reading $name: $!
         }
 }
 
+# Streams the mboxrd endpoint at $url (see Documentation/design_notes.txt)
+# via curl into input_mbox_cb; needs "-curl-$url" from prepare_http_input
+sub handle_http_input ($$@) {
+        my ($self, $url, @args) = @_;
+        my $lei = $self->{lei} or die 'BUG: {lei} missing';
+        my $curl_opt = delete $self->{"-curl-$url"} or
+                                die("BUG: $url curl options not prepared");
+        my $uri = pop @$curl_opt; # prepare_http_input stores the URI last
+        my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
+        push @$curl, '-s', @$curl_opt;
+        my $cmd = $curl->for_uri($lei, $uri);
+        $lei->qerr("# $cmd");
+        my ($fh, $pid) = popen_rd($cmd, undef, { 2 => $lei->{2}, pgid => 0 });
+        # no --compressed: the body is expected to be gzip, inflate it here
+        grep(/\A--compressed\z/, @$curl) or
+                $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
+        eval {
+                PublicInbox::MboxReader->mboxrd($fh,
+                                $self->can('input_mbox_cb'), $self, @args);
+        };
+        my $err = $@;
+        waitpid($pid, 0);
+        $? || $err and $lei->child_error($? || 1, # `.' binds tighter than ?:
+                                "@$cmd failed".($err ? " $err" : ''));
+}
+
 sub input_path_url {
         my ($self, $input, @args) = @_;
         my $lei = $self->{lei};
@@ -92,6 +119,9 @@ sub input_path_url {
                                                 $self->can('input_net_cb'),
                                         $self, @args);
                 return;
+        } elsif ($input =~ m!\Ahttps?://!i) {
+                handle_http_input($self, $input, @args);
+                return;
         }
         if ($input =~ s!\A([a-z0-9]+):!!i) {
                 $ifmt = lc($1);
@@ -129,6 +159,50 @@ EOM
         }
 }
 
+sub bad_http ($$;$) { # fails on $url; $alt, if given, is suggested instead
+        my ($lei, $url, $alt) = @_;
+        $alt = $alt ? "did you mean <$alt>?" : 'download and import manually';
+        $lei->fail("E: <$url> not recognized, $alt");
+}
+
+# Vets $url for import and stashes [ curl options..., URI object ] in
+# $self->{"-curl-$url"} for handle_http_input to consume.  Gives a false
+# value (via $lei->fail) if curl is missing or $url is not importable.
+sub prepare_http_input ($$$) {
+        my ($self, $lei, $url) = @_;
+        require URI;
+        require PublicInbox::MboxReader;
+        require PublicInbox::LeiCurl;
+        require IO::Uncompress::Gunzip;
+        $self->{curl} //= which('curl');
+        $self->{curl} or return $lei->fail("curl missing for <$url>");
+        my $uri = URI->new($url);
+        my $path = $uri->path;
+        my %qf = $uri->query_form;
+        my @curl_opt;
+        if ($path =~ m!/(?:t\.mbox\.gz|all\.mbox\.gz)\z!) {
+                # usable as-is, no extra curl options
+        } elsif ($path =~ m!/raw\z!) {
+                @curl_opt = ('--compressed');
+        } elsif (defined $qf{'q'}) {
+                # convert search query to mboxrd request; those need POST.
+                # Only intended for PublicInbox::WWW, this false-positives
+                # on many other search engines
+                $uri->query_form({ %qf, x => 'm' });
+                @curl_opt = ('-d', ''); # empty body makes curl POST
+                $$uri eq $url or $lei->qerr(
+                        "# <$url> rewritten to <$$uri> with HTTP POST\n");
+        } else { # unusable; hint at /$INBOX/$MSGID/T/ and /$INBOX/ fixes
+                my $hint = $path =~ s!/[tT]/\z!/t.mbox.gz! ||
+                        $path =~ s!/t\.atom\z!/t.mbox.gz! ||
+                        $path =~ s!/([^/]+\@[^/]+)/\z!/$1/raw!;
+                $hint and $uri->path($path);
+                return bad_http($lei, $url, $hint ? $$uri : undef);
+        }
+        push @curl_opt, $uri; # handle_http_input pops this back off
+        $self->{"-curl-$url"} = \@curl_opt;
+}
+
 sub prepare_inputs { # returns undef on error
         my ($self, $lei, $inputs) = @_;
         my $in_fmt = $lei->{opt}->{'in-format'};
@@ -156,6 +230,8 @@ sub prepare_inputs { # returns undef on error
                                         push @{$sync->{no}}, $input;
                                 }
                         }
+                } elsif ($input_path =~ m!\Ahttps?://!i) {
+                        prepare_http_input($self, $lei, $input_path) or return;
                 } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
                         my $ifmt = lc $1;
                         if (($in_fmt // $ifmt) ne $ifmt) {
diff --git a/t/lei-import-http.t b/t/lei-import-http.t
new file mode 100644
index 00000000..35cbf369
--- /dev/null
+++ b/t/lei-import-http.t
@@ -0,0 +1,43 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use PublicInbox::Spawn qw(which);
+require_mods(qw(lei -httpd));
+plan skip_all => "curl required for $0" unless which('curl');
+my ($ro_home, $cfg_path) = setup_public_inboxes;
+my ($tmpdir, $for_destroy) = tmpdir;
+my $sock = tcp_server;
+# start -httpd on $sock, capturing its stdout and stderr under $tmpdir
+my $td = start_script([ '-httpd', '-W0', "--stdout=$tmpdir/1",
+                        "--stderr=$tmpdir/2" ], { PI_CONFIG => $cfg_path },
+                        { 3 => $sock }) or BAIL_OUT("-httpd $?");
+my $host_port = tcp_host_port($sock);
+undef $sock;
+test_lei({ tmpdir => $tmpdir }, sub {
+        my $url = "http://$host_port/t2";
+        for my $bad (qw(bogus@x/t.mbox.gz bogus@x/raw ?q=noresultever)) {
+                ok(!lei('import', "$url/$bad"), "/$bad fails properly");
+        }
+        # unsupported URL shapes which should come with a suggestion
+        for my $sfx (qw(/ /T/ /t/ /t.atom)) {
+                ok(!lei('import', "$url/m\@example$sfx"), "/$sfx fails");
+                like($lei_err, qr/did you mean/, "gave hint for $sfx");
+        }
+        # [ path below $url, expected `m' field of first hit, test name ]
+        my @importable = (
+                [ 'testmessage@example.com/raw',
+                        'testmessage@example.com', 'imported raw' ],
+                [ 'qp@example.com/t.mbox.gz',
+                        'qp@example.com', 'imported t.mbox.gz' ],
+                [ '?q=s:boolean', '20180720072141.GA15957@example',
+                        'imported search result' ],
+        );
+        for my $t (@importable) {
+                my ($path, $mid, $desc) = @$t;
+                lei_ok 'import', "$url/$path";
+                lei_ok 'q', "m:$mid";
+                my $res = json_utf8->decode($lei_out);
+                is($res->[0]->{'m'}, $mid, $desc) or diag explain($res);
+        }
+});
+done_testing;