diff options
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | lib/PublicInbox/LeiInput.pm | 76 | ||||
-rw-r--r-- | t/lei-import-http.t | 43 |
3 files changed, 120 insertions, 0 deletions
@@ -394,6 +394,7 @@ t/kqnotify.t
 t/lei-convert.t
 t/lei-daemon.t
 t/lei-externals.t
+t/lei-import-http.t
 t/lei-import-imap.t
 t/lei-import-maildir.t
 t/lei-import-nntp.t
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index d11d23d4..785e607d 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -6,6 +6,7 @@ package PublicInbox::LeiInput;
 use strict;
 use v5.10.1;
 use PublicInbox::DS;
+use PublicInbox::Spawn qw(which popen_rd);
 
 # JMAP RFC 8621 4.1.1
 # https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
@@ -77,6 +78,32 @@ error reading $name: $!
 	}
 }
 
+# handles mboxrd endpoints described in Documentation/design_notes.txt
+sub handle_http_input ($$@) { # @args are passed through to input_mbox_cb
+	my ($self, $url, @args) = @_;
+	my $lei = $self->{lei} or die 'BUG: {lei} missing';
+	my $curl_opt = delete $self->{"-curl-$url"} or
+		die("BUG: $url curl options not prepared");
+	my $uri = pop @$curl_opt; # prepare_http_input stores the URI last
+	my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
+	push @$curl, '-s', @$curl_opt;
+	my $cmd = $curl->for_uri($lei, $uri);
+	$lei->qerr("# $cmd");
+	my $rdr = { 2 => $lei->{2}, pgid => 0 };
+	my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
+	grep(/\A--compressed\z/, @$curl) or # else curl output is still gzipped
+		$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
+	eval {
+		PublicInbox::MboxReader->mboxrd($fh,
+						$self->can('input_mbox_cb'),
+						$self, @args);
+	};
+	my $err = $@; # save immediately, before anything can clobber $@
+	waitpid($pid, 0);
+	$? || $err and # parens below are required: "." binds tighter than "?:"
+		$lei->child_error($? || 1, "@$cmd failed".($err ? " $err" : ''));
+}
+
 sub input_path_url {
 	my ($self, $input, @args) = @_;
 	my $lei = $self->{lei};
@@ -92,6 +119,9 @@ sub input_path_url {
 				$self->can('input_net_cb'),
 				$self, @args);
 		return;
+	} elsif ($input =~ m!\Ahttps?://!i) {
+		handle_http_input($self, $input, @args);
+		return;
 	}
 	if ($input =~ s!\A([a-z0-9]+):!!i) {
 		$ifmt = lc($1);
@@ -129,6 +159,50 @@ EOM
 	}
 }
 
+sub bad_http ($$;$) { # fails via $lei->fail, suggesting $alt if given
+	my ($lei, $url, $alt) = @_;
+	my $x = $alt ? "did you mean <$alt>?" : 'download and import manually';
+	$lei->fail("E: <$url> not recognized, $x");
+}
+
+sub prepare_http_input ($$$) { # true on success, else result of $lei->fail
+	my ($self, $lei, $url) = @_;
+	require URI;
+	require PublicInbox::MboxReader;
+	require PublicInbox::LeiCurl;
+	require IO::Uncompress::Gunzip;
+	$self->{curl} //= which('curl') or
+		return $lei->fail("curl missing for <$url>");
+	my $uri = URI->new($url);
+	my $path = $uri->path;
+	my %qf = $uri->query_form;
+	my @curl_opt;
+	if ($path =~ m!/(?:t\.mbox\.gz|all\.mbox\.gz)\z!) {
+		# OK
+	} elsif ($path =~ m!/raw\z!) {
+		push @curl_opt, '--compressed';
+	# convert search query to mboxrd request since they require POST
+	# this is only intended for PublicInbox::WWW, and will false-positive
+	# on many other search engines... oh well
+	} elsif (defined $qf{'q'}) {
+		$qf{x} = 'm';
+		$uri->query_form(\%qf);
+		push @curl_opt, '-d', ''; # empty request body makes curl POST
+		$$uri ne $url and $lei->qerr(<<"");
+# <$url> rewritten to <$$uri> with HTTP POST
+
+	# try to provide hints for /$INBOX/$MSGID/T/ and /$INBOX/
+	} elsif ($path =~ s!/[tT]/\z!/t.mbox.gz! ||
+			$path =~ s!/t\.atom\z!/t.mbox.gz! ||
+			$path =~ s!/([^/]+\@[^/]+)/\z!/$1/raw!) {
+		$uri->path($path);
+		return bad_http($lei, $url, $$uri);
+	} else {
+		return bad_http($lei, $url);
+	}
+	$self->{"-curl-$url"} = [ @curl_opt, $uri ]; # for handle_http_input
+}
+
 sub prepare_inputs { # returns undef on error
 	my ($self, $lei, $inputs) = @_;
 	my $in_fmt = $lei->{opt}->{'in-format'};
@@ -156,6 +230,8 @@ sub prepare_inputs { # returns undef on error
 					push @{$sync->{no}}, $input;
 				}
 			}
+		} elsif ($input_path =~ m!\Ahttps?://!i) {
+			prepare_http_input($self, $lei, $input_path) or return;
 		} elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
 			my $ifmt = lc $1;
 			if (($in_fmt // $ifmt) ne $ifmt) {
diff --git a/t/lei-import-http.t b/t/lei-import-http.t
new file mode 100644
index 00000000..35cbf369
--- /dev/null
+++ b/t/lei-import-http.t
@@ -0,0 +1,43 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use PublicInbox::Spawn qw(which);
+require_mods(qw(lei -httpd));
+which('curl') or plan skip_all => "curl required for $0";
+my ($ro_home, $cfg_path) = setup_public_inboxes;
+my ($tmpdir, $for_destroy) = tmpdir;
+my $sock = tcp_server;
+my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ];
+my $env = { PI_CONFIG => $cfg_path };
+my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-httpd $?");
+my $host_port = tcp_host_port($sock);
+undef $sock;
+test_lei({ tmpdir => $tmpdir }, sub {
+	my $url = "http://$host_port/t2";
+	for my $p (qw(bogus@x/t.mbox.gz bogus@x/raw ?q=noresultever)) {
+		ok(!lei('import', "$url/$p"), "/$p fails properly");
+	}
+	for my $p (qw(/ /T/ /t/ /t.atom)) {
+		ok(!lei('import', "$url/m\@example$p"), "/$p fails");
+		like($lei_err, qr/did you mean/, "gave hint for $p");
+	}
+	lei_ok 'import', "$url/testmessage\@example.com/raw";
+	lei_ok 'q', 'm:testmessage@example.com';
+	my $res = json_utf8->decode($lei_out);
+	is($res->[0]->{'m'}, 'testmessage@example.com', 'imported raw')
+		or diag explain($res);
+
+	lei_ok 'import', "$url/qp\@example.com/t.mbox.gz";
+	lei_ok 'q', 'm:qp@example.com';
+	$res = json_utf8->decode($lei_out);
+	is($res->[0]->{'m'}, 'qp@example.com', 'imported t.mbox.gz')
+		or diag explain($res);
+
+	lei_ok 'import', "$url/?q=s:boolean";
+	lei_ok 'q', 'm:20180720072141.GA15957@example';
+	$res = json_utf8->decode($lei_out);
+	is($res->[0]->{'m'}, '20180720072141.GA15957@example',
+		'imported search result') or diag explain($res);
+});
+done_testing;