about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiInput.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/LeiInput.pm')
-rw-r--r--lib/PublicInbox/LeiInput.pm76
1 files changed, 76 insertions, 0 deletions
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index d11d23d4..785e607d 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -6,6 +6,7 @@ package PublicInbox::LeiInput;
 use strict;
 use v5.10.1;
 use PublicInbox::DS;
+use PublicInbox::Spawn qw(which popen_rd);
 
 # JMAP RFC 8621 4.1.1
 # https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
@@ -77,6 +78,32 @@ error reading $name: $!
         }
 }
 
+# handles mboxrd endpoints described in Documentation/design_notes.txt
+sub handle_http_input ($$@) {
+        my ($self, $url, @args) = @_;
+        my $lei = $self->{lei} or die 'BUG: {lei} missing';
+        my $curl_opt = delete $self->{"-curl-$url"} or
+                                die("BUG: $url curl options not prepared");
+        my $uri = pop @$curl_opt;
+        my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
+        push @$curl, '-s', @$curl_opt;
+        my $cmd = $curl->for_uri($lei, $uri);
+        $lei->qerr("# $cmd");
+        my $rdr = { 2 => $lei->{2}, pgid => 0 };
+        my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
+        grep(/\A--compressed\z/, @$curl) or
+                $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
+        eval {
+                PublicInbox::MboxReader->mboxrd($fh,
+                                                $self->can('input_mbox_cb'),
+                                                $self, @args);
+        };
+        my $err = $@;
+        waitpid($pid, 0);
+        $? || $err and
+                $lei->child_error($? || 1, "@$cmd failed".$err ? " $err" : '');
+}
+
 sub input_path_url {
         my ($self, $input, @args) = @_;
         my $lei = $self->{lei};
@@ -92,6 +119,9 @@ sub input_path_url {
                                                 $self->can('input_net_cb'),
                                         $self, @args);
                 return;
+        } elsif ($input =~ m!\Ahttps?://!i) {
+                handle_http_input($self, $input, @args);
+                return;
         }
         if ($input =~ s!\A([a-z0-9]+):!!i) {
                 $ifmt = lc($1);
@@ -129,6 +159,50 @@ EOM
         }
 }
 
+sub bad_http ($$;$) {
+        my ($lei, $url, $alt) = @_;
+        my $x = $alt ? "did you mean <$alt>?" : 'download and import manually';
+        $lei->fail("E: <$url> not recognized, $x");
+}
+
+sub prepare_http_input ($$$) {
+        my ($self, $lei, $url) = @_;
+        require URI;
+        require PublicInbox::MboxReader;
+        require PublicInbox::LeiCurl;
+        require IO::Uncompress::Gunzip;
+        $self->{curl} //= which('curl') or
+                                return $lei->fail("curl missing for <$url>");
+        my $uri = URI->new($url);
+        my $path = $uri->path;
+        my %qf = $uri->query_form;
+        my @curl_opt;
+        if ($path =~ m!/(?:t\.mbox\.gz|all\.mbox\.gz)\z!) {
+                # OK
+        } elsif ($path =~ m!/raw\z!) {
+                push @curl_opt, '--compressed';
+        # convert search query to mboxrd request since they require POST
+        # this is only intended for PublicInbox::WWW, and will false-positive
+        # on many other search engines... oh well
+        } elsif (defined $qf{'q'}) {
+                $qf{x} = 'm';
+                $uri->query_form(\%qf);
+                push @curl_opt, '-d', '';
+                $$uri ne $url and $lei->qerr(<<"");
+# <$url> rewritten to <$$uri> with HTTP POST
+
+        # try to provide hints for /$INBOX/$MSGID/T/ and /$INBOX/
+        } elsif ($path =~ s!/[tT]/\z!/t.mbox.gz! ||
+                        $path =~ s!/t\.atom\z!/t.mbox.gz! ||
+                        $path =~ s!/([^/]+\@[^/]+)/\z!/$1/raw!) {
+                $uri->path($path);
+                return bad_http($lei, $url, $$uri);
+        } else {
+                return bad_http($lei, $url);
+        }
+        $self->{"-curl-$url"} = [ @curl_opt, $uri ]; # for handle_http_input
+}
+
 sub prepare_inputs { # returns undef on error
         my ($self, $lei, $inputs) = @_;
         my $in_fmt = $lei->{opt}->{'in-format'};
@@ -156,6 +230,8 @@ sub prepare_inputs { # returns undef on error
                                         push @{$sync->{no}}, $input;
                                 }
                         }
+                } elsif ($input_path =~ m!\Ahttps?://!i) {
+                        prepare_http_input($self, $lei, $input_path) or return;
                 } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
                         my $ifmt = lc $1;
                         if (($in_fmt // $ifmt) ne $ifmt) {