about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--lib/PublicInbox/LeiLcat.pm66
-rw-r--r--lib/PublicInbox/LeiMailSync.pm14
-rw-r--r--t/lei-import-nntp.t23
3 files changed, 74 insertions, 29 deletions
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
index 1a4a988e..0902c213 100644
--- a/lib/PublicInbox/LeiLcat.pm
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -11,47 +11,64 @@ use PublicInbox::LeiViewText;
 use URI::Escape qw(uri_unescape);
 use PublicInbox::MID qw($MID_EXTRACT);
 
-sub lcat_folder ($$$) {
-        my ($lei, $lms, $folder) = @_;
-        $lms //= $lei->lms or return;
-        my $folders = [ $folder];
+sub lcat_folder ($$;$$) {
+        my ($lei, $folder, $beg, $end) = @_;
+        my $lms = $lei->{-lms_ro} //= $lei->lms // return;
+        my $folders = [ $folder ];
         eval { $lms->arg2folder($lei, $folders) };
-        if ($@) {
-                $lei->child_error(0, "# unknown folder: $folder");
-        } else {
-                for my $f (@$folders) {
-                        my $fid = $lms->fid_for($f);
-                        push @{$lei->{lcat_todo}}, { fid => $fid };
-                }
+        return $lei->child_error(0, "# unknown folder: $folder") if $@;
+        my %range;
+        if (defined($beg)) { # NNTP article range
+                $range{min} = $beg;
+                $range{max} = $end // $beg;
+        }
+        for my $f (@$folders) {
+                my $fid = $lms->fid_for($f);
+                push @{$lei->{lcat_todo}}, { fid => $fid, %range };
         }
 }
 
 sub lcat_imap_uri ($$) {
         my ($lei, $uri) = @_;
-        my $lms = $lei->lms or return;
-        # cf. LeiXsearch->lcat_dump
+        # cf. LeiXSearch->lcat_dump
+        my $lms = $lei->{-lms_ro} //= $lei->lms // return;
         if (defined $uri->uid) {
                 push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri);
         } elsif (defined(my $fid = $lms->fid_for($$uri))) {
                 push @{$lei->{lcat_todo}}, { fid => $fid };
         } else {
-                lcat_folder($lei, $lms, $$uri);
+                lcat_folder($lei, $$uri);
         }
 }
 
+sub lcat_nntp_uri ($$) {
+        my ($lei, $uri) = @_;
+        my $mid = $uri->message; # already unescaped by URI::news
+        return "mid:$mid" if defined($mid);
+        my $lms = $lei->{-lms_ro} //= $lei->lms // return;
+        my ($ng, $beg, $end) = $uri->group;
+        $uri->group($ng);
+        lcat_folder($lei, $$uri, $beg, $end);
+        '""';
+}
+
 sub extract_1 ($$) {
         my ($lei, $x) = @_;
-        if ($x =~ m!\b(imaps?://[^>]+)!i) {
-                my $u = $1;
-                require PublicInbox::URIimap;
-                lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
-                '""'; # blank query, using {lcat_todo}
-        } elsif ($x =~ m!\b(maildir:.+)!i) {
-                lcat_folder($lei, undef, $1);
+        if ($x =~ m!\b(maildir:.+)!i) {
+                lcat_folder($lei, $1);
                 '""'; # blank query, using {lcat_todo}
-        } elsif ($x =~ m!\b([a-z]+?://\S+)!i) {
-                my $u = $1;
+        } elsif ($x =~ m!\b(([a-z]+)://\S+)!i) {
+                my ($u, $scheme) = ($1, $2);
                 $u =~ s/[\>\]\)\,\.\;]+\z//;
+                if ($scheme =~ m!\A(imaps?)\z!i) {
+                        require PublicInbox::URIimap;
+                        lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
+                        return '""'; # blank query, using {lcat_todo}
+                } elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) {
+                        require PublicInbox::URInntps;
+                        $u = PublicInbox::URInntps->new($u);
+                        return lcat_nntp_uri($lei, $u);
+                } # http, or something else:
                 require URI;
                 $u = URI->new($u);
                 my $p = $u->path;
@@ -93,7 +110,7 @@ sub extract_all {
         my $strict = !$lei->{opt}->{stdin};
         my @q;
         for my $x (@argv) {
-                if (my $term = extract_1($lei,$x)) {
+                if (my $term = extract_1($lei, $x)) {
                         push @q, $term;
                 } elsif ($strict) {
                         return $lei->fail(<<"");
@@ -101,6 +118,7 @@ could not extract Message-ID from $x
 
                 }
         }
+        delete $lei->{-lms_ro};
         @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv");
 }
 
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index f83c7de2..522a5ebc 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -197,9 +197,12 @@ INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
 sub each_src {
         my ($self, $folder, $cb, @args) = @_;
         my $dbh = $self->{dbh} //= dbh_new($self);
-        my $fid;
+        my ($fid, @rng);
+        my $and_ge_le = '';
         if (ref($folder) eq 'HASH') {
                 $fid = $folder->{fid} // die "BUG: no `fid'";
+                @rng = grep(defined, @$folder{qw(min max)});
+                $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng;
         } else {
                 $fid = $self->{fmap}->{$folder} //=
                         fid_for($self, $folder) // return;
@@ -208,16 +211,17 @@ sub each_src {
         # minimize implicit txn time to avoid blocking writers by
         # batching SELECTs.  This looks wonky but is necessary since
         # $cb-> may access the DB on its own.
-        my $ary = $dbh->selectall_arrayref(<<'', undef, $fid);
-SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ?
+        my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng);
+SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le
 ORDER BY _rowid_ ASC LIMIT 1000
 
         my $min = @$ary ? $ary->[-1]->[0] : undef;
         while (defined $min) {
                 for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) }
 
-                $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min);
-SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? AND _rowid_ > ?
+                $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min);
+SELECT _rowid_,oidbin,uid FROM blob2num
+WHERE fid = ? $and_ge_le AND _rowid_ > ?
 ORDER BY _rowid_ ASC LIMIT 1000
 
                 $min = @$ary ? $ary->[-1]->[0] : undef;
diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t
index 0b080781..eb1ae312 100644
--- a/t/lei-import-nntp.t
+++ b/t/lei-import-nntp.t
@@ -25,6 +25,11 @@ test_lei({ tmpdir => $tmpdir }, sub {
         is(ref(json_utf8->decode($lei_out)), 'ARRAY', 'ls-mail-source JSON');
 
         lei_ok('import', $url);
+        lei_ok "lcat", "nntp://$host_port/testmessage\@example.com";
+        my $local = $lei_out;
+        lei_ok "lcat", "nntp://example.com/testmessage\@example.com";
+        my $remote = $lei_out;
+        is($local, $remote, 'Message-ID used even from unknown host');
         lei_ok(qw(q z:1..));
         $out = json_utf8->decode($lei_out);
         ok(scalar(@$out) > 1, 'got imported messages');
@@ -57,6 +62,11 @@ test_lei({ tmpdir => $tmpdir }, sub {
         lei_ok('inspect', "$url/$high");
         my $x = json_utf8->decode($lei_out);
         like($x->{$url}->{$high}, qr/\A[a-f0-9]{40,}\z/, 'inspect shows blob');
+        lei_ok qw(lcat -f json), "$url/$high";
+        my $lcat = json_utf8->decode($lei_out);
+        is($lcat->[1], undef, 'only one result for lcat');
+        is($lcat->[0]->{blob}, $x->{$url}->{$high},
+                'lcat showed correct blob');
 
         lei_ok 'ls-mail-sync';
         is($lei_out, "$url\n", 'article number not stored as folder');
@@ -78,6 +88,19 @@ test_lei({ tmpdir => $tmpdir }, sub {
         is(scalar(grep(/\A[a-f0-9]{40,}\z/, values %{$x->{$url}})),
                 $end - $low + 1, 'all values are git blobs');
 
+        lei_ok qw(lcat -f json), "$url/$low";
+        $lcat = json_utf8->decode($lei_out);
+        is($lcat->[1], undef, 'only one result for lcat');
+        is($lcat->[0]->{blob}, $x->{$url}->{$low},
+                'lcat showed correct blob');
+        lei_ok qw(lcat -f json), "$url/$low-$end";
+        $lcat = json_utf8->decode($lei_out);
+        pop @$lcat;
+        for ($low..$end) {
+                my $tip = shift @$lcat;
+                is($x->{$url}->{$_}, $tip->{blob}, "blob matches art #$_");
+        }
+
         lei_ok 'ls-mail-sync';
         is($lei_out, "$url\n", 'article range not stored as folder');
         lei_ok qw(q z:0..); my $start = json_utf8->decode($lei_out);