about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-09-21 07:41:54 +0000
committer Eric Wong <e@80x24.org> 2021-09-21 19:18:35 +0000
commit 1ed1a566c7d300ab8afb604b2e7c939299318005 (patch)
tree 11d98ef0c48983c0dfcd5def0c97f9aee4f28b48 /lib/PublicInbox
parent 3b23743ec90a4cb67dbc0b8bd94cc342c78e7a67 (diff)
download public-inbox-1ed1a566c7d300ab8afb604b2e7c939299318005.tar.gz
NNTP URLs are probably more prevalent in public message archives
than IMAP URLs.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/LeiLcat.pm 66
-rw-r--r-- lib/PublicInbox/LeiMailSync.pm 14
2 files changed, 51 insertions, 29 deletions
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
index 1a4a988e..0902c213 100644
--- a/lib/PublicInbox/LeiLcat.pm
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -11,47 +11,64 @@ use PublicInbox::LeiViewText;
 use URI::Escape qw(uri_unescape);
 use PublicInbox::MID qw($MID_EXTRACT);
 
-sub lcat_folder ($$$) {
-        my ($lei, $lms, $folder) = @_;
-        $lms //= $lei->lms or return;
-        my $folders = [ $folder];
+sub lcat_folder ($$;$$) {
+        my ($lei, $folder, $beg, $end) = @_;
+        my $lms = $lei->{-lms_ro} //= $lei->lms // return;
+        my $folders = [ $folder ];
         eval { $lms->arg2folder($lei, $folders) };
-        if ($@) {
-                $lei->child_error(0, "# unknown folder: $folder");
-        } else {
-                for my $f (@$folders) {
-                        my $fid = $lms->fid_for($f);
-                        push @{$lei->{lcat_todo}}, { fid => $fid };
-                }
+        return $lei->child_error(0, "# unknown folder: $folder") if $@;
+        my %range;
+        if (defined($beg)) { # NNTP article range
+                $range{min} = $beg;
+                $range{max} = $end // $beg;
+        }
+        for my $f (@$folders) {
+                my $fid = $lms->fid_for($f);
+                push @{$lei->{lcat_todo}}, { fid => $fid, %range };
         }
 }
 
 sub lcat_imap_uri ($$) {
         my ($lei, $uri) = @_;
-        my $lms = $lei->lms or return;
-        # cf. LeiXsearch->lcat_dump
+        # cf. LeiXSearch->lcat_dump
+        my $lms = $lei->{-lms_ro} //= $lei->lms // return;
         if (defined $uri->uid) {
                 push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri);
         } elsif (defined(my $fid = $lms->fid_for($$uri))) {
                 push @{$lei->{lcat_todo}}, { fid => $fid };
         } else {
-                lcat_folder($lei, $lms, $$uri);
+                lcat_folder($lei, $$uri);
         }
 }
 
+sub lcat_nntp_uri ($$) {
+        my ($lei, $uri) = @_;
+        my $mid = $uri->message; # already unescaped by URI::news
+        return "mid:$mid" if defined($mid);
+        my $lms = $lei->{-lms_ro} //= $lei->lms // return;
+        my ($ng, $beg, $end) = $uri->group;
+        $uri->group($ng);
+        lcat_folder($lei, $$uri, $beg, $end);
+        '""';
+}
+
 sub extract_1 ($$) {
         my ($lei, $x) = @_;
-        if ($x =~ m!\b(imaps?://[^>]+)!i) {
-                my $u = $1;
-                require PublicInbox::URIimap;
-                lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
-                '""'; # blank query, using {lcat_todo}
-        } elsif ($x =~ m!\b(maildir:.+)!i) {
-                lcat_folder($lei, undef, $1);
+        if ($x =~ m!\b(maildir:.+)!i) {
+                lcat_folder($lei, $1);
                 '""'; # blank query, using {lcat_todo}
-        } elsif ($x =~ m!\b([a-z]+?://\S+)!i) {
-                my $u = $1;
+        } elsif ($x =~ m!\b(([a-z]+)://\S+)!i) {
+                my ($u, $scheme) = ($1, $2);
                 $u =~ s/[\>\]\)\,\.\;]+\z//;
+                if ($scheme =~ m!\A(imaps?)\z!i) {
+                        require PublicInbox::URIimap;
+                        lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
+                        return '""'; # blank query, using {lcat_todo}
+                } elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) {
+                        require PublicInbox::URInntps;
+                        $u = PublicInbox::URInntps->new($u);
+                        return lcat_nntp_uri($lei, $u);
+                } # http, or something else:
                 require URI;
                 $u = URI->new($u);
                 my $p = $u->path;
@@ -93,7 +110,7 @@ sub extract_all {
         my $strict = !$lei->{opt}->{stdin};
         my @q;
         for my $x (@argv) {
-                if (my $term = extract_1($lei,$x)) {
+                if (my $term = extract_1($lei, $x)) {
                         push @q, $term;
                 } elsif ($strict) {
                         return $lei->fail(<<"");
@@ -101,6 +118,7 @@ could not extract Message-ID from $x
 
                 }
         }
+        delete $lei->{-lms_ro};
         @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv");
 }
 
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index f83c7de2..522a5ebc 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -197,9 +197,12 @@ INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
 sub each_src {
         my ($self, $folder, $cb, @args) = @_;
         my $dbh = $self->{dbh} //= dbh_new($self);
-        my $fid;
+        my ($fid, @rng);
+        my $and_ge_le = '';
         if (ref($folder) eq 'HASH') {
                 $fid = $folder->{fid} // die "BUG: no `fid'";
+                @rng = grep(defined, @$folder{qw(min max)});
+                $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng;
         } else {
                 $fid = $self->{fmap}->{$folder} //=
                         fid_for($self, $folder) // return;
@@ -208,16 +211,17 @@ sub each_src {
         # minimize implicit txn time to avoid blocking writers by
         # batching SELECTs.  This looks wonky but is necessary since
         # $cb-> may access the DB on its own.
-        my $ary = $dbh->selectall_arrayref(<<'', undef, $fid);
-SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ?
+        my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng);
+SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le
 ORDER BY _rowid_ ASC LIMIT 1000
 
         my $min = @$ary ? $ary->[-1]->[0] : undef;
         while (defined $min) {
                 for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) }
 
-                $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min);
-SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? AND _rowid_ > ?
+                $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min);
+SELECT _rowid_,oidbin,uid FROM blob2num
+WHERE fid = ? $and_ge_le AND _rowid_ > ?
 ORDER BY _rowid_ ASC LIMIT 1000
 
                 $min = @$ary ? $ary->[-1]->[0] : undef;