diff options
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/LeiLcat.pm | 66 | ||||
-rw-r--r-- | lib/PublicInbox/LeiMailSync.pm | 14 |
2 files changed, 51 insertions, 29 deletions
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm index 1a4a988e..0902c213 100644 --- a/lib/PublicInbox/LeiLcat.pm +++ b/lib/PublicInbox/LeiLcat.pm @@ -11,47 +11,64 @@ use PublicInbox::LeiViewText; use URI::Escape qw(uri_unescape); use PublicInbox::MID qw($MID_EXTRACT); -sub lcat_folder ($$$) { - my ($lei, $lms, $folder) = @_; - $lms //= $lei->lms or return; - my $folders = [ $folder]; +sub lcat_folder ($$;$$) { + my ($lei, $folder, $beg, $end) = @_; + my $lms = $lei->{-lms_ro} //= $lei->lms // return; + my $folders = [ $folder ]; eval { $lms->arg2folder($lei, $folders) }; - if ($@) { - $lei->child_error(0, "# unknown folder: $folder"); - } else { - for my $f (@$folders) { - my $fid = $lms->fid_for($f); - push @{$lei->{lcat_todo}}, { fid => $fid }; - } + return $lei->child_error(0, "# unknown folder: $folder") if $@; + my %range; + if (defined($beg)) { # NNTP article range + $range{min} = $beg; + $range{max} = $end // $beg; + } + for my $f (@$folders) { + my $fid = $lms->fid_for($f); + push @{$lei->{lcat_todo}}, { fid => $fid, %range }; } } sub lcat_imap_uri ($$) { my ($lei, $uri) = @_; - my $lms = $lei->lms or return; - # cf. LeiXsearch->lcat_dump + # cf. LeiXSearch->lcat_dump + my $lms = $lei->{-lms_ro} //= $lei->lms // return; if (defined $uri->uid) { push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri); } elsif (defined(my $fid = $lms->fid_for($$uri))) { push @{$lei->{lcat_todo}}, { fid => $fid }; } else { - lcat_folder($lei, $lms, $$uri); + lcat_folder($lei, $$uri); } } +sub lcat_nntp_uri ($$) { + my ($lei, $uri) = @_; + my $mid = $uri->message; # already unescaped by URI::news + return "mid:$mid" if defined($mid); + my $lms = $lei->{-lms_ro} //= $lei->lms // return; + my ($ng, $beg, $end) = $uri->group; + $uri->group($ng); + lcat_folder($lei, $$uri, $beg, $end); + '""'; +} + sub extract_1 ($$) { my ($lei, $x) = @_; - if ($x =~ m!\b(imaps?://[^>]+)!i) { - my $u = $1; - require PublicInbox::URIimap; - lcat_imap_uri($lei, PublicInbox::URIimap->new($u)); - '""'; # blank query, using {lcat_todo} - } elsif ($x =~ m!\b(maildir:.+)!i) { - lcat_folder($lei, undef, $1); + if ($x =~ m!\b(maildir:.+)!i) { + lcat_folder($lei, $1); '""'; # blank query, using {lcat_todo} - } elsif ($x =~ m!\b([a-z]+?://\S+)!i) { - my $u = $1; + } elsif ($x =~ m!\b(([a-z]+)://\S+)!i) { + my ($u, $scheme) = ($1, $2); $u =~ s/[\>\]\)\,\.\;]+\z//; + if ($scheme =~ m!\A(imaps?)\z!i) { + require PublicInbox::URIimap; + lcat_imap_uri($lei, PublicInbox::URIimap->new($u)); + return '""'; # blank query, using {lcat_todo} + } elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) { + require PublicInbox::URInntps; + $u = PublicInbox::URInntps->new($u); + return lcat_nntp_uri($lei, $u); + } # http, or something else: require URI; $u = URI->new($u); my $p = $u->path; @@ -93,7 +110,7 @@ sub extract_all { my $strict = !$lei->{opt}->{stdin}; my @q; for my $x (@argv) { - if (my $term = extract_1($lei,$x)) { + if (my $term = extract_1($lei, $x)) { push @q, $term; } elsif ($strict) { return $lei->fail(<<""); @@ -101,6 +118,7 @@ could not extract Message-ID from $x } } + delete $lei->{-lms_ro}; @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv"); } diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index f83c7de2..522a5ebc 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -197,9 +197,12 @@ INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?) sub each_src { my ($self, $folder, $cb, @args) = @_; my $dbh = $self->{dbh} //= dbh_new($self); - my $fid; + my ($fid, @rng); + my $and_ge_le = ''; if (ref($folder) eq 'HASH') { $fid = $folder->{fid} // die "BUG: no `fid'"; + @rng = grep(defined, @$folder{qw(min max)}); + $and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng; } else { $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder) // return; @@ -208,16 +211,17 @@ sub each_src { # minimize implicit txn time to avoid blocking writers by # batching SELECTs. This looks wonky but is necessary since # $cb-> may access the DB on its own. - my $ary = $dbh->selectall_arrayref(<<'', undef, $fid); -SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? + my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng); +SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le ORDER BY _rowid_ ASC LIMIT 1000 my $min = @$ary ? $ary->[-1]->[0] : undef; while (defined $min) { for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) } - $ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min); -SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? AND _rowid_ > ? + $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min); +SELECT _rowid_,oidbin,uid FROM blob2num +WHERE fid = ? $and_ge_le AND _rowid_ > ? ORDER BY _rowid_ ASC LIMIT 1000 $min = @$ary ? $ary->[-1]->[0] : undef; |