about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-04-22 07:08:21 -0200
committer Eric Wong <e@80x24.org> 2021-04-22 17:05:48 -0400
commit 21c0a681c3c9bdcdeffc6516db65812c7635cd68 (patch)
tree 1365cd39205672f7ab2aa10596bdf0df1315e095 /lib/PublicInbox
parent 311a5d37ad275cd75b1e64d87827c4d13fe4bfab (diff)
download public-inbox-21c0a681c3c9bdcdeffc6516db65812c7635cd68.tar.gz
No point in burning through bandwidth to import stuff we already
saw.  All this logic is shared with -watch but uses a different
pathname for lei since it's tied to lei/store (and not a
public-inbox).
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/LEI.pm 4
-rw-r--r-- lib/PublicInbox/LeiImport.pm 5
-rw-r--r-- lib/PublicInbox/NetReader.pm 13
3 files changed, 17 insertions, 5 deletions
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 2e1aa246..d9e644eb 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -193,7 +193,8 @@ our %CMD = ( # sorted in order of importance/use:
 'import' => [ 'LOCATION...|--stdin',
         'one-time import/update from URL or filesystem',
         qw(stdin| offset=i recursive|r exclude=s include|I=s
-        lock=s@ in-format|F=s kw|keywords|flags! verbose|v+), @c_opt ],
+        lock=s@ in-format|F=s kw|keywords|flags! verbose|v+
+        incremental!), @c_opt ],
 'convert' => [ 'LOCATION...|--stdin',
         'one-time conversion from URL or filesystem to another format',
         qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s
@@ -244,6 +245,7 @@ my %OPTDESC = (
 'lock=s@' => [ 'METHOD|dotlock|fcntl|flock|none',
         'mbox(5) locking method(s) to use (default: fcntl,dotlock)' ],
 
+'incremental!        import' => 'import already seen IMAP and NNTP articles',
 'globoff|g' => "do not match locations using '*?' wildcards ".
                 "and\xa0'[]'\x{a0}ranges",
 'verbose|v+' => 'be more verbose',
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 16271603..accf08f5 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -58,6 +58,11 @@ sub lei_import { # the main "lei import" method
         my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
         if (my $net = $lei->{net}) {
                 # $j = $net->net_concurrency($j); TODO
+                if ($lei->{opt}->{incremental} // 1) {
+                        $net->{incremental} = 1;
+                        $net->{itrk_fn} = $lei->store_path .
+                                                '/net_last.sqlite3';
+                }
         } else {
                 my $nproc = $self->detect_nproc;
                 $j = $nproc if $j > $nproc;
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 0ef66fd8..c7b43f01 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -373,6 +373,13 @@ sub run_commit_cb ($) {
         $cb->(@args);
 }
 
+sub _itrk ($$) {
+        my ($self, $uri) = @_;
+        return unless $self->{incremental};
+        # itrk_fn is set by lei
+        PublicInbox::IMAPTracker->new($$uri, $self->{itrk_fn});
+}
+
 sub _imap_fetch_all ($$$) {
         my ($self, $mic, $uri) = @_;
         my $sec = uri_section($uri);
@@ -389,8 +396,7 @@ sub _imap_fetch_all ($$$) {
                 return "E: $uri cannot get UIDVALIDITY";
         $r_uidnext //= $mic->uidnext($mbx) //
                 return "E: $uri cannot get UIDNEXT";
-        my $itrk = $self->{incremental} ?
-                        PublicInbox::IMAPTracker->new($$uri) : 0;
+        my $itrk = _itrk($self, $uri);
         my ($l_uidval, $l_uid) = $itrk ? $itrk->get_last : ();
         $l_uidval //= $r_uidval; # first time
         $l_uid //= 0;
@@ -543,8 +549,7 @@ sub _nntp_fetch_all ($$$) {
         # IMAPTracker is also used for tracking NNTP, UID == article number
         # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's
         # expensive.  So we assume newsgroups don't change:
-        my $itrk = $self->{incremental} ?
-                        PublicInbox::IMAPTracker->new($$uri) : 0;
+        my $itrk = _itrk($self, $uri);
         my (undef, $l_art) = $itrk ? $itrk->get_last : ();
 
         # allow users to specify articles to refetch