From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 1EC901F9E5 for ; Thu, 22 Apr 2021 09:08:23 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/3] lei import: --incremental default for NNTP and IMAP Date: Thu, 22 Apr 2021 07:08:21 -0200 Message-Id: <20210422090822.18869-3-e@80x24.org> In-Reply-To: <20210422090822.18869-1-e@80x24.org> References: <20210422090822.18869-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: No point in burning through bandwidth to import stuff we already saw. All this logic is shared with -watch but uses a different pathname for lei since it's tied to lei/store (and not a public-inbox). --- Documentation/lei-store-format.pod | 1 + lib/PublicInbox/LEI.pm | 4 +++- lib/PublicInbox/LeiImport.pm | 5 +++++ lib/PublicInbox/NetReader.pm | 13 +++++++++---- t/lei-import-imap.t | 3 +++ t/lei-import-nntp.t | 3 +++ 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Documentation/lei-store-format.pod b/Documentation/lei-store-format.pod index a42c770e..3e1ddc65 100644 --- a/Documentation/lei-store-format.pod +++ b/Documentation/lei-store-format.pod @@ -32,6 +32,7 @@ prevent them from being accidentally treated as a v2 inbox. ~/.local/share/lei/store - ipc.lock # lock file for internal lei IPC - local/$EPOCH.git # normal bare git repositories + - net_last.sqlite3 # import state for IMAP & NNTP Additionally, the following share the same roles they do in extindex: diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 2e1aa246..d9e644eb 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -193,7 +193,8 @@ our %CMD = ( # sorted in order of importance/use: 'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include|I=s - lock=s@ in-format|F=s kw|keywords|flags! verbose|v+), @c_opt ], + lock=s@ in-format|F=s kw|keywords|flags! verbose|v+ + incremental!), @c_opt ], 'convert' => [ 'LOCATION...|--stdin', 'one-time conversion from URL or filesystem to another format', qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s @@ -244,6 +245,7 @@ my %OPTDESC = ( 'lock=s@' => [ 'METHOD|dotlock|fcntl|flock|none', 'mbox(5) locking method(s) to use (default: fcntl,dotlock)' ], +'incremental! import' => 'import already seen IMAP and NNTP articles', 'globoff|g' => "do not match locations using '*?' wildcards ". "and\xa0'[]'\x{a0}ranges", 'verbose|v+' => 'be more verbose', diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 16271603..accf08f5 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -58,6 +58,11 @@ sub lei_import { # the main "lei import" method my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1; if (my $net = $lei->{net}) { # $j = $net->net_concurrency($j); TODO + if ($lei->{opt}->{incremental} // 1) { + $net->{incremental} = 1; + $net->{itrk_fn} = $lei->store_path . + '/net_last.sqlite3'; + } } else { my $nproc = $self->detect_nproc; $j = $nproc if $j > $nproc; diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm index 0ef66fd8..c7b43f01 100644 --- a/lib/PublicInbox/NetReader.pm +++ b/lib/PublicInbox/NetReader.pm @@ -373,6 +373,13 @@ sub run_commit_cb ($) { $cb->(@args); } +sub _itrk ($$) { + my ($self, $uri) = @_; + return unless $self->{incremental}; + # itrk_fn is set by lei + PublicInbox::IMAPTracker->new($$uri, $self->{itrk_fn}); +} + sub _imap_fetch_all ($$$) { my ($self, $mic, $uri) = @_; my $sec = uri_section($uri); @@ -389,8 +396,7 @@ sub _imap_fetch_all ($$$) { return "E: $uri cannot get UIDVALIDITY"; $r_uidnext //= $mic->uidnext($mbx) // return "E: $uri cannot get UIDNEXT"; - my $itrk = $self->{incremental} ? - PublicInbox::IMAPTracker->new($$uri) : 0; + my $itrk = _itrk($self, $uri); my ($l_uidval, $l_uid) = $itrk ? $itrk->get_last : (); $l_uidval //= $r_uidval; # first time $l_uid //= 0; @@ -543,8 +549,7 @@ sub _nntp_fetch_all ($$$) { # IMAPTracker is also used for tracking NNTP, UID == article number # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's # expensive. So we assume newsgroups don't change: - my $itrk = $self->{incremental} ? - PublicInbox::IMAPTracker->new($$uri) : 0; + my $itrk = _itrk($self, $uri); my (undef, $l_art) = $itrk ? $itrk->get_last : (); # allow users to specify articles to refetch diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t index 7e4d44b9..490ea9be 100644 --- a/t/lei-import-imap.t +++ b/t/lei-import-imap.t @@ -24,5 +24,8 @@ test_lei({ tmpdir => $tmpdir }, sub { for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); lei_ok([qw(tag +kw:seen), "imap://$host_port/t.v2.0"], undef, undef); + + my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3"; + ok(-s $f, 'net tracked for redundant imports'); }); done_testing; diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t index 1fc6dbad..d795a86a 100644 --- a/t/lei-import-nntp.t +++ b/t/lei-import-nntp.t @@ -26,5 +26,8 @@ test_lei({ tmpdir => $tmpdir }, sub { my %r; for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); + + my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3"; + ok(-s $f, 'net tracked for redundant imports'); }); done_testing;