user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/6] more lei stuffs
@ 2021-02-10  7:07 Eric Wong
  2021-02-10  7:07 ` [PATCH 1/6] lei *external: glob improvements, ls-external filtering Eric Wong
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

IMAP and NNTP lei-import support are coming.  Basically stuff in
-watch, but IMAP will need to support flags (aka keywords) in
IMAP.

"lei ls-external" gains filtering + globbing support
(still needs shell completion support)

Eric Wong (6):
  lei *external: glob improvements, ls-external filtering
  lei_external: remove unnecessary Exporter use
  test_common: support lei-daemon only testing
  lei ls-external: support --local and --remote
  lei: note some TODO items (curl, externals)
  net_reader: new package split from -watch

 MANIFEST                       |   1 +
 lib/PublicInbox/LEI.pm         |   4 +-
 lib/PublicInbox/LeiCurl.pm     |   2 +
 lib/PublicInbox/LeiExternal.pm |  85 +++++++++----
 lib/PublicInbox/NetReader.pm   | 220 +++++++++++++++++++++++++++++++++
 lib/PublicInbox/TestCommon.pm  |  13 +-
 lib/PublicInbox/Watch.pm       | 204 +-----------------------------
 t/lei-externals.t              |  36 ++++--
 t/lei_external.t               |  20 ++-
 9 files changed, 343 insertions(+), 242 deletions(-)
 create mode 100644 lib/PublicInbox/NetReader.pm


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/6] lei *external: glob improvements, ls-external filtering
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
@ 2021-02-10  7:07 ` Eric Wong
  2021-02-10  7:07 ` [PATCH 2/6] lei_external: remove unnecessary Exporter use Eric Wong
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

The "ls-external" now accepts the same glob patterns used by
with lei q --{include,only,exclude}.  If no glob is detected, it
will be treated as a literal substring match (like "grep -F").

Inverting matches is also supported ("grep -v").
---
 lib/PublicInbox/LEI.pm         |  4 +-
 lib/PublicInbox/LeiExternal.pm | 74 ++++++++++++++++++++++++----------
 t/lei_external.t               | 20 +++++++--
 3 files changed, 71 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index dd831c54..eb5a646e 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -124,8 +124,8 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i),
 	qw(quiet|q verbose|v+),
 	index_opt(), PublicInbox::LeiQuery::curl_opt() ],
-'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations',
-	qw(format|f=s z|0 local remote quiet|q) ],
+'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations',
+	qw(format|f=s z|0 globoff|g invert-match|v local remote) ],
 'forget-external' => [ 'LOCATION...|--prune',
 	'exclude further results from a publicinbox|extindex',
 	qw(prune quiet|q) ],
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b65dc87c..bac15226 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -22,22 +22,16 @@ sub externals_each {
 	# highest boost first, but stable for alphabetic tie break
 	use sort 'stable';
 	my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
-	return @order if !$cb;
-	for my $loc (@order) {
-		$cb->(@arg, $loc, $boost{$loc});
+	if (ref($cb) eq 'CODE') {
+		for my $loc (@order) {
+			$cb->(@arg, $loc, $boost{$loc});
+		}
+	} elsif (ref($cb) eq 'HASH') {
+		%$cb = %boost;
 	}
 	@order; # scalar or array
 }
 
-sub lei_ls_external {
-	my ($self, @argv) = @_;
-	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
-	externals_each($self, sub {
-		my ($loc, $boost_val) = @_;
-		$self->out($loc, $OFS, 'boost=', $boost_val, $ORS);
-	});
-}
-
 sub ext_canonicalize {
 	my ($location) = @_;
 	if ($location !~ m!\Ahttps?://!) {
@@ -52,28 +46,47 @@ sub ext_canonicalize {
 	}
 }
 
-my %patmap = ('*' => '[^/]*?', '?' => '[^/]', '[' => '[', ']' => ']');
-sub glob2pat {
-	my ($glob) = @_;
-        $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
-        $glob;
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+		'[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re {
+	my ($re) = @_;
+	my $p = '';
+	my $in_bracket = 0;
+	my $qm = 0;
+	my $changes = ($re =~ s!(.)!
+		$re_map{$p eq '\\' ? '' : do {
+			if ($1 eq '[') { ++$in_bracket }
+			elsif ($1 eq ']') { --$in_bracket }
+			$p = $1;
+		}} // do {
+			$p = $1;
+			($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+		}!sge);
+	# bashism (also supported by curl): {a,b,c} => (a|b|c)
+	$re =~ s/([^\\]*)\\\{([^,]*?,[^\\]*?)\\\}/
+		(my $in_braces = $2) =~ tr!,!|!;
+		$1."($in_braces)";
+		/sge;
+	($changes - $qm) ? $re : undef;
 }
 
+# get canonicalized externals list matching $loc
+# $is_exclude denotes it's for --exclude
+# otherwise it's for --only/--include is assumed
 sub get_externals {
-	my ($self, $loc, $exclude) = @_;
+	my ($self, $loc, $is_exclude) = @_;
 	return (ext_canonicalize($loc)) if -e $loc;
-
 	my @m;
 	my @cur = externals_each($self);
 	my $do_glob = !$self->{opt}->{globoff}; # glob by default
-	if ($do_glob && ($loc =~ /[\*\?]/s || $loc =~ /\[.*\]/s)) {
-		my $re = glob2pat($loc);
+	if ($do_glob && (my $re = glob2re($loc))) {
 		@m = grep(m!$re!, @cur);
 		return @m if scalar(@m);
 	} elsif (index($loc, '/') < 0) { # exact basename match:
 		@m = grep(m!/\Q$loc\E/?\z!, @cur);
 		return @m if scalar(@m) == 1;
-	} elsif ($exclude) { # URL, maybe:
+	} elsif ($is_exclude) { # URL, maybe:
 		my $canon = ext_canonicalize($loc);
 		@m = grep(m!\A\Q$canon\E\z!, @cur);
 		return @m if scalar(@m) == 1;
@@ -88,6 +101,23 @@ sub get_externals {
 	();
 }
 
+sub lei_ls_external {
+	my ($self, $filter) = @_;
+	my $do_glob = !$self->{opt}->{globoff}; # glob by default
+	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+	$filter //= '*';
+	my $re = $do_glob ? glob2re($filter) : undef;
+	$re //= index($filter, '/') < 0 ?
+			qr!/\Q$filter\E/?\z! : # exact basename match
+			qr/\Q$filter\E/; # grep -F semantics
+	my @ext = externals_each($self, my $boost = {});
+	@ext = $self->{opt}->{'invert-match'} ? grep(!/$re/, @ext)
+					: grep(/$re/, @ext);
+	for my $loc (@ext) {
+		$self->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);
+	}
+}
+
 sub add_external_finish {
 	my ($self, $location) = @_;
 	my $cfg = $self->_lei_cfg(1);
diff --git a/t/lei_external.t b/t/lei_external.t
index 587990db..0ef6633d 100644
--- a/t/lei_external.t
+++ b/t/lei_external.t
@@ -1,7 +1,8 @@
 #!perl -w
-use strict;
-use v5.10.1;
-use Test::More;
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# internal unit test, see t/lei-externals.t for functional tests
+use strict; use v5.10.1; use Test::More;
 my $cls = 'PublicInbox::LeiExternal';
 require_ok $cls;
 my $canon = $cls->can('ext_canonicalize');
@@ -15,4 +16,17 @@ is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent',
 is($canon->('/this//path/'), '/this/path', 'extra slashes gone');
 is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved');
 
+my $glob2re = $cls->can('glob2re');
+is($glob2re->('foo'), undef, 'plain string unchanged');
+is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted');
+is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted');
+is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces');
+is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start');
+is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end');
+is_deeply($glob2re->('{a}'), undef, 'ungrouped brace');
+is_deeply($glob2re->('{a'), undef, 'open left brace');
+is_deeply($glob2re->('a}'), undef, 'open right brace');
+is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
+is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
+
 done_testing;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/6] lei_external: remove unnecessary Exporter use
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
  2021-02-10  7:07 ` [PATCH 1/6] lei *external: glob improvements, ls-external filtering Eric Wong
@ 2021-02-10  7:07 ` Eric Wong
  2021-02-10  7:07 ` [PATCH 3/6] test_common: support lei-daemon only testing Eric Wong
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

We don't need to export for methods which are only called via
"->" or "->can".
---
 lib/PublicInbox/LeiExternal.pm | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index bac15226..b4e1918d 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -5,8 +5,6 @@
 package PublicInbox::LeiExternal;
 use strict;
 use v5.10.1;
-use parent qw(Exporter);
-our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
 use PublicInbox::Config;
 
 sub externals_each {

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/6] test_common: support lei-daemon only testing
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
  2021-02-10  7:07 ` [PATCH 1/6] lei *external: glob improvements, ls-external filtering Eric Wong
  2021-02-10  7:07 ` [PATCH 2/6] lei_external: remove unnecessary Exporter use Eric Wong
@ 2021-02-10  7:07 ` Eric Wong
  2021-02-10  7:07 ` [PATCH 4/6] lei ls-external: support --local and --remote Eric Wong
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

Daemon-only tests can be significantly faster due to cached
configs; so give developers a chance to test only daemons to
improve productivity.

The differences between daemon and oneshot modes are minimal,
at this point.
---
 lib/PublicInbox/TestCommon.pm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 63d45ac3..64fe0499 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -506,6 +506,8 @@ EOM
 		}
 	}; # SKIP for lei_daemon
 	unless ($test_opt->{daemon_only}) {
+		$ENV{TEST_LEI_DAEMON_ONLY} and
+			skip 'TEST_LEI_DAEMON_ONLY set', 1;
 		require_ok 'PublicInbox::LEI';
 		my $home = "$tmpdir/lei-oneshot";
 		mkdir($home, 0700) or BAIL_OUT "mkdir: $!";

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/6] lei ls-external: support --local and --remote
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
                   ` (2 preceding siblings ...)
  2021-02-10  7:07 ` [PATCH 3/6] test_common: support lei-daemon only testing Eric Wong
@ 2021-02-10  7:07 ` Eric Wong
  2021-02-10  7:07 ` [PATCH 5/6] lei: note some TODO items (curl, externals) Eric Wong
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

Similar to "lei q", "--local" means only local and "--remote"
means remote only.  I can't think of a reason to have --no-*
variants for these switches.

There's also updates to the TestCommon for more common lei
cases.
---
 lib/PublicInbox/LeiExternal.pm | 12 +++++++++---
 lib/PublicInbox/TestCommon.pm  | 11 ++++++++++-
 t/lei-externals.t              | 36 +++++++++++++++++++++++++---------
 3 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b4e1918d..b402eed4 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -101,16 +101,22 @@ sub get_externals {
 
 sub lei_ls_external {
 	my ($self, $filter) = @_;
-	my $do_glob = !$self->{opt}->{globoff}; # glob by default
-	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+	my $opt = $self->{opt};
+	my $do_glob = !$opt->{globoff}; # glob by default
+	my ($OFS, $ORS) = $opt->{z} ? ("\0", "\0\0") : (" ", "\n");
 	$filter //= '*';
 	my $re = $do_glob ? glob2re($filter) : undef;
 	$re //= index($filter, '/') < 0 ?
 			qr!/\Q$filter\E/?\z! : # exact basename match
 			qr/\Q$filter\E/; # grep -F semantics
 	my @ext = externals_each($self, my $boost = {});
-	@ext = $self->{opt}->{'invert-match'} ? grep(!/$re/, @ext)
+	@ext = $opt->{'invert-match'} ? grep(!/$re/, @ext)
 					: grep(/$re/, @ext);
+	if ($opt->{'local'} && !$opt->{remote}) {
+		@ext = grep(!m!\A[a-z\+]+://!, @ext);
+	} elsif ($opt->{remote} && !$opt->{'local'}) {
+		@ext = grep(m!\A[a-z\+]+://!, @ext);
+	}
 	for my $loc (@ext) {
 		$self->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);
 	}
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 64fe0499..f5b3fae4 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -9,12 +9,14 @@ use v5.10.1;
 use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek);
 use POSIX qw(dup2);
 use IO::Socket::INET;
+use File::Spec;
 our @EXPORT;
 BEGIN {
 	@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
 		run_script start_script key2sub xsys xsys_e xqx eml_load tick
 		have_xapian_compact json_utf8 setup_public_inboxes
-		tcp_host_port test_lei lei $lei $lei_out $lei_err $lei_opt);
+		tcp_host_port test_lei lei lei_ok
+		$lei $lei_out $lei_err $lei_opt);
 	require Test::More;
 	my @methods = grep(!/\W/, @Test::More::EXPORT);
 	eval(join('', map { "*$_=\\&Test::More::$_;" } @methods));
@@ -459,6 +461,13 @@ our $lei = sub {
 
 sub lei (@) { $lei->(@_) }
 
+sub lei_ok (@) {
+	my $msg = ref($_[-1]) ? pop(@_) : undef;
+	# filter out anything that looks like a path name for consistent logs
+	my @msg = grep(!m!\A/!, @_);
+	ok($lei->(@_), "lei @msg". ($msg ? " ($$msg)" : ''));
+}
+
 sub json_utf8 () {
 	state $x = ref(PublicInbox::Config->json)->new->utf8->canonical;
 }
diff --git a/t/lei-externals.t b/t/lei-externals.t
index 28c01174..9fc8bae9 100644
--- a/t/lei-externals.t
+++ b/t/lei-externals.t
@@ -35,20 +35,20 @@ test_lei(sub {
 	my $home = $ENV{HOME};
 	my $config_file = "$home/.config/lei/config";
 	my $store_dir = "$home/.local/share/lei";
-	ok($lei->('ls-external'), 'ls-external works');
+	lei_ok 'ls-external', \'ls-external on fresh install';
 	is($lei_out.$lei_err, '', 'ls-external no output, yet');
 	ok(!-e $config_file && !-e $store_dir,
 		'nothing created by ls-external');
 
-	ok(!$lei->('add-external', "$home/nonexistent"),
-		"fails on non-existent dir");
-	ok($lei->('ls-external'), 'ls-external works after add failure');
+	ok(!lei('add-external', "$home/nonexistent",
+		"fails on non-existent dir"));
+	lei_ok('ls-external', \'ls-external works after add failure');
 	is($lei_out.$lei_err, '', 'ls-external still has no output');
 	my $cfg = PublicInbox::Config->new($cfg_path);
 	$cfg->each_inbox(sub {
 		my ($ibx) = @_;
-		ok($lei->(qw(add-external -q), $ibx->{inboxdir}),
-			'added external');
+		lei_ok(qw(add-external -q), $ibx->{inboxdir},
+				\'added external');
 		is($lei_out.$lei_err, '', 'no output');
 	});
 	ok(-s $config_file && -e $store_dir,
@@ -59,12 +59,30 @@ test_lei(sub {
 		is($lcfg->{"external.$ibx->{inboxdir}.boost"}, 0,
 			"configured boost on $ibx->{name}");
 	});
-	$lei->('ls-external');
+	lei_ok 'ls-external';
 	like($lei_out, qr/boost=0\n/s, 'ls-external has output');
-	ok($lei->(qw(add-external -q https://EXAMPLE.com/ibx)), 'add remote');
+	lei_ok qw(add-external -q https://EXAMPLE.com/ibx), \'add remote';
 	is($lei_err, '', 'no warnings after add-external');
 
-	ok($lei->(qw(_complete lei forget-external)), 'complete for externals');
+	{
+		lei_ok qw(ls-external --remote);
+		my $r_only = +{ map { $_ => 1 } split(/^/m, $lei_out) };
+		lei_ok qw(ls-external --local);
+		my $l_only = +{ map { $_ => 1 } split(/^/m, $lei_out) };
+		lei_ok 'ls-external';
+		is_deeply([grep { $l_only->{$_} } keys %$r_only], [],
+			'no locals in --remote');
+		is_deeply([grep { $r_only->{$_} } keys %$l_only], [],
+			'no remotes in --local');
+		my $all = +{ map { $_ => 1 } split(/^/m, $lei_out) };
+		is_deeply($all, { %$r_only, %$l_only },
+				'default output combines remote + local');
+		lei_ok qw(ls-external --remote --local);
+		my $both = +{ map { $_ => 1 } split(/^/m, $lei_out) };
+		is_deeply($all, $both, '--remote --local == no args');
+	}
+
+	lei_ok qw(_complete lei forget-external), \'complete for externals';
 	my %comp = map { $_ => 1 } split(/\s+/, $lei_out);
 	ok($comp{'https://example.com/ibx/'}, 'forget external completion');
 	$cfg->each_inbox(sub {

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/6] lei: note some TODO items (curl, externals)
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
                   ` (3 preceding siblings ...)
  2021-02-10  7:07 ` [PATCH 4/6] lei ls-external: support --local and --remote Eric Wong
@ 2021-02-10  7:07 ` Eric Wong
  2021-02-10  7:07 ` [PATCH 6/6] net_reader: new package split from -watch Eric Wong
  2021-02-10  8:38 ` [PATCH 7/6] lei_external: fix+test handling of escaped braces Eric Wong
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

I don't know if it's worth it to use libcurl directly
(nor the effort to support and maintain tests)
---
 lib/PublicInbox/LeiCurl.pm     | 2 ++
 lib/PublicInbox/LeiExternal.pm | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/lib/PublicInbox/LeiCurl.pm b/lib/PublicInbox/LeiCurl.pm
index f346a1b4..3a79fbf8 100644
--- a/lib/PublicInbox/LeiCurl.pm
+++ b/lib/PublicInbox/LeiCurl.pm
@@ -2,6 +2,8 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # common option and torsocks(1) wrapping for curl(1)
+# Eventually, we may support using libcurl via Inline::C and/or
+# WWW::Curl; but curl(1) is most prevalent and widely-installed.
 package PublicInbox::LeiCurl;
 use strict;
 use v5.10.1;
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b402eed4..8a51afcb 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -44,6 +44,8 @@ sub ext_canonicalize {
 	}
 }
 
+# TODO: we will probably extract glob2re into a separate module for
+# PublicInbox::Filter::Base and maybe other places
 my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
 		'[' => '[', ']' => ']', ',' => ',' );
 
@@ -99,6 +101,7 @@ sub get_externals {
 	();
 }
 
+# TODO: does this need JSON output?
 sub lei_ls_external {
 	my ($self, $filter) = @_;
 	my $opt = $self->{opt};

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/6] net_reader: new package split from -watch
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
                   ` (4 preceding siblings ...)
  2021-02-10  7:07 ` [PATCH 5/6] lei: note some TODO items (curl, externals) Eric Wong
@ 2021-02-10  7:07 ` Eric Wong
  2021-02-10  8:38 ` [PATCH 7/6] lei_external: fix+test handling of escaped braces Eric Wong
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  7:07 UTC (permalink / raw)
  To: meta

We'll be using some of this for IMAP and NNTP support in lei,
too.  More will need to be done to improve code sharing and
reusability, soon, but this is a start.
---
 MANIFEST                     |   1 +
 lib/PublicInbox/NetReader.pm | 220 +++++++++++++++++++++++++++++++++++
 lib/PublicInbox/Watch.pm     | 204 +-------------------------------
 3 files changed, 222 insertions(+), 203 deletions(-)
 create mode 100644 lib/PublicInbox/NetReader.pm

diff --git a/MANIFEST b/MANIFEST
index f8ee6998..92226d5a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -209,6 +209,7 @@ lib/PublicInbox/NDC_PP.pm
 lib/PublicInbox/NNTP.pm
 lib/PublicInbox/NNTPD.pm
 lib/PublicInbox/NNTPdeflate.pm
+lib/PublicInbox/NetReader.pm
 lib/PublicInbox/NewsWWW.pm
 lib/PublicInbox/OnDestroy.pm
 lib/PublicInbox/Over.pm
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
new file mode 100644
index 00000000..79047fd2
--- /dev/null
+++ b/lib/PublicInbox/NetReader.pm
@@ -0,0 +1,220 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common reader code for IMAP and NNTP (and maybe JMAP)
+package PublicInbox::NetReader;
+use strict;
+use v5.10.1;
+use parent qw(Exporter);
+
+# TODO: trim this down, this is huge
+our @EXPORT = qw(uri_new uri_scheme uri_section
+		mic_for nn_new nn_for
+		imap_url nntp_url);
+
+# avoid exposing deprecated "snews" to users.
+my %SCHEME_MAP = ('snews' => 'nntps');
+
+sub uri_scheme ($) {
+	my ($uri) = @_;
+	my $scheme = $uri->scheme;
+	$SCHEME_MAP{$scheme} // $scheme;
+}
+
+# returns the git config section name, e.g [imap "imaps://user@example.com"]
+# without the mailbox, so we can share connections between different inboxes
+sub uri_section ($) {
+	my ($uri) = @_;
+	uri_scheme($uri) . '://' . $uri->authority;
+}
+
+sub auth_anon_cb { '' }; # for Mail::IMAPClient::Authcallback
+
+sub mic_for { # mic = Mail::IMAPClient
+	my ($self, $url, $mic_args) = @_;
+	require PublicInbox::URIimap;
+	my $uri = PublicInbox::URIimap->new($url);
+	require PublicInbox::GitCredential;
+	my $cred = bless {
+		url => $url,
+		protocol => $uri->scheme,
+		host => $uri->host,
+		username => $uri->user,
+		password => $uri->password,
+	}, 'PublicInbox::GitCredential';
+	my $common = $mic_args->{uri_section($uri)} // {};
+	# IMAPClient and Net::Netrc both mishandles `0', so we pass `127.0.0.1'
+	my $host = $cred->{host};
+	$host = '127.0.0.1' if $host eq '0';
+	my $mic_arg = {
+		Port => $uri->port,
+		Server => $host,
+		Ssl => $uri->scheme eq 'imaps',
+		Keepalive => 1, # SO_KEEPALIVE
+		%$common, # may set Starttls, Compress, Debug ....
+	};
+	require PublicInbox::IMAPClient;
+	my $mic = PublicInbox::IMAPClient->new(%$mic_arg) or
+		die "E: <$url> new: $@\n";
+
+	# default to using STARTTLS if it's available, but allow
+	# it to be disabled since I usually connect to localhost
+	if (!$mic_arg->{Ssl} && !defined($mic_arg->{Starttls}) &&
+			$mic->has_capability('STARTTLS') &&
+			$mic->can('starttls')) {
+		$mic->starttls or die "E: <$url> STARTTLS: $@\n";
+	}
+
+	# do we even need credentials?
+	if (!defined($cred->{username}) &&
+			$mic->has_capability('AUTH=ANONYMOUS')) {
+		$cred = undef;
+	}
+	if ($cred) {
+		$cred->check_netrc unless defined $cred->{password};
+		$cred->fill; # may prompt user here
+		$mic->User($mic_arg->{User} = $cred->{username});
+		$mic->Password($mic_arg->{Password} = $cred->{password});
+	} else { # AUTH=ANONYMOUS
+		$mic->Authmechanism($mic_arg->{Authmechanism} = 'ANONYMOUS');
+		$mic->Authcallback($mic_arg->{Authcallback} = \&auth_anon_cb);
+	}
+	if ($mic->login && $mic->IsAuthenticated) {
+		# success! keep IMAPClient->new arg in case we get disconnected
+		$self->{mic_arg}->{uri_section($uri)} = $mic_arg;
+	} else {
+		warn "E: <$url> LOGIN: $@\n";
+		$mic = undef;
+	}
+	$cred->run($mic ? 'approve' : 'reject') if $cred;
+	$mic;
+}
+
+sub uri_new {
+	my ($url) = @_;
+	require URI;
+
+	# URI::snews exists, URI::nntps does not, so use URI::snews
+	$url =~ s!\Anntps://!snews://!i;
+	URI->new($url);
+}
+
+# Net::NNTP doesn't support CAPABILITIES, yet
+sub try_starttls ($) {
+	my ($host) = @_;
+	return if $host =~ /\.onion\z/s;
+	return if $host =~ /\A127\.[0-9]+\.[0-9]+\.[0-9]+\z/s;
+	return if $host eq '::1';
+	1;
+}
+
+sub nn_new ($$$) {
+	my ($nn_arg, $nntp_opt, $url) = @_;
+	my $nn = Net::NNTP->new(%$nn_arg) or die "E: <$url> new: $!\n";
+
+	# default to using STARTTLS if it's available, but allow
+	# it to be disabled for localhost/VPN users
+	if (!$nn_arg->{SSL} && $nn->can('starttls')) {
+		if (!defined($nntp_opt->{starttls}) &&
+				try_starttls($nn_arg->{Host})) {
+			# soft fail by default
+			$nn->starttls or warn <<"";
+W: <$url> STARTTLS tried and failed (not requested)
+
+		} elsif ($nntp_opt->{starttls}) {
+			# hard fail if explicitly configured
+			$nn->starttls or die <<"";
+E: <$url> STARTTLS requested and failed
+
+		}
+	} elsif ($nntp_opt->{starttls}) {
+		$nn->can('starttls') or
+			die "E: <$url> Net::NNTP too old for STARTTLS\n";
+		$nn->starttls or die <<"";
+E: <$url> STARTTLS requested and failed
+
+	}
+	$nn;
+}
+
+sub nn_for ($$$) { # nn = Net::NNTP
+	my ($self, $url, $nn_args) = @_;
+	my $uri = uri_new($url);
+	my $sec = uri_section($uri);
+	my $nntp_opt = $self->{nntp_opt}->{$sec} //= {};
+	my $host = $uri->host;
+	# Net::NNTP and Net::Netrc both mishandle `0', so we pass `127.0.0.1'
+	$host = '127.0.0.1' if $host eq '0';
+	my $cred;
+	my ($u, $p);
+	if (defined(my $ui = $uri->userinfo)) {
+		require PublicInbox::GitCredential;
+		$cred = bless {
+			url => $sec,
+			protocol => uri_scheme($uri),
+			host => $host,
+		}, 'PublicInbox::GitCredential';
+		($u, $p) = split(/:/, $ui, 2);
+		($cred->{username}, $cred->{password}) = ($u, $p);
+		$cred->check_netrc unless defined $p;
+	}
+	my $common = $nn_args->{$sec} // {};
+	my $nn_arg = {
+		Port => $uri->port,
+		Host => $host,
+		SSL => $uri->secure, # snews == nntps
+		%$common, # may Debug ....
+	};
+	my $nn = nn_new($nn_arg, $nntp_opt, $url);
+
+	if ($cred) {
+		$cred->fill; # may prompt user here
+		if ($nn->authinfo($u, $p)) {
+			push @{$nntp_opt->{-postconn}}, [ 'authinfo', $u, $p ];
+		} else {
+			warn "E: <$url> AUTHINFO $u XXXX failed\n";
+			$nn = undef;
+		}
+	}
+
+	if ($nntp_opt->{compress}) {
+		# https://rt.cpan.org/Ticket/Display.html?id=129967
+		if ($nn->can('compress')) {
+			if ($nn->compress) {
+				push @{$nntp_opt->{-postconn}}, [ 'compress' ];
+			} else {
+				warn "W: <$url> COMPRESS failed\n";
+			}
+		} else {
+			delete $nntp_opt->{compress};
+			warn <<"";
+W: <$url> COMPRESS not supported by Net::NNTP
+W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates
+
+		}
+	}
+
+	$self->{nn_arg}->{$sec} = $nn_arg;
+	$cred->run($nn ? 'approve' : 'reject') if $cred;
+	$nn;
+}
+
+sub imap_url {
+	my ($url) = @_;
+	require PublicInbox::URIimap;
+	my $uri = PublicInbox::URIimap->new($url);
+	$uri ? $uri->canonical->as_string : undef;
+}
+
+my %IS_NNTP = (news => 1, snews => 1, nntp => 1);
+sub nntp_url {
+	my ($url) = @_;
+	my $uri = uri_new($url);
+	return unless $uri && $IS_NNTP{$uri->scheme} && $uri->group;
+	$url = $uri->canonical->as_string;
+	# nntps is IANA registered, snews is deprecated
+	$url =~ s!\Asnews://!nntps://!;
+	$url;
+}
+
+1;
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index a4302162..8a457b81 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -9,6 +9,7 @@ use v5.10.1;
 use PublicInbox::Eml;
 use PublicInbox::InboxWritable qw(eml_from_path);
 use PublicInbox::MdirReader;
+use PublicInbox::NetReader;
 use PublicInbox::Filter::Base qw(REJECT);
 use PublicInbox::Spamcheck;
 use PublicInbox::Sigfd;
@@ -279,22 +280,6 @@ sub watch_fs_init ($) {
 	PublicInbox::DirIdle->new([keys %{$self->{mdmap}}], $cb);
 }
 
-# avoid exposing deprecated "snews" to users.
-my %SCHEME_MAP = ('snews' => 'nntps');
-
-sub uri_scheme ($) {
-	my ($uri) = @_;
-	my $scheme = $uri->scheme;
-	$SCHEME_MAP{$scheme} // $scheme;
-}
-
-# returns the git config section name, e.g [imap "imaps://user@example.com"]
-# without the mailbox, so we can share connections between different inboxes
-sub uri_section ($) {
-	my ($uri) = @_;
-	uri_scheme($uri) . '://' . $uri->authority;
-}
-
 sub cfg_intvl ($$$) {
 	my ($cfg, $key, $url) = @_;
 	my $v = $cfg->urlmatch($key, $url) // return;
@@ -344,66 +329,6 @@ sub imap_common_init ($) {
 	$mic_args;
 }
 
-sub auth_anon_cb { '' }; # for Mail::IMAPClient::Authcallback
-
-sub mic_for ($$$) { # mic = Mail::IMAPClient
-	my ($self, $url, $mic_args) = @_;
-	my $uri = PublicInbox::URIimap->new($url);
-	require PublicInbox::GitCredential;
-	my $cred = bless {
-		url => $url,
-		protocol => $uri->scheme,
-		host => $uri->host,
-		username => $uri->user,
-		password => $uri->password,
-	}, 'PublicInbox::GitCredential';
-	my $common = $mic_args->{uri_section($uri)} // {};
-	# IMAPClient and Net::Netrc both mishandles `0', so we pass `127.0.0.1'
-	my $host = $cred->{host};
-	$host = '127.0.0.1' if $host eq '0';
-	my $mic_arg = {
-		Port => $uri->port,
-		Server => $host,
-		Ssl => $uri->scheme eq 'imaps',
-		Keepalive => 1, # SO_KEEPALIVE
-		%$common, # may set Starttls, Compress, Debug ....
-	};
-	my $mic = PublicInbox::IMAPClient->new(%$mic_arg) or
-		die "E: <$url> new: $@\n";
-
-	# default to using STARTTLS if it's available, but allow
-	# it to be disabled since I usually connect to localhost
-	if (!$mic_arg->{Ssl} && !defined($mic_arg->{Starttls}) &&
-			$mic->has_capability('STARTTLS') &&
-			$mic->can('starttls')) {
-		$mic->starttls or die "E: <$url> STARTTLS: $@\n";
-	}
-
-	# do we even need credentials?
-	if (!defined($cred->{username}) &&
-			$mic->has_capability('AUTH=ANONYMOUS')) {
-		$cred = undef;
-	}
-	if ($cred) {
-		$cred->check_netrc unless defined $cred->{password};
-		$cred->fill; # may prompt user here
-		$mic->User($mic_arg->{User} = $cred->{username});
-		$mic->Password($mic_arg->{Password} = $cred->{password});
-	} else { # AUTH=ANONYMOUS
-		$mic->Authmechanism($mic_arg->{Authmechanism} = 'ANONYMOUS');
-		$mic->Authcallback($mic_arg->{Authcallback} = \&auth_anon_cb);
-	}
-	if ($mic->login && $mic->IsAuthenticated) {
-		# success! keep IMAPClient->new arg in case we get disconnected
-		$self->{mic_arg}->{uri_section($uri)} = $mic_arg;
-	} else {
-		warn "E: <$url> LOGIN: $@\n";
-		$mic = undef;
-	}
-	$cred->run($mic ? 'approve' : 'reject') if $cred;
-	$mic;
-}
-
 sub imap_import_msg ($$$$$) {
 	my ($self, $url, $uid, $raw, $flags) = @_;
 	# our target audience expects LF-only, save storage
@@ -805,106 +730,6 @@ sub nntp_common_init ($) {
 	$nn_args;
 }
 
-# Net::NNTP doesn't support CAPABILITIES, yet
-sub try_starttls ($) {
-	my ($host) = @_;
-	return if $host =~ /\.onion\z/s;
-	return if $host =~ /\A127\.[0-9]+\.[0-9]+\.[0-9]+\z/s;
-	return if $host eq '::1';
-	1;
-}
-
-sub nn_new ($$$) {
-	my ($nn_arg, $nntp_opt, $url) = @_;
-	my $nn = Net::NNTP->new(%$nn_arg) or die "E: <$url> new: $!\n";
-
-	# default to using STARTTLS if it's available, but allow
-	# it to be disabled for localhost/VPN users
-	if (!$nn_arg->{SSL} && $nn->can('starttls')) {
-		if (!defined($nntp_opt->{starttls}) &&
-				try_starttls($nn_arg->{Host})) {
-			# soft fail by default
-			$nn->starttls or warn <<"";
-W: <$url> STARTTLS tried and failed (not requested)
-
-		} elsif ($nntp_opt->{starttls}) {
-			# hard fail if explicitly configured
-			$nn->starttls or die <<"";
-E: <$url> STARTTLS requested and failed
-
-		}
-	} elsif ($nntp_opt->{starttls}) {
-		$nn->can('starttls') or
-			die "E: <$url> Net::NNTP too old for STARTTLS\n";
-		$nn->starttls or die <<"";
-E: <$url> STARTTLS requested and failed
-
-	}
-	$nn;
-}
-
-sub nn_for ($$$) { # nn = Net::NNTP
-	my ($self, $url, $nn_args) = @_;
-	my $uri = uri_new($url);
-	my $sec = uri_section($uri);
-	my $nntp_opt = $self->{nntp_opt}->{$sec} //= {};
-	my $host = $uri->host;
-	# Net::NNTP and Net::Netrc both mishandle `0', so we pass `127.0.0.1'
-	$host = '127.0.0.1' if $host eq '0';
-	my $cred;
-	my ($u, $p);
-	if (defined(my $ui = $uri->userinfo)) {
-		require PublicInbox::GitCredential;
-		$cred = bless {
-			url => $sec,
-			protocol => uri_scheme($uri),
-			host => $host,
-		}, 'PublicInbox::GitCredential';
-		($u, $p) = split(/:/, $ui, 2);
-		($cred->{username}, $cred->{password}) = ($u, $p);
-		$cred->check_netrc unless defined $p;
-	}
-	my $common = $nn_args->{$sec} // {};
-	my $nn_arg = {
-		Port => $uri->port,
-		Host => $host,
-		SSL => $uri->secure, # snews == nntps
-		%$common, # may Debug ....
-	};
-	my $nn = nn_new($nn_arg, $nntp_opt, $url);
-
-	if ($cred) {
-		$cred->fill; # may prompt user here
-		if ($nn->authinfo($u, $p)) {
-			push @{$nntp_opt->{-postconn}}, [ 'authinfo', $u, $p ];
-		} else {
-			warn "E: <$url> AUTHINFO $u XXXX failed\n";
-			$nn = undef;
-		}
-	}
-
-	if ($nntp_opt->{compress}) {
-		# https://rt.cpan.org/Ticket/Display.html?id=129967
-		if ($nn->can('compress')) {
-			if ($nn->compress) {
-				push @{$nntp_opt->{-postconn}}, [ 'compress' ];
-			} else {
-				warn "W: <$url> COMPRESS failed\n";
-			}
-		} else {
-			delete $nntp_opt->{compress};
-			warn <<"";
-W: <$url> COMPRESS not supported by Net::NNTP
-W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates
-
-		}
-	}
-
-	$self->{nn_arg}->{$sec} = $nn_arg;
-	$cred->run($nn ? 'approve' : 'reject') if $cred;
-	$nn;
-}
-
 sub nntp_fetch_all ($$$) {
 	my ($self, $nn, $url) = @_;
 	my $uri = uri_new($url);
@@ -1132,31 +957,4 @@ EOF
 	undef;
 }
 
-sub uri_new {
-	my ($url) = @_;
-
-	# URI::snews exists, URI::nntps does not, so use URI::snews
-	$url =~ s!\Anntps://!snews://!i;
-	URI->new($url);
-}
-
-sub imap_url {
-	my ($url) = @_;
-	require PublicInbox::URIimap;
-	my $uri = PublicInbox::URIimap->new($url);
-	$uri ? $uri->canonical->as_string : undef;
-}
-
-my %IS_NNTP = (news => 1, snews => 1, nntp => 1);
-sub nntp_url {
-	my ($url) = @_;
-	require URI;
-	my $uri = uri_new($url);
-	return unless $uri && $IS_NNTP{$uri->scheme} && $uri->group;
-	$url = $uri->canonical->as_string;
-	# nntps is IANA registered, snews is deprecated
-	$url =~ s!\Asnews://!nntps://!;
-	$url;
-}
-
 1;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/6] lei_external: fix+test handling of escaped braces
  2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
                   ` (5 preceding siblings ...)
  2021-02-10  7:07 ` [PATCH 6/6] net_reader: new package split from -watch Eric Wong
@ 2021-02-10  8:38 ` Eric Wong
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2021-02-10  8:38 UTC (permalink / raw)
  To: meta

While '{' and '}' are rare in path names, somebody may still
use them or deal with software which does (e.g. GNU arch).
---
 lib/PublicInbox/LeiExternal.pm | 9 +++++----
 t/lei_external.t               | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index 8a51afcb..6cc2e671 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -58,16 +58,17 @@ sub glob2re {
 		$re_map{$p eq '\\' ? '' : do {
 			if ($1 eq '[') { ++$in_bracket }
 			elsif ($1 eq ']') { --$in_bracket }
+			elsif ($1 eq ',') { ++$qm } # no change
 			$p = $1;
 		}} // do {
 			$p = $1;
 			($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
 		}!sge);
 	# bashism (also supported by curl): {a,b,c} => (a|b|c)
-	$re =~ s/([^\\]*)\\\{([^,]*?,[^\\]*?)\\\}/
-		(my $in_braces = $2) =~ tr!,!|!;
-		$1."($in_braces)";
-		/sge;
+	$changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+			(my $in_braces = $2) =~ tr!,!|!;
+			$1."($in_braces)";
+			/sge);
 	($changes - $qm) ? $re : undef;
 }
 
diff --git a/t/lei_external.t b/t/lei_external.t
index 0ef6633d..78f71658 100644
--- a/t/lei_external.t
+++ b/t/lei_external.t
@@ -28,5 +28,7 @@ is_deeply($glob2re->('{a'), undef, 'open left brace');
 is_deeply($glob2re->('a}'), undef, 'open right brace');
 is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
 is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
+is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
+is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
 
 done_testing;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-02-10  8:38 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-10  7:07 [PATCH 0/6] more lei stuffs Eric Wong
2021-02-10  7:07 ` [PATCH 1/6] lei *external: glob improvements, ls-external filtering Eric Wong
2021-02-10  7:07 ` [PATCH 2/6] lei_external: remove unnecessary Exporter use Eric Wong
2021-02-10  7:07 ` [PATCH 3/6] test_common: support lei-daemon only testing Eric Wong
2021-02-10  7:07 ` [PATCH 4/6] lei ls-external: support --local and --remote Eric Wong
2021-02-10  7:07 ` [PATCH 5/6] lei: note some TODO items (curl, externals) Eric Wong
2021-02-10  7:07 ` [PATCH 6/6] net_reader: new package split from -watch Eric Wong
2021-02-10  8:38 ` [PATCH 7/6] lei_external: fix+test handling of escaped braces Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).