From 2755c6f839f0a0552cd134160e1691380511a61a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 13 Dec 2020 22:38:48 +0000 Subject: lei: FD-passing and IPC basics The start of lei, a Local Email Interface. It'll support a daemon via FD passing to avoid startup time penalties if IO::FDPass is installed, but fall back to a slow one-shot mode if not. Compared to traditional socket daemon, FD passing should allow us to eventually do stuff like run "git show" and still have proper terminal support for pager and color. --- lib/PublicInbox/Daemon.pm | 6 +- lib/PublicInbox/LeiDaemon.pm | 303 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 lib/PublicInbox/LeiDaemon.pm (limited to 'lib') diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index a2171535..6b92b60d 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -1,7 +1,9 @@ # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ -# contains common daemon code for the httpd, imapd, and nntpd servers. -# This may be used for read-only IMAP server if we decide to implement it. +# +# Contains common daemon code for the httpd, imapd, and nntpd servers +# and designed for handling thousands of untrusted clients over slow +# and/or lossy connections. package PublicInbox::Daemon; use strict; use warnings; diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm new file mode 100644 index 00000000..ae40b3a6 --- /dev/null +++ b/lib/PublicInbox/LeiDaemon.pm @@ -0,0 +1,303 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# Backend for `lei' (local email interface). Unlike the C10K-oriented +# PublicInbox::Daemon, this is designed exclusively to handle trusted +# local clients with read/write access to the FS and use as many +# system resources as the local user has access to. +package PublicInbox::LeiDaemon; +use strict; +use v5.10.1; +use parent qw(PublicInbox::DS); +use Getopt::Long (); +use Errno qw(EAGAIN ECONNREFUSED ENOENT); +use POSIX qw(setsid); +use IO::Socket::UNIX; +use IO::Handle (); +use Sys::Syslog qw(syslog openlog); +use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT); +use PublicInbox::Sigfd; +use PublicInbox::DS qw(now); +use PublicInbox::Spawn qw(spawn); +our $quit = sub { exit(shift // 0) }; +my $glp = Getopt::Long::Parser->new; +$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); + +sub x_it ($$) { # pronounced "exit" + my ($client, $code) = @_; + if (my $sig = ($code & 127)) { + kill($sig, $client->{pid} // $$); + } else { + $code >>= 8; + if (my $sock = $client->{sock}) { + say $sock "exit=$code"; + } else { # for oneshot + $quit->($code); + } + } +} + +sub emit ($$$) { + my ($client, $channel, $buf) = @_; + print { $client->{$channel} } $buf or warn "print FD[$channel]: $!"; +} + +sub fail ($$;$) { + my ($client, $buf, $exit_code) = @_; + $buf .= "\n" unless $buf =~ /\n\z/s; + emit($client, 2, $buf); + x_it($client, ($exit_code // 1) << 8); + undef; +} + +sub _help ($;$) { + my ($client, $channel) = @_; + emit($client, $channel //= 1, < failure +} + +sub assert_args ($$$;$@) { + my ($client, $argv, $proto, $opt, @spec) = @_; + $opt //= {}; + push @spec, qw(help|h); + $glp->getoptionsfromarray($argv, $opt, @spec) or + return fail($client, 'bad arguments or options'); + if ($opt->{help}) { + _help($client); + undef; + } else { + my ($nreq, $rest) = split(/;/, $proto); + $nreq = (($nreq // '') =~ tr/$/$/); + my $argc = scalar(@$argv); + my $tot = ($rest // '') eq '@' ? $argc : ($proto =~ tr/$/$/); + return 1 if $argc <= $tot && $argc >= $nreq; + _help($client, 2); + undef + } +} + +sub dispatch { + my ($client, $cmd, @argv) = @_; + local $SIG{__WARN__} = sub { emit($client, 2, "@_") }; + local $SIG{__DIE__} = 'DEFAULT'; + if (defined $cmd) { + my $func = "lei_$cmd"; + $func =~ tr/-/_/; + if (my $cb = __PACKAGE__->can($func)) { + $client->{cmd} = $cmd; + $cb->($client, \@argv); + } elsif (grep(/\A-/, $cmd, @argv)) { + assert_args($client, [ $cmd, @argv ], ''); + } else { + fail($client, "`$cmd' is not an lei command"); + } + } else { + _help($client, 2); + } +} + +sub lei_daemon_pid { + my ($client, $argv) = @_; + assert_args($client, $argv, '') and emit($client, 1, "$$\n"); +} + +sub lei_DBG_pwd { + my ($client, $argv) = @_; + assert_args($client, $argv, '') and + emit($client, 1, "$client->{env}->{PWD}\n"); +} + +sub lei_DBG_cwd { + my ($client, $argv) = @_; + require Cwd; + assert_args($client, $argv, '') and emit($client, 1, Cwd::cwd()."\n"); +} + +sub lei_DBG_false { x_it($_[0], 1 << 8) } + +sub lei_daemon_stop { + my ($client, $argv) = @_; + assert_args($client, $argv, '') and $quit->(0); +} + +sub lei_help { _help($_[0]) } + +sub reap_exec { # dwaitpid callback + my ($client, $pid) = @_; + x_it($client, $?); +} + +sub lei_git { # support passing through random git commands + my ($client, $argv) = @_; + my %opt = map { $_ => $client->{$_} } (0..2); + my $pid = spawn(['git', @$argv], $client->{env}, \%opt); + PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client); +} + +sub accept_dispatch { # Listener {post_accept} callback + my ($sock) = @_; # ignore other + $sock->blocking(1); + $sock->autoflush(1); + my $client = { sock => $sock }; + vec(my $rin = '', fileno($sock), 1) = 1; + # `say $sock' triggers "die" in lei(1) + for my $i (0..2) { + if (select(my $rout = $rin, undef, undef, 1)) { + my $fd = IO::FDPass::recv(fileno($sock)); + if ($fd >= 0) { + my $rdr = ($fd == 0 ? '<&=' : '>&='); + if (open(my $fh, $rdr, $fd)) { + $client->{$i} = $fh; + } else { + say $sock "open($rdr$fd) (FD=$i): $!"; + return; + } + } else { + say $sock "recv FD=$i: $!"; + return; + } + } else { + say $sock "timed out waiting to recv FD=$i"; + return; + } + } + # $ARGV_STR = join("]\0[", @ARGV); + # $ENV_STR = join('', map { "$_=$ENV{$_}\0" } keys %ENV); + # $line = "$$\0\0>$ARGV_STR\0\0>$ENV_STR\0\0"; + my ($client_pid, $argv, $env) = do { + local $/ = "\0\0\0"; # yes, 3 NULs at EOL, not 2 + chomp(my $line = <$sock>); + split(/\0\0>/, $line, 3); + }; + my %env = map { split(/=/, $_, 2) } split(/\0/, $env); + if (chdir($env{PWD})) { + $client->{env} = \%env; + $client->{pid} = $client_pid; + eval { dispatch($client, split(/\]\0\[/, $argv)) }; + say $sock $@ if $@; + } else { + say $sock "chdir($env{PWD}): $!"; # implicit close + } +} + +sub noop {} + +# lei(1) calls this when it can't connect +sub lazy_start ($$) { + my ($path, $err) = @_; + if ($err == ECONNREFUSED) { + unlink($path) or die "unlink($path): $!"; + } elsif ($err != ENOENT) { + die "connect($path): $!"; + } + my $umask = umask(077) // die("umask(077): $!"); + my $l = IO::Socket::UNIX->new(Local => $path, + Listen => 1024, + Type => SOCK_STREAM) or + $err = $!; + umask($umask) or die("umask(restore): $!"); + $l or return $err; + my @st = stat($path) or die "stat($path): $!"; + my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino + pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; + my $oldset = PublicInbox::Sigfd::block_signals(); + my $pid = fork // die "fork: $!"; + if ($pid) { + PublicInbox::Sigfd::sig_setmask($oldset); + return; # client will connect to $path + } + openlog($path, 'pid', 'user'); + local $SIG{__DIE__} = sub { + syslog('crit', "@_"); + exit $! if $!; + exit $? >> 8 if $? >> 8; + exit 255; + }; + local $SIG{__WARN__} = sub { syslog('warning', "@_") }; + open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; + open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; + open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; + setsid(); + $pid = fork // die "fork: $!"; + exit if $pid; + $0 = "lei-daemon $path"; + require PublicInbox::Listener; + require PublicInbox::EOFpipe; + $l->blocking(0); + $eof_w->blocking(0); + $eof_r->blocking(0); + my $listener = PublicInbox::Listener->new($l, \&accept_dispatch, $l); + my $exit_code; + local $quit = sub { + $exit_code //= shift; + my $tmp = $listener or exit($exit_code); + unlink($path) if defined($path); + syswrite($eof_w, '.'); + $l = $listener = $path = undef; + $tmp->close if $tmp; # DS::close + PublicInbox::DS->SetLoopTimeout(1000); + }; + PublicInbox::EOFpipe->new($eof_r, sub {}, undef); + my $sig = { + CHLD => \&PublicInbox::DS::enqueue_reap, + QUIT => $quit, + INT => $quit, + TERM => $quit, + HUP => \&noop, + USR1 => \&noop, + USR2 => \&noop, + }; + my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); + local %SIG = (%SIG, %$sig) if !$sigfd; + if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets + PublicInbox::DS->SetLoopTimeout(5000); + } else { + # wake up every second to accept signals if we don't + # have signalfd or IO::KQueue: + PublicInbox::Sigfd::sig_setmask($oldset); + PublicInbox::DS->SetLoopTimeout(1000); + } + PublicInbox::DS->SetPostLoopCallback(sub { + my ($dmap, undef) = @_; + if (@st = defined($path) ? stat($path) : ()) { + if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) { + warn "$path dev/ino changed, quitting\n"; + $path = undef; + } + } elsif (defined($path)) { + warn "stat($path): $!, quitting ...\n"; + undef $path; # don't unlink + $quit->(); + } + return 1 if defined($path); + my $now = now(); + my $n = 0; + for my $s (values %$dmap) { + $s->can('busy') or next; + if ($s->busy($now)) { + ++$n; + } else { + $s->close; + } + } + $n; # true: continue, false: stop + }); + PublicInbox::DS->EventLoop; + exit($exit_code // 0); +} + +# for users w/o IO::FDPass +sub oneshot { + dispatch({ + 0 => *STDIN{IO}, + 1 => *STDOUT{IO}, + 2 => *STDERR{IO}, + env => \%ENV + }, @ARGV); +} + +1; -- cgit v1.2.3-24-ge0c7 From 478a8d308d952af5ae957136c2ab09455f2a767c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Dec 2020 03:14:16 +0000 Subject: lei: proposed command-listing and options In an attempt to ensure a coherent UI/UX, we'll try to document all proposed commands and options in one place for easy reference --- lib/PublicInbox/LeiDaemon.pm | 137 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) (limited to 'lib') diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm index ae40b3a6..d0c53416 100644 --- a/lib/PublicInbox/LeiDaemon.pm +++ b/lib/PublicInbox/LeiDaemon.pm @@ -23,6 +23,143 @@ our $quit = sub { exit(shift // 0) }; my $glp = Getopt::Long::Parser->new; $glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); +# TBD: this is a documentation mechanism to show a subcommand +# (may) pass options through to another command: +sub pass_through { () } + +# TODO: generate shell completion + help using %CMD and %OPTDESC +# command => [ positional_args, 1-line description, Getopt::Long option spec ] +our %CMD = ( # sorted in order of importance/use: +'query' => [ 'SEARCH-TERMS...', 'search for messages matching terms', qw( + save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a + limit|n=i sort|s=s reverse|r offset=i remote local! extinbox! + since|after=s until|before=s) ], + +'show' => [ '{MID|OID}', 'show a given object (Message-ID or object ID)', + qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), + pass_through('git show') ], + +'add-extinbox' => [ 'URL-OR-PATHNAME', + 'add/set priority of a publicinbox|extindex for extra matches', + qw(prio=i) ], +'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex sources', + qw(format|f=s z local remote) ], +'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}', + 'exclude further results from a publicinbox|extindex', + qw(prune) ], + +'ls-query' => [ '[FILTER]', 'list saved search queries', + qw(name-only format|f=s z) ], +'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ], +'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ], + +'plonk' => [ '{--thread|--from=IDENT}', + 'exclude mail matching From: or thread from non-Message-ID searches', + qw(thread|t from|f=s mid=s oid=s) ], +'mark' => [ 'MESSAGE-FLAGS', 'set/unset flags on message(s) from stdin', + qw(stdin| oid=s exact by-mid|mid:s) ], +'forget' => [ '--stdin', 'exclude message(s) on stdin from query results', + qw(stdin| oid=s exact by-mid|mid:s) ], + +'purge-mailsource' => [ '{URL-OR-PATHNAME|--all}', + 'remove imported messages from IMAP, Maildirs, and MH', + qw(exact! all jobs:i indexed) ], + +# code repos are used for `show' to solve blobs from patch mails +'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', + qw(prio=i) ], +'ls-coderepo' => [ '[FILTER]', 'list known code repos', qw(format|f=s z) ], +'forget-coderepo' => [ 'PATHNAME', + 'stop using repo to solve blobs from patches', + qw(prune) ], + +'add-watch' => [ '[URL_OR_PATHNAME]', + 'watch for new messages and flag changes', + qw(import! flags! interval=s recursive|r exclude=s include=s) ], +'ls-watch' => [ '[FILTER]', 'list active watches with numbers and status', + qw(format|f=s z) ], +'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], +'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], +'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', + qw(prune) ], + +'import' => [ '{URL_OR_PATHNAME|--stdin}', + 'one-shot import/update from URL or filesystem', + qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags), + ], + +'config' => [ '[ANYTHING...]', + 'git-config(1) wrapper for ~/.config/lei/config', + pass_through('git config') ], +'daemon-stop' => [ undef, 'stop the lei-daemon' ], +'daemon-pid' => [ undef, 'show the PID of the lei-daemon' ], +'help' => [ '[SUBCOMMAND]', 'show help' ], + +# XXX do we need this? +# 'git' => [ '[ANYTHING...]', 'git(1) wrapper', pass_through('git') ], + +'reorder-local-store-and-break-history' => [ '[REFNAME]', + 'rewrite git history in an attempt to improve compression', + 'gc!' ] +); # @CMD + +# switch descriptions, try to keep consistent across commands +# $spec: Getopt::Long option specification +# $spec => [@ALLOWED_VALUES (default is first), $description], +# $spec => $description +# "$SUB_COMMAND TAB $spec" => as above +my $stdin_formats = [ qw(auto raw mboxrd mboxcl2 mboxcl mboxo), + 'specify message input format' ]; +my $ls_format = [ qw(plain json null), 'listing output format' ]; +my $show_format = [ qw(plain raw html mboxrd mboxcl2 mboxcl), + 'message/object output format' ]; + +my %OPTDESC = ( +'solve!' => 'do not attempt to reconstruct blobs from emails', +'save-as=s' => 'save a search terms by given name', + +'type=s' => [qw(any mid git), 'disambiguate type' ], + +'dedupe|d=s' => [qw(content oid mid), 'deduplication strategy'], +'thread|t' => 'every message in the same thread as the actual match(es)', +'augment|a' => 'augment --output destination instead of clobbering', + +'output|o=s' => "destination (e.g. `/path/to/Maildir', or `-' for stdout)", + +'mark format|f=s' => $stdin_formats, +'forget format|f=s' => $stdin_formats, +'query format|f=s' => [qw(maildir mboxrd mboxcl2 mboxcl html oid), + q[specify output format (default: determined by --output)]], +'ls-query format|f=s' => $ls_format, +'ls-extinbox format|f=s' => $ls_format, + +'limit|n=i' => 'integer limit on number of matches (default: 10000)', +'offset=i' => 'search result offset (default: 0)', + +'sort|s=s@' => [qw(internaldate date relevance docid), + "order of results `--output'-dependent)"], + +'prio=i' => 'priority of query source', + +'local' => 'limit operations to the local filesystem', +'local!' => 'exclude results from the local filesystem', +'remote' => 'limit operations to those requiring network access', +'remote!' => 'prevent operations requiring network access', + +'mid=s' => 'specify the Message-ID of a message', +'oid=s' => 'specify the git object ID of a message', + +'recursive|r' => 'scan directories/mailboxes/newsgroups recursively', +'exclude=s' => 'exclude mailboxes/newsgroups based on pattern', +'include=s' => 'include mailboxes/newsgroups based on pattern', + +'exact' => 'operate on exact header matches only', +'exact!' => 'rely on content match instead of exact header matches', + +'by-mid|mid:s' => 'match only by Message-ID, ignoring contents', +'jobs:i' => 'set parallelism level', +); # %OPTDESC + sub x_it ($$) { # pronounced "exit" my ($client, $code) = @_; if (my $sig = ($code & 127)) { -- cgit v1.2.3-24-ge0c7 From 6cdb84af2c75b3c66a35c8c4973f455da15dd0a4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 14 Dec 2020 11:42:40 +0000 Subject: lei_store: local storage for Local Email Interface Still unstable, this builds off the equally unstable extindex :P This will be used for caching/memoization of traditional mail stores (IMAP, Maildir, etc) while providing indexing via Xapian, along with compression, and checksumming from git. Most notably, this adds the ability to add/remove per-message keywords (draft, seen, flagged, answered) as described in the JMAP specification (RFC 8621 section 4.1.1). We'll use `.' (a single period) as an $eidx_key since it's an invalid {inboxdir} or {newsgroup} name. --- lib/PublicInbox/ExtSearch.pm | 4 +- lib/PublicInbox/ExtSearchIdx.pm | 35 ++++++- lib/PublicInbox/Import.pm | 4 + lib/PublicInbox/LeiDaemon.pm | 2 +- lib/PublicInbox/LeiSearch.pm | 40 ++++++++ lib/PublicInbox/LeiStore.pm | 197 ++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/OverIdx.pm | 10 ++ lib/PublicInbox/SearchIdx.pm | 47 ++++++++- lib/PublicInbox/SearchIdxShard.pm | 33 +++++++ lib/PublicInbox/V2Writable.pm | 2 +- 10 files changed, 364 insertions(+), 10 deletions(-) create mode 100644 lib/PublicInbox/LeiSearch.pm create mode 100644 lib/PublicInbox/LeiStore.pm (limited to 'lib') diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index 2a560935..410ae958 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -17,13 +17,13 @@ use DBI qw(:sql_types); # SQL_BLOB use parent qw(PublicInbox::Search); sub new { - my (undef, $topdir) = @_; + my ($class, $topdir) = @_; $topdir = File::Spec->canonpath($topdir); bless { topdir => $topdir, # xpfx => 'ei15' xpfx => "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION - }, __PACKAGE__; + }, $class; } sub misc { diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index b82d0546..56896056 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -896,18 +896,31 @@ sub idx_init { # similar to V2Writable return if $self->{idx_shards}; $self->git->cleanup; - + my $mode = 0644; my $ALL = $self->git->{git_dir}; # ALL.git - PublicInbox::Import::init_bare($ALL) unless -d $ALL; + my $old = -d $ALL; + if ($opt->{-private}) { # LeiStore + $mode = 0600; + if (!$old) { + umask 077; # don't bother restoring + PublicInbox::Import::init_bare($ALL); + $self->git->qx(qw(config core.sharedRepository 0600)); + } + } else { + PublicInbox::Import::init_bare($ALL) unless $old; + } my $info_dir = "$ALL/objects/info"; my $alt = "$info_dir/alternates"; - my $mode = 0644; my (@old, @new, %seen); # seen: st_dev + st_ino if (-e $alt) { open(my $fh, '<', $alt) or die "open $alt: $!"; $mode = (stat($fh))[2] & 07777; while (my $line = <$fh>) { chomp(my $d = $line); + + # expand relative path (/local/ stuff) + substr($d, 0, 3) eq '../' and + $d = "$ALL/objects/$d"; if (my @st = stat($d)) { next if $seen{"$st[0]\0$st[1]"}++; } else { @@ -917,6 +930,22 @@ sub idx_init { # similar to V2Writable push @old, $line; } } + + # for LeiStore, and possibly some mirror-only state + if (opendir(my $dh, my $local = "$self->{topdir}/local")) { + # highest numbered epoch first + for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 } + grep(/\A[0-9]+\.git\z/, readdir($dh))) { + my $d = "$local/$n.git/objects"; # absolute path + if (my @st = stat($d)) { + next if $seen{"$st[0]\0$st[1]"}++; + # favor relative paths for rename-friendliness + push @new, "../../local/$n.git/objects\n"; + } else { + warn "W: stat($d) failed: $!\n"; + } + } + } for my $ibx (@{$self->{ibx_list}}) { my $line = $ibx->git->{git_dir} . "/objects\n"; chomp(my $d = $line); diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 2cb4896a..b7be4c46 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -405,6 +405,10 @@ sub add { if ($smsg) { $smsg->{blob} = $self->get_mark(":$blob"); $smsg->{raw_bytes} = $n; + if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore + return if $oidx->blob_exists($smsg->{blob}); + } + # XXX do we need this? it's in git at this point $smsg->{-raw_email} = \$raw_email; } my $ref = $self->{ref}; diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm index d0c53416..b4b1ac59 100644 --- a/lib/PublicInbox/LeiDaemon.pm +++ b/lib/PublicInbox/LeiDaemon.pm @@ -42,7 +42,7 @@ our %CMD = ( # sorted in order of importance/use: 'add-extinbox' => [ 'URL-OR-PATHNAME', 'add/set priority of a publicinbox|extindex for extra matches', qw(prio=i) ], -'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex sources', +'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex locations', qw(format|f=s z local remote) ], 'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}', 'exclude further results from a publicinbox|extindex', diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm new file mode 100644 index 00000000..9cfd6ea2 --- /dev/null +++ b/lib/PublicInbox/LeiSearch.pm @@ -0,0 +1,40 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +package PublicInbox::LeiSearch; +use strict; +use v5.10.1; +use parent qw(PublicInbox::ExtSearch); +use PublicInbox::Search; + +sub combined_docid ($$) { + my ($self, $num) = @_; + my $nshard = ($self->{nshard} // 1); + ($num - 1) * $nshard + 1; +} + +sub msg_keywords { + my ($self, $num) = @_; # num_or_mitem + my $xdb = $self->xdb; # set {nshard}; + my $docid = ref($num) ? $num->get_docid : do { + # get combined docid from over.num: + # (not generic Xapian, only works with our sharding scheme) + my $nshard = $self->{nshard} // 1; + ($num - 1) * $nshard + $num % $nshard + 1; + }; + my %kw; + eval { + my $end = $xdb->termlist_end($docid); + my $cur = $xdb->termlist_begin($docid); + for (; $cur != $end; $cur++) { + $cur->skip_to('K'); + last if $cur == $end; + my $kw = $cur->get_termname; + $kw =~ s/\AK//s and $kw{$kw} = undef; + } + }; + warn "E: #$docid ($num): $@\n" if $@; + wantarray ? sort(keys(%kw)) : \%kw; +} + +1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm new file mode 100644 index 00000000..56f668b8 --- /dev/null +++ b/lib/PublicInbox/LeiStore.pm @@ -0,0 +1,197 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +# +# Local storage (cache/memo) for lei(1), suitable for personal/private +# mail iff on encrypted device/FS. Based on v2, but only deduplicates +# based on git OID. +# +# for xref3, the following are constant: $eidx_key = '.', $xnum = -1 +package PublicInbox::LeiStore; +use strict; +use v5.10.1; +use parent qw(PublicInbox::Lock); +use PublicInbox::SearchIdx qw(crlf_adjust); +use PublicInbox::ExtSearchIdx; +use PublicInbox::Import; +use PublicInbox::InboxWritable; +use PublicInbox::V2Writable; +use PublicInbox::ContentHash qw(content_hash); +use PublicInbox::MID qw(mids); +use PublicInbox::LeiSearch; + +sub new { + my (undef, $dir, $opt) = @_; + my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt); + bless { priv_eidx => $eidx }, __PACKAGE__; +} + +sub git { $_[0]->{priv_eidx}->git } # read-only + +sub packing_factor { $PublicInbox::V2Writable::PACKING_FACTOR } + +sub rotate_bytes { + $_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor) +} + +sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" }; + +sub git_epoch_max { + my ($self) = @_; + my $pfx = $self->git_pfx; + my $max = 0; + return $max unless -d $pfx ; + opendir my $dh, $pfx or die "opendir $pfx: $!\n"; + while (defined(my $git_dir = readdir($dh))) { + $git_dir =~ m!\A([0-9]+)\.git\z! or next; + $max = $1 + 0 if $1 > $max; + } + $max; +} + +sub importer { + my ($self) = @_; + my $max; + my $im = $self->{im}; + if ($im) { + return $im if $im->{bytes_added} < $self->rotate_bytes; + + delete $self->{im}; + $im->done; + undef $im; + $self->checkpoint; + $max = $self->git_epoch_max + 1; + } + my $pfx = $self->git_pfx; + $max //= $self->git_epoch_max; + while (1) { + my $latest = "$pfx/$max.git"; + my $old = -e $latest; + my $git = PublicInbox::Git->new($latest); + PublicInbox::Import::init_bare({ git => $git }); + $git->qx(qw(config core.sharedRepository 0600)) if !$old; + my $packed_bytes = $git->packed_bytes; + my $unpacked_bytes = $packed_bytes / $self->packing_factor; + if ($unpacked_bytes >= $self->rotate_bytes) { + $max++; + next; + } + chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); + die "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; + my ($n, $e) = ($i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/g) + or die "could not extract name/email from `$i'\n"; + $self->{im} = $im = PublicInbox::Import->new($git, $n, $e); + $im->{bytes_added} = int($packed_bytes / $self->packing_factor); + $im->{lock_path} = undef; + $im->{path_type} = 'v2'; + return $im; + } +} + +sub search { + PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir}); +} + +sub eidx_init { + my ($self) = @_; + my $eidx = $self->{priv_eidx}; + $eidx->idx_init({-private => 1}); + $eidx; +} + +sub _docids_for ($$) { + my ($self, $eml) = @_; + my %docids; + my $chash = content_hash($eml); + my $eidx = eidx_init($self); + my $oidx = $eidx->{oidx}; + my $im = $self->{im}; + for my $mid (@{mids($eml)}) { + my ($id, $prev); + while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { + my $oid = $cur->{blob}; + my $docid = $cur->{num}; + my $bref = $im ? $im->cat_blob($oid) : undef; + $bref //= $eidx->git->cat_file($oid) // do { + warn "W: $oid (#$docid) <$mid> not found\n"; + next; + }; + local $self->{current_info} = $oid; + my $x = PublicInbox::Eml->new($bref); + $docids{$docid} = $docid if content_hash($x) eq $chash; + } + } + sort { $a <=> $b } values %docids; +} + +sub set_eml_keywords { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my @docids = _docids_for($self, $eml); + for my $docid (@docids) { + $eidx->idx_shard($docid)->shard_set_keywords($docid, @kw); + } + \@docids; +} + +sub add_eml_keywords { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my @docids = _docids_for($self, $eml); + for my $docid (@docids) { + $eidx->idx_shard($docid)->shard_add_keywords($docid, @kw); + } + \@docids; +} + +sub remove_eml_keywords { + my ($self, $eml, @kw) = @_; + my $eidx = eidx_init($self); + my @docids = _docids_for($self, $eml); + for my $docid (@docids) { + $eidx->idx_shard($docid)->shard_remove_keywords($docid, @kw); + } + \@docids; +} + +sub add_eml { + my ($self, $eml) = @_; + my $eidx = eidx_init($self); + my $oidx = $eidx->{oidx}; + my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; + my $im = $self->importer; + $im->add($eml, undef, $smsg) or return; # duplicate returns undef + my $msgref = delete $smsg->{-raw_email}; + $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref); + + local $self->{current_info} = $smsg->{blob}; + if (my @docids = _docids_for($self, $eml)) { + for my $docid (@docids) { + my $idx = $eidx->idx_shard($docid); + $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); + $idx->shard_add_eidx_info($docid, '.', $eml); # List-Id + } + } else { + $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); + $oidx->add_overview($eml, $smsg); + $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); + my $idx = $eidx->idx_shard($smsg->{num}); + $idx->index_raw($msgref, $eml, $smsg); + } + $smsg->{blob} +} + +sub done { + my ($self) = @_; + my $err = ''; + if (my $im = delete($self->{im})) { + eval { $im->done }; + if ($@) { + $err .= "import done: $@\n"; + warn $err; + } + } + $self->{priv_eidx}->done; + die $err if $err; +} + +1; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 4a39bf53..c8630ddb 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -684,4 +684,14 @@ DELETE FROM eidxq WHERE docid = ? } +sub blob_exists { + my ($self, $oidhex) = @_; + my $sth = $self->dbh->prepare_cached(<<'', undef, 1); +SELECT COUNT(*) FROM xref3 WHERE oidbin = ? + + $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); + $sth->execute; + $sth->fetchrow_array; +} + 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index b731f698..548f2114 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -1,6 +1,6 @@ # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ -# based on notmuch, but with no concept of folders, files or flags +# based on notmuch, but with no concept of folders, files # # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use # with the web and NNTP interfaces. This index maintains thread @@ -371,7 +371,7 @@ sub eml2doc ($$$;$) { index_headers($self, $smsg); if (defined(my $eidx_key = $smsg->{eidx_key})) { - $doc->add_boolean_term('O'.$eidx_key); + $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; } msg_iter($eml, \&index_xapian, [ $self, $doc ]); index_ids($self, $doc, $eml, $mids); @@ -467,7 +467,7 @@ sub add_eidx_info { begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; term_generator($self)->set_document($doc); - $doc->add_boolean_term('O'.$eidx_key); + $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; index_list_id($self, $doc, $eml); $self->{xdb}->replace_document($docid, $doc); } @@ -501,6 +501,47 @@ sub remove_eidx_info { $self->{xdb}->replace_document($docid, $doc); } +sub set_keywords { + my ($self, $docid, @kw) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + my %keep = map { $_ => 1 } @kw; + my %add = %keep; + my @rm; + my $end = $doc->termlist_end; + for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { + $cur->skip_to('K'); + last if $cur == $end; + my $kw = $cur->get_termname; + $kw =~ s/\AK//s or next; + $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw); + } + return unless (scalar(@rm) + scalar(keys %add)); + $doc->remove_term('K'.$_) for @rm; + $doc->add_boolean_term('K'.$_) for (keys %add); + $self->{xdb}->replace_document($docid, $doc); +} + +sub add_keywords { + my ($self, $docid, @kw) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + $doc->add_boolean_term('K'.$_) for @kw; + $self->{xdb}->replace_document($docid, $doc); +} + +sub remove_keywords { + my ($self, $docid, @kw) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + my $replace; + eval { + $doc->remove_term('K'.$_); + $replace = 1 + } for @kw; + $self->{xdb}->replace_document($docid, $doc) if $replace; +} + sub get_val ($$) { my ($doc, $col) = @_; sortable_unserialise($doc->get_value($col)); diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 2e654769..87b0bad6 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -89,6 +89,12 @@ sub shard_worker_loop ($$$$$) { my ($len, $docid, $eidx_key) = split(/ /, $line, 3); $self->remove_eidx_info($docid, $eidx_key, eml($r, $len)); + } elsif ($line =~ s/\A=K (\d+) //) { + $self->set_keywords($1 + 0, split(/ /, $line)); + } elsif ($line =~ s/\A-K (\d+) //) { + $self->remove_keywords($1 + 0, split(/ /, $line)); + } elsif ($line =~ s/\A\+K (\d+) //) { + $self->add_keywords($1 + 0, split(/ /, $line)); } elsif ($line =~ s/\AO ([^\n]+)//) { my $over_fn = $1; $over_fn =~ tr/\0/\n/; @@ -210,6 +216,33 @@ sub shard_remove { } } +sub shard_set_keywords { + my ($self, $docid, @kw) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "=K $docid @kw\n" or die "failed to write: $!"; + } else { # same process + $self->set_keywords($docid, @kw); + } +} + +sub shard_remove_keywords { + my ($self, $docid, @kw) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "-K $docid @kw\n" or die "failed to write: $!"; + } else { # same process + $self->remove_keywords($docid, @kw); + } +} + +sub shard_add_keywords { + my ($self, $docid, @kw) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "+K $docid @kw\n" or die "failed to write: $!"; + } else { # same process + $self->add_keywords($docid, @kw); + } +} + sub shard_over_check { my ($self, $over) = @_; if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 3e3b275f..e8a5fbd2 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -24,7 +24,7 @@ use File::Temp (); my $OID = qr/[a-f0-9]{40,}/; # an estimate of the post-packed size to the raw uncompressed size -my $PACKING_FACTOR = 0.4; +our $PACKING_FACTOR = 0.4; # SATA storage lags behind what CPUs are capable of, so relying on # nproc(1) can be misleading and having extra Xapian shards is a -- cgit v1.2.3-24-ge0c7 From d42172638f5479f76e73470ad48a679100c3e0d5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Dec 2020 08:21:24 +0000 Subject: tests: more common JSON module loading We'll probably be using JSON more in the future, so make it easier to require in tests --- lib/PublicInbox/ManifestJsGz.pm | 2 +- lib/PublicInbox/TestCommon.pm | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 6d5b57ee..33df020a 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -11,7 +11,7 @@ use PublicInbox::Config; use IO::Compress::Gzip qw(gzip); use HTTP::Date qw(time2str); -our $json = PublicInbox::Config::json(); +my $json = PublicInbox::Config::json(); # called by WwwListing sub url_regexp { diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 299b9c6a..2116575b 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -75,6 +75,10 @@ sub require_mods { my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/; my @need; while (my $mod = shift(@mods)) { + if ($mod eq 'json') { + $mod = 'Cpanel::JSON::XS||JSON::MaybeXS||'. + 'JSON||JSON::PP' + } if ($mod eq 'Search::Xapian') { if (eval { require PublicInbox::Search } && PublicInbox::Search::load_xapian()) { -- cgit v1.2.3-24-ge0c7 From 504774acd5236653cdeafb536be95fbfb147258f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Dec 2020 09:03:34 +0000 Subject: lei: use spawn (vfork + execve) for lazy start This allows us to rely on FD_CLOEXEC being set on pipes from prove(1), so forgetting `daemon-stop' won't cause tests to hang. Unfortunately, daemon tests will be slower with this. --- lib/PublicInbox/LeiDaemon.pm | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm index b4b1ac59..fd4d00d4 100644 --- a/lib/PublicInbox/LeiDaemon.pm +++ b/lib/PublicInbox/LeiDaemon.pm @@ -324,29 +324,27 @@ sub accept_dispatch { # Listener {post_accept} callback sub noop {} # lei(1) calls this when it can't connect -sub lazy_start ($$) { +sub lazy_start { my ($path, $err) = @_; if ($err == ECONNREFUSED) { unlink($path) or die "unlink($path): $!"; } elsif ($err != ENOENT) { die "connect($path): $!"; } + require IO::FDPass; my $umask = umask(077) // die("umask(077): $!"); my $l = IO::Socket::UNIX->new(Local => $path, Listen => 1024, Type => SOCK_STREAM) or $err = $!; umask($umask) or die("umask(restore): $!"); - $l or return $err; + $l or return die "bind($path): $err"; my @st = stat($path) or die "stat($path): $!"; my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; my $oldset = PublicInbox::Sigfd::block_signals(); my $pid = fork // die "fork: $!"; - if ($pid) { - PublicInbox::Sigfd::sig_setmask($oldset); - return; # client will connect to $path - } + return if $pid; openlog($path, 'pid', 'user'); local $SIG{__DIE__} = sub { syslog('crit', "@_"); @@ -360,7 +358,7 @@ sub lazy_start ($$) { open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; setsid(); $pid = fork // die "fork: $!"; - exit if $pid; + return if $pid; $0 = "lei-daemon $path"; require PublicInbox::Listener; require PublicInbox::EOFpipe; -- cgit v1.2.3-24-ge0c7 From 04115815acef798c4330a76df9c1cb3d3542c9f9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Dec 2020 09:14:31 +0000 Subject: lei: refine help/option parsing, implement "init" There's a bunch of work in here as the foundations are being fleshed out. One of the UI/UX is to make it easy to keep built-in help and shell completions consistent --- lib/PublicInbox/LeiDaemon.pm | 401 +++++++++++++++++++++++++++++++++---------- lib/PublicInbox/LeiStore.pm | 7 +- 2 files changed, 312 insertions(+), 96 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm index fd4d00d4..010c1cba 100644 --- a/lib/PublicInbox/LeiDaemon.pm +++ b/lib/PublicInbox/LeiDaemon.pm @@ -15,13 +15,18 @@ use POSIX qw(setsid); use IO::Socket::UNIX; use IO::Handle (); use Sys::Syslog qw(syslog openlog); +use PublicInbox::Config; use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT); use PublicInbox::Sigfd; use PublicInbox::DS qw(now); use PublicInbox::Spawn qw(spawn); -our $quit = sub { exit(shift // 0) }; +use Text::Wrap qw(wrap); +use File::Path qw(mkpath); +use File::Spec; +our $quit = \&CORE::exit; my $glp = Getopt::Long::Parser->new; $glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); +our %PATH2CFG; # persistent for socket daemon # TBD: this is a documentation mechanism to show a subcommand # (may) pass options through to another command: @@ -30,45 +35,48 @@ sub pass_through { () } # TODO: generate shell completion + help using %CMD and %OPTDESC # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: -'query' => [ 'SEARCH-TERMS...', 'search for messages matching terms', qw( +'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a - limit|n=i sort|s=s reverse|r offset=i remote local! extinbox! + limit|n=i sort|s=s@ reverse|r offset=i remote local! extinbox! since|after=s until|before=s) ], -'show' => [ '{MID|OID}', 'show a given object (Message-ID or object ID)', +'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), pass_through('git show') ], -'add-extinbox' => [ 'URL-OR-PATHNAME', +'add-extinbox' => [ 'URL_OR_PATHNAME', 'add/set priority of a publicinbox|extindex for extra matches', qw(prio=i) ], -'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex locations', +'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations', qw(format|f=s z local remote) ], -'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}', +'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}', 'exclude further results from a publicinbox|extindex', qw(prune) ], -'ls-query' => [ '[FILTER]', 'list saved search queries', +'ls-query' => [ '[FILTER...]', 'list saved search queries', qw(name-only format|f=s z) ], 'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ], 'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ], -'plonk' => [ '{--thread|--from=IDENT}', +'plonk' => [ '--thread|--from=IDENT', 'exclude mail matching From: or thread from non-Message-ID searches', - qw(thread|t from|f=s mid=s oid=s) ], -'mark' => [ 'MESSAGE-FLAGS', 'set/unset flags on message(s) from stdin', + qw(thread|t stdin| from|f=s mid=s oid=s) ], +'mark' => [ 'MESSAGE_FLAGS...', + 'set/unset flags on message(s) from stdin', qw(stdin| oid=s exact by-mid|mid:s) ], -'forget' => [ '--stdin', 'exclude message(s) on stdin from query results', - qw(stdin| oid=s exact by-mid|mid:s) ], +'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', + 'exclude message(s) on stdin from query results', + qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], -'purge-mailsource' => [ '{URL-OR-PATHNAME|--all}', +'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}', 'remove imported messages from IMAP, Maildirs, and MH', qw(exact! all jobs:i indexed) ], # code repos are used for `show' to solve blobs from patch mails 'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', qw(prio=i) ], -'ls-coderepo' => [ '[FILTER]', 'list known code repos', qw(format|f=s z) ], +'ls-coderepo' => [ '[FILTER_TERMS...]', + 'list known code repos', qw(format|f=s z) ], 'forget-coderepo' => [ 'PATHNAME', 'stop using repo to solve blobs from patches', qw(prune) ], @@ -76,7 +84,7 @@ our %CMD = ( # sorted in order of importance/use: 'add-watch' => [ '[URL_OR_PATHNAME]', 'watch for new messages and flag changes', qw(import! flags! interval=s recursive|r exclude=s include=s) ], -'ls-watch' => [ '[FILTER]', 'list active watches with numbers and status', +'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', qw(format|f=s z) ], 'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], 'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], @@ -88,11 +96,13 @@ our %CMD = ( # sorted in order of importance/use: qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags), ], -'config' => [ '[ANYTHING...]', - 'git-config(1) wrapper for ~/.config/lei/config', +'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config', pass_through('git config') ], -'daemon-stop' => [ undef, 'stop the lei-daemon' ], -'daemon-pid' => [ undef, 'show the PID of the lei-daemon' ], +'init' => [ '[PATHNAME]', + 'initialize storage, default: ~/.local/share/lei/store', + qw(quiet|q) ], +'daemon-stop' => [ '', 'stop the lei-daemon' ], +'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], 'help' => [ '[SUBCOMMAND]', 'show help' ], # XXX do we need this? @@ -108,36 +118,43 @@ our %CMD = ( # sorted in order of importance/use: # $spec => [@ALLOWED_VALUES (default is first), $description], # $spec => $description # "$SUB_COMMAND TAB $spec" => as above -my $stdin_formats = [ qw(auto raw mboxrd mboxcl2 mboxcl mboxo), +my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo', 'specify message input format' ]; -my $ls_format = [ qw(plain json null), 'listing output format' ]; -my $show_format = [ qw(plain raw html mboxrd mboxcl2 mboxcl), - 'message/object output format' ]; +my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; my %OPTDESC = ( +'help|h' => 'show this built-in help', +'quiet|q' => 'be quiet', 'solve!' => 'do not attempt to reconstruct blobs from emails', -'save-as=s' => 'save a search terms by given name', +'save-as=s' => ['NAME', 'save a search terms by given name'], -'type=s' => [qw(any mid git), 'disambiguate type' ], +'type=s' => [ 'any|mid|git', 'disambiguate type' ], -'dedupe|d=s' => [qw(content oid mid), 'deduplication strategy'], -'thread|t' => 'every message in the same thread as the actual match(es)', +'dedupe|d=s' => ['STRAT|content|oid|mid', + 'deduplication strategy'], +'show thread|t' => 'display entire thread a message belongs to', +'query thread|t' => + 'return all messages in the same thread as the actual match(es)', 'augment|a' => 'augment --output destination instead of clobbering', -'output|o=s' => "destination (e.g. `/path/to/Maildir', or `-' for stdout)", +'output|o=s' => [ 'DEST', + "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ], +'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl', + 'message/object output format' ], 'mark format|f=s' => $stdin_formats, 'forget format|f=s' => $stdin_formats, -'query format|f=s' => [qw(maildir mboxrd mboxcl2 mboxcl html oid), - q[specify output format (default: determined by --output)]], +'query format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid', + 'specify output format, default depends on --output'], 'ls-query format|f=s' => $ls_format, -'ls-extinbox format|f=s' => $ls_format, +'ls-extinbox format|f=s' => $ls_format, -'limit|n=i' => 'integer limit on number of matches (default: 10000)', -'offset=i' => 'search result offset (default: 0)', +'limit|n=i' => ['NUM', + 'limit on number of matches (default: 10000)' ], +'offset=i' => ['OFF', 'search result offset (default: 0)'], -'sort|s=s@' => [qw(internaldate date relevance docid), - "order of results `--output'-dependent)"], +'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', + "order of results `--output'-dependent"], 'prio=i' => 'priority of query source', @@ -156,7 +173,7 @@ my %OPTDESC = ( 'exact' => 'operate on exact header matches only', 'exact!' => 'rely on content match instead of exact header matches', -'by-mid|mid:s' => 'match only by Message-ID, ignoring contents', +'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], 'jobs:i' => 'set parallelism level', ); # %OPTDESC @@ -174,93 +191,282 @@ sub x_it ($$) { # pronounced "exit" } } -sub emit ($$$) { - my ($client, $channel, $buf) = @_; - print { $client->{$channel} } $buf or warn "print FD[$channel]: $!"; +sub emit { + my ($client, $channel) = @_; # $buf = $_[2] + print { $client->{$channel} } $_[2] or die "print FD[$channel]: $!"; } -sub fail ($$;$) { - my ($client, $buf, $exit_code) = @_; +sub err { + my ($client, $buf) = @_; $buf .= "\n" unless $buf =~ /\n\z/s; emit($client, 2, $buf); +} + +sub qerr { $_[0]->{opt}->{quiet} or err(@_) } + +sub fail ($$;$) { + my ($client, $buf, $exit_code) = @_; + err($client, $buf); x_it($client, ($exit_code // 1) << 8); undef; } sub _help ($;$) { - my ($client, $channel) = @_; - emit($client, $channel //= 1, <{cmd} // 'COMMAND'; + my @info = @{$CMD{$cmd} // [ '...', '...' ]}; + my @top = ($cmd, shift(@info) // ()); + my $cmd_desc = shift(@info); + my @opt_desc; + my $lpad = 2; + for my $sw (@info) { # qw(prio=s + my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; + my $arg_vals = ''; + ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; + + # lower-case is a keyword (e.g. `content', `oid'), + # ALL_CAPS is a string description (e.g. `PATH') + if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { + $desc .= "\ndefault: `$1'"; + } + my (@vals, @s, @l); + my $x = $sw; + if ($x =~ s/!\z//) { # solve! => --no-solve + $x = "no-$x"; + } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" + @vals = (' [', undef, ']'); + } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" + @vals = (' ', undef); + } # else: no args $x = 'thread|t' + for (split(/\|/, $x)) { # help|h + length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); + } + if (!scalar(@vals)) { # no args 'thread|t' + } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" + $vals[1] = $1; + } else { + $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" + } + if ($arg_vals =~ /([,\|])/) { + my $sep = $1; + my @allow = split(/\Q$sep\E/, $arg_vals); + my $must = $sep eq '|' ? 'Must' : 'Can'; + @allow = map { "`$_'" } @allow; + my $last = pop @allow; + $desc .= "\n$must be one of: " . + join(', ', @allow) . " or $last"; + } + my $lhs = join(', ', @s, @l) . join('', @vals); + $lhs =~ s/\A--/ --/; # pad if no short options + $lpad = length($lhs) if length($lhs) > $lpad; + push @opt_desc, $lhs, $desc; + } + my $msg = $errmsg ? "E: $errmsg\n" : ''; + $msg .= < failure + $lpad += 2; + local $Text::Wrap::columns = 78 - $lpad; + my $padding = ' ' x ($lpad + 2); + while (my ($lhs, $rhs) = splice(@opt_desc, 0, 2)) { + $msg .= ' '.pack("A$lpad", $lhs); + $rhs = wrap('', '', $rhs); + $rhs =~ s/\n/\n$padding/sg; # LHS pad continuation lines + $msg .= $rhs; + $msg .= "\n"; + } + my $channel = $errmsg ? 2 : 1; + emit($client, $channel, $msg); + x_it($client, $errmsg ? 1 << 8 : 0); # stderr => failure + undef; } -sub assert_args ($$$;$@) { - my ($client, $argv, $proto, $opt, @spec) = @_; - $opt //= {}; - push @spec, qw(help|h); - $glp->getoptionsfromarray($argv, $opt, @spec) or - return fail($client, 'bad arguments or options'); - if ($opt->{help}) { - _help($client); - undef; - } else { - my ($nreq, $rest) = split(/;/, $proto); - $nreq = (($nreq // '') =~ tr/$/$/); - my $argc = scalar(@$argv); - my $tot = ($rest // '') eq '@' ? $argc : ($proto =~ tr/$/$/); - return 1 if $argc <= $tot && $argc >= $nreq; - _help($client, 2); - undef +sub optparse ($$$) { + my ($client, $cmd, $argv) = @_; + $client->{cmd} = $cmd; + my $opt = $client->{opt} = {}; + my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ]; + my ($proto, $desc, @spec) = @$info; + $glp->getoptionsfromarray($argv, $opt, @spec, qw(help|h)) or + return _help($client, "bad arguments or options for $cmd"); + return _help($client) if $opt->{help}; + my $i = 0; + my $POS_ARG = '[A-Z][A-Z0-9_]+'; + my ($err, $inf); + my @args = split(/ /, $proto); + for my $var (@args) { + if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args; + $inf = defined($argv->[$i]) and last; + $var =~ s/\.\.\.\z//; + $err = "$var not supplied"; + } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i + $argv->[$i++] // ($err = "$var not supplied"); + } elsif ($var =~ /\.\.\.\]\z/) { # optional args start + $inf = 1; + last; + } elsif ($var =~ /\A\[$POS_ARG\]\z/) { # one optional arg + $i++; + } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin + my @or = split(/\|/, $var); + my $ok; + for my $o (@or) { + if ($o =~ /\A--([a-z0-9\-]+)/) { + $ok = defined($opt->{$1}); + last; + } elsif (defined($argv->[$i])) { + $ok = 1; + $i++; + last; + } # else continue looping + } + my $last = pop @or; + $err = join(', ', @or) . " or $last must be set"; + } else { + warn "BUG: can't parse `$var' in $proto"; + } + last if $err; + } + # warn "inf=$inf ".scalar(@$argv). ' '.scalar(@args)."\n"; + if (!$inf && scalar(@$argv) > scalar(@args)) { + $err //= 'too many arguments'; } + $err ? fail($client, "usage: lei $cmd $proto\nE: $err") : 1; } sub dispatch { my ($client, $cmd, @argv) = @_; - local $SIG{__WARN__} = sub { emit($client, 2, "@_") }; + local $SIG{__WARN__} = sub { err($client, "@_") }; local $SIG{__DIE__} = 'DEFAULT'; - if (defined $cmd) { - my $func = "lei_$cmd"; - $func =~ tr/-/_/; - if (my $cb = __PACKAGE__->can($func)) { - $client->{cmd} = $cmd; - $cb->($client, \@argv); - } elsif (grep(/\A-/, $cmd, @argv)) { - assert_args($client, [ $cmd, @argv ], ''); - } else { - fail($client, "`$cmd' is not an lei command"); - } + return _help($client, 'no command given') unless defined($cmd); + my $func = "lei_$cmd"; + $func =~ tr/-/_/; + if (my $cb = __PACKAGE__->can($func)) { + optparse($client, $cmd, \@argv) or return; + $cb->($client, @argv); + } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only + my $opt = {}; + $glp->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or + return _help($client, 'bad arguments or options'); + _help($client); } else { - _help($client, 2); + fail($client, "`$cmd' is not an lei command"); } } -sub lei_daemon_pid { - my ($client, $argv) = @_; - assert_args($client, $argv, '') and emit($client, 1, "$$\n"); +sub _lei_cfg ($;$) { + my ($client, $creat) = @_; + my $env = $client->{env}; + my $cfg_dir = File::Spec->canonpath(( $env->{XDG_CONFIG_HOME} // + ($env->{HOME} // '/nonexistent').'/.config').'/lei'); + my $f = "$cfg_dir/config"; + my @st = stat($f); + my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size + if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case + return ($client->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; + } + if (!@st) { + unless ($creat) { + delete $client->{cfg}; + return; + } + -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n"; + open my $fh, '>>', $f or die "open($f): $!\n"; + @st = stat($fh) or die "fstat($f): $!\n"; + $cur_st = pack('dd', $st[10], $st[7]); + qerr($client, "I: $f created"); + } + my $cfg = PublicInbox::Config::git_config_dump($f); + $cfg->{-st} = $cur_st; + $cfg->{'-f'} = $f; + $client->{cfg} = $PATH2CFG{$f} = $cfg; } -sub lei_DBG_pwd { - my ($client, $argv) = @_; - assert_args($client, $argv, '') and - emit($client, 1, "$client->{env}->{PWD}\n"); +sub _lei_store ($;$) { + my ($client, $creat) = @_; + my $cfg = _lei_cfg($client, $creat); + $cfg->{-lei_store} //= do { + require PublicInbox::LeiStore; + PublicInbox::SearchIdx::load_xapian_writable(); + defined(my $dir = $cfg->{'leistore.dir'}) or return; + PublicInbox::LeiStore->new($dir, { creat => $creat }); + }; +} + +sub lei_show { + my ($client, @argv) = @_; } -sub lei_DBG_cwd { - my ($client, $argv) = @_; - require Cwd; - assert_args($client, $argv, '') and emit($client, 1, Cwd::cwd()."\n"); +sub lei_query { + my ($client, @argv) = @_; } -sub lei_DBG_false { x_it($_[0], 1 << 8) } +sub lei_mark { + my ($client, @argv) = @_; +} -sub lei_daemon_stop { - my ($client, $argv) = @_; - assert_args($client, $argv, '') and $quit->(0); +sub lei_config { + my ($client, @argv) = @_; + my $env = $client->{env}; + if (defined $env->{GIT_CONFIG}) { + my %copy = %$env; + delete $copy{GIT_CONFIG}; + $env = \%copy; + } + if (my @conflict = (grep(/\A-f=?\z/, @argv), + grep(/\A--(?:global|system| + file|config-file)=?\z/x, @argv))) { + return fail($client, "@conflict not supported by lei config"); + } + my $cfg = _lei_cfg($client, 1); + my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; + my %rdr = map { $_ => $client->{$_} } (0..2); + require PublicInbox::Import; + PublicInbox::Import::run_die($cmd, $env, \%rdr); } +sub lei_init { + my ($client, $dir) = @_; + my $cfg = _lei_cfg($client, 1); + my $cur = $cfg->{'leistore.dir'}; + my $env = $client->{env}; + $dir //= ( $env->{XDG_DATA_HOME} // + ($env->{HOME} // '/nonexistent').'/.local/share' + ) . '/lei/store'; + $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware + my @cur = stat($cur) if defined($cur); + $cur = File::Spec->canonpath($cur) if $cur; + my @dir = stat($dir); + my $exists = "I: leistore.dir=$cur already initialized" if @dir; + if (@cur) { + if ($cur eq $dir) { + _lei_store($client, 1)->done; + return qerr($client, $exists); + } + + # some folks like symlinks and bind mounts :P + if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") { + lei_config($client, 'leistore.dir', $dir); + _lei_store($client, 1)->done; + return qerr($client, "$exists (as $cur)"); + } + return fail($client, <<""); +E: leistore.dir=$cur already initialized and it is not $dir + + } + lei_config($client, 'leistore.dir', $dir); + _lei_store($client, 1)->done; + $exists //= "I: leistore.dir=$dir newly initialized"; + return qerr($client, $exists); +} + +sub lei_daemon_pid { + emit($_[0], 1, "$$\n"); +} + +sub lei_daemon_stop { $quit->(0) } + sub lei_help { _help($_[0]) } sub reap_exec { # dwaitpid callback @@ -269,9 +475,9 @@ sub reap_exec { # dwaitpid callback } sub lei_git { # support passing through random git commands - my ($client, $argv) = @_; - my %opt = map { $_ => $client->{$_} } (0..2); - my $pid = spawn(['git', @$argv], $client->{env}, \%opt); + my ($client, @argv) = @_; + my %rdr = map { $_ => $client->{$_} } (0..2); + my $pid = spawn(['git', @argv], $client->{env}, \%rdr); PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client); } @@ -360,6 +566,7 @@ sub lazy_start { $pid = fork // die "fork: $!"; return if $pid; $0 = "lei-daemon $path"; + local %PATH2CFG; require PublicInbox::Listener; require PublicInbox::EOFpipe; $l->blocking(0); @@ -427,6 +634,10 @@ sub lazy_start { # for users w/o IO::FDPass sub oneshot { + my ($main_pkg) = @_; + my $exit = $main_pkg->can('exit'); # caller may override exit() + local $quit = $exit if $exit; + local %PATH2CFG; dispatch({ 0 => *STDIN{IO}, 1 => *STDOUT{IO}, diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 56f668b8..b5b49efb 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -22,7 +22,12 @@ use PublicInbox::LeiSearch; sub new { my (undef, $dir, $opt) = @_; my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt); - bless { priv_eidx => $eidx }, __PACKAGE__; + my $self = bless { priv_eidx => $eidx }, __PACKAGE__; + if ($opt->{creat}) { + PublicInbox::SearchIdx::load_xapian_writable(); + eidx_init($self); + } + $self; } sub git { $_[0]->{priv_eidx}->git } # read-only -- cgit v1.2.3-24-ge0c7 From 671d4e711d4a20dfb987b273d08e73e5add37f1d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 16 Dec 2020 05:45:52 +0000 Subject: t/lei-oneshot: standalone oneshot (non-socket) test We can use the same "local $ENV{FOO}" hack we do with t/nntpd-v2.t to test the oneshot code path without imposing an extra script in the users' $PATH. --- lib/PublicInbox/TestCommon.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 2116575b..c236c589 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -168,7 +168,7 @@ sub run_script_exit { die RUN_SCRIPT_EXIT; } -my %cached_scripts; +our %cached_scripts; sub key2sub ($) { my ($key) = @_; $cached_scripts{$key} //= do { -- cgit v1.2.3-24-ge0c7 From e605ec76c5a3afe9390ca95709fed719a098235a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 16 Dec 2020 05:59:41 +0000 Subject: lei: ensure we run a restrictive umask While we configure the LeiStore git repos and DBs to have a restrictive umask, lei may also write to Maildirs/mboxes/etc. We will follow mutt behavior when saving files/messages to the FS. We only want to create files which are only readable by the local user since this is intended for private mail and could be used on shared systems. We may allow passing the umask on a per-command-basis, but it's probably not worth the effort to support. --- lib/PublicInbox/LeiDaemon.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm index 010c1cba..1f170f1d 100644 --- a/lib/PublicInbox/LeiDaemon.pm +++ b/lib/PublicInbox/LeiDaemon.pm @@ -538,12 +538,11 @@ sub lazy_start { die "connect($path): $!"; } require IO::FDPass; - my $umask = umask(077) // die("umask(077): $!"); + umask(077) // die("umask(077): $!"); my $l = IO::Socket::UNIX->new(Local => $path, Listen => 1024, Type => SOCK_STREAM) or $err = $!; - umask($umask) or die("umask(restore): $!"); $l or return die "bind($path): $err"; my @st = stat($path) or die "stat($path): $!"; my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino @@ -638,6 +637,7 @@ sub oneshot { my $exit = $main_pkg->can('exit'); # caller may override exit() local $quit = $exit if $exit; local %PATH2CFG; + umask(077) // die("umask(077): $!"); dispatch({ 0 => *STDIN{IO}, 1 => *STDOUT{IO}, -- cgit v1.2.3-24-ge0c7 From bcf5e76a87b46b038509b65ced64149e6d2b81c3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 16 Dec 2020 09:30:28 +0000 Subject: lei: support `daemon-env' for modifying long-lived env While lei(1) socket connections can set environment variables for its running context, it may not completely remove some of them. The background daemon just inherits whatever env the client spawning it had. This command ensures the persistent env can be modified as needed. Similar to env(1), this supports "-u", "-" (--clear), and "-0"/"-z" switches. It may be useful to unset or change or even completely clear the environment independently of what a socket client feeds us. "-i" is omitted since "--ignore-environment" seems like a bad name for a persistent daemon as opposed to a one-shot command. "-" and --clear (like clearenv(3)) will completely clobber the environment. "Lonesome dash" support is added to our option/help parsing for the "-" shortcut to "--clear". Getopt::Long doesn't seem to support specs like "clear|" or "stdin|", but only "", so we do a little pre/post-processing to merge the cases. --- lib/PublicInbox/LeiDaemon.pm | 55 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm index 1f170f1d..56f4aa7d 100644 --- a/lib/PublicInbox/LeiDaemon.pm +++ b/lib/PublicInbox/LeiDaemon.pm @@ -60,7 +60,7 @@ our %CMD = ( # sorted in order of importance/use: 'plonk' => [ '--thread|--from=IDENT', 'exclude mail matching From: or thread from non-Message-ID searches', - qw(thread|t stdin| from|f=s mid=s oid=s) ], + qw(stdin| thread|t from|f=s mid=s oid=s) ], 'mark' => [ 'MESSAGE_FLAGS...', 'set/unset flags on message(s) from stdin', qw(stdin| oid=s exact by-mid|mid:s) ], @@ -103,6 +103,8 @@ our %CMD = ( # sorted in order of importance/use: qw(quiet|q) ], 'daemon-stop' => [ '', 'stop the lei-daemon' ], 'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], +'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment', + qw(clear| unset|u=s@ z|0) ], 'help' => [ '[SUBCOMMAND]', 'show help' ], # XXX do we need this? @@ -175,6 +177,16 @@ my %OPTDESC = ( 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], 'jobs:i' => 'set parallelism level', + +# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere? +'z' => 'use NUL \\0 instead of newline (CR) to delimit lines', +'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', + +# note: no "--ignore-environment" / "-i" support like env(1) since that +# is one-shot and this is for a persistent daemon: +'clear|' => 'clear the daemon environment', +'unset|u=s@' => ['NAME', + 'unset matching NAME, may be specified multiple times'], ); # %OPTDESC sub x_it ($$) { # pronounced "exit" @@ -257,7 +269,11 @@ sub _help ($;$) { join(', ', @allow) . " or $last"; } my $lhs = join(', ', @s, @l) . join('', @vals); - $lhs =~ s/\A--/ --/; # pad if no short options + if ($x =~ /\|\z/) { # "stdin|" or "clear|" + $lhs =~ s/\A--/- , --/; + } else { + $lhs =~ s/\A--/ --/; # pad if no short options + } $lpad = length($lhs) if length($lhs) > $lpad; push @opt_desc, $lhs, $desc; } @@ -289,9 +305,20 @@ sub optparse ($$$) { my $opt = $client->{opt} = {}; my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ]; my ($proto, $desc, @spec) = @$info; - $glp->getoptionsfromarray($argv, $opt, @spec, qw(help|h)) or + push @spec, qw(help|h); + my $lone_dash; + if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias + $lone_dash = $spec[0]; + $opt->{$spec[0]} = \(my $var); + push @spec, '' => \$var; + } + $glp->getoptionsfromarray($argv, $opt, @spec) or return _help($client, "bad arguments or options for $cmd"); return _help($client) if $opt->{help}; + + # "-" aliases "stdin" or "clear" + $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash; + my $i = 0; my $POS_ARG = '[A-Z][A-Z0-9_]+'; my ($err, $inf); @@ -461,12 +488,28 @@ E: leistore.dir=$cur already initialized and it is not $dir return qerr($client, $exists); } -sub lei_daemon_pid { - emit($_[0], 1, "$$\n"); -} +sub lei_daemon_pid { emit($_[0], 1, "$$\n") } sub lei_daemon_stop { $quit->(0) } +sub lei_daemon_env { + my ($client, @argv) = @_; + my $opt = $client->{opt}; + if (defined $opt->{clear}) { + %ENV = (); + } elsif (my $u = $opt->{unset}) { + delete @ENV{@$u}; + } + if (@argv) { + %ENV = (%ENV, map { split(/=/, $_, 2) } @argv); + } elsif (!defined($opt->{clear}) && !$opt->{unset}) { + my $eor = $opt->{z} ? "\0" : "\n"; + my $buf = ''; + while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" } + emit($client, 1, $buf) + } +} + sub lei_help { _help($_[0]) } sub reap_exec { # dwaitpid callback -- cgit v1.2.3-24-ge0c7 From 475e6b6a722361223505a7fcb084f5e729c69240 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 00:16:18 +0000 Subject: lei_store: simplify git_epoch_max, slightly This follows how we detect the max epoch for v2 and shard count in Xapian. --- lib/PublicInbox/LeiStore.pm | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index b5b49efb..d3667d29 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -18,6 +18,7 @@ use PublicInbox::V2Writable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; +use List::Util qw(max); sub new { my (undef, $dir, $opt) = @_; @@ -42,15 +43,13 @@ sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" }; sub git_epoch_max { my ($self) = @_; - my $pfx = $self->git_pfx; - my $max = 0; - return $max unless -d $pfx ; - opendir my $dh, $pfx or die "opendir $pfx: $!\n"; - while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A([0-9]+)\.git\z! or next; - $max = $1 + 0 if $1 > $max; + if (opendir(my $dh, $self->git_pfx)) { + max(map { + substr($_, 0, -4) + 0; # drop ".git" suffix + } grep(/\A[0-9]+\.git\z/, readdir($dh))) // 0; + } else { + $!{ENOENT} ? 0 : die("opendir ${\$self->git_pfx}: $!\n"); } - $max; } sub importer { -- cgit v1.2.3-24-ge0c7 From ba135f3e25bf5d1b3aa3d34e31fefb55ee4c8d29 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 03:39:49 +0000 Subject: search: simplify initialization, add ->xdb_shards_flat This reduces differences between v1 and v2 code, and introduces ->xdb_shards_flat to provide read-only access to shards without using Xapian::MultiDatabase. This will allow us to combine shards of several inboxes AND extindexes for lei. --- lib/PublicInbox/ExtSearch.pm | 6 ---- lib/PublicInbox/LeiSearch.pm | 5 ++-- lib/PublicInbox/Search.pm | 65 +++++++++++++++++-------------------------- lib/PublicInbox/SearchIdx.pm | 15 ++++------ lib/PublicInbox/V2Writable.pm | 8 +----- 5 files changed, 34 insertions(+), 65 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index 410ae958..7ce950bc 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -33,12 +33,6 @@ sub misc { sub search { $_[0] } # self -# overrides PublicInbox::Search::_xdb -sub _xdb { - my ($self) = @_; - $self->xdb_sharded; -} - # same as per-inbox ->over, for now... sub over { my ($self) = @_; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 9cfd6ea2..66c16e04 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -9,8 +9,7 @@ use PublicInbox::Search; sub combined_docid ($$) { my ($self, $num) = @_; - my $nshard = ($self->{nshard} // 1); - ($num - 1) * $nshard + 1; + ($num - 1) * $self->{nshard} + 1; } sub msg_keywords { @@ -19,7 +18,7 @@ sub msg_keywords { my $docid = ref($num) ? $num->get_docid : do { # get combined docid from over.num: # (not generic Xapian, only works with our sharding scheme) - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; ($num - 1) * $nshard + $num % $nshard + 1; }; my %kw; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1d38fb9..bbc5e32f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -191,41 +191,37 @@ sub xdir ($;$) { } } -sub xdb_sharded { +# returns all shards as separate Xapian::Database objects w/o combining +sub xdb_shards_flat ($) { my ($self) = @_; - opendir(my $dh, $self->{xpfx}) or return; # not initialized yet - - # We need numeric sorting so shard[0] is first for reading - # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; + my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); - for (0..$last) { - my $shard_dir = "$self->{xpfx}/$_"; - if (-d $shard_dir && -r _) { + if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { + @xdb = ($X{Database}->new($xpfx)); + $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; + } else { + opendir(my $dh, $xpfx) or return (); # not initialized yet + # We need numeric sorting so shard[0] is first for reading + # Xapian metadata, if needed + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return (); + for (0..$last) { + my $shard_dir = "$self->{xpfx}/$_"; push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; - } else { # gaps from missing epochs throw off mdocid() - warn "E: $shard_dir missing or unreadable\n"; - return; } + $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; } - $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; - $self->{nshard} = scalar(@xdb); - my $xdb = shift @xdb; - $xdb->add_database($_) for @xdb; - $xdb; + @xdb; } sub _xdb { my ($self) = @_; - my $dir = xdir($self, 1); $self->{qp_flags} //= $QP_FLAGS; - if ($self->{ibx_ver} >= 2) { - xdb_sharded($self); - } else { - $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert"; - $X{Database}->new($dir); - } + my @xdb = xdb_shards_flat($self) or return; + $self->{nshard} = scalar(@xdb); + my $xdb = shift @xdb; + $xdb->add_database($_) for @xdb; + $xdb; } # v2 Xapian docids don't conflict, so they're identical to @@ -239,7 +235,7 @@ sub mdocid { sub mset_to_artnums { my ($self, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; [ map { mdocid($nshard, $_) } $mset->items ]; } @@ -251,25 +247,14 @@ sub xdb ($) { }; } -sub xpfx_init ($) { - my ($self) = @_; - if ($self->{ibx_ver} == 1) { - $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION; - } else { - $self->{xpfx} .= '/xap'.SCHEMA_VERSION; - } -} - sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; - my $self = bless { - xpfx => $ibx->{inboxdir}, # for xpfx_init + my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian'; + bless { + xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION, altid => $ibx->{altid}, - ibx_ver => $ibx->version, }, $class; - xpfx_init($self); - $self; } sub reopen { @@ -362,7 +347,7 @@ sub _enquire_once { # retry_reopen callback sub mset_to_smsg { my ($self, $ibx, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 548f2114..7e2843e9 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -54,14 +54,11 @@ sub new { } } $ibx = PublicInbox::InboxWritable->new($ibx); - my $self = bless { - ibx => $ibx, - xpfx => $inboxdir, # for xpfx_init - -altid => $altid, - ibx_ver => $version, - indexlevel => $indexlevel, - }, $class; - $self->xpfx_init; + my $self = PublicInbox::Search->new($ibx); + bless $self, $class; + $self->{ibx} = $ibx; + $self->{-altid} = $altid; + $self->{indexlevel} = $indexlevel; $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium'; if ($ibx->{-skip_docdata}) { $self->{-set_skip_docdata_once} = 1; @@ -408,7 +405,7 @@ sub add_xapian ($$$$) { sub _msgmap_init ($) { my ($self) = @_; - die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1; + die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1; $self->{mm} //= eval { require PublicInbox::Msgmap; my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index e8a5fbd2..7d41b0f6 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -73,13 +73,7 @@ sub count_shards ($) { delete $ibx->{search}; $srch->{nshard} // 0 } else { # ExtSearchIdx - $self->{nshard} // do { - if ($self->xdb_sharded) { - $self->{nshard} // die 'BUG: {nshard} unset'; - } else { - 0; - } - } + $self->{nshard} ||= scalar($self->xdb_shards_flat); } } -- cgit v1.2.3-24-ge0c7 From 867b47b6f667fe500313f85207d19e16703aace0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 03:53:13 +0000 Subject: rename LeiDaemon package to PublicInbox::LEI "LEI" is an acronym, and ALL CAPS is consistent with existing PublicInbox::{IMAP,HTTP,NNTP,WWW} naming for top-level modules, 3 of 4 old ones which deal directly with sockets and requests. --- lib/PublicInbox/LEI.pm | 692 +++++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiDaemon.pm | 692 ------------------------------------------- 2 files changed, 692 insertions(+), 692 deletions(-) create mode 100644 lib/PublicInbox/LEI.pm delete mode 100644 lib/PublicInbox/LeiDaemon.pm (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm new file mode 100644 index 00000000..b5ba1f71 --- /dev/null +++ b/lib/PublicInbox/LEI.pm @@ -0,0 +1,692 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# Backend for `lei' (local email interface). Unlike the C10K-oriented +# PublicInbox::Daemon, this is designed exclusively to handle trusted +# local clients with read/write access to the FS and use as many +# system resources as the local user has access to. +package PublicInbox::LEI; +use strict; +use v5.10.1; +use parent qw(PublicInbox::DS); +use Getopt::Long (); +use Errno qw(EAGAIN ECONNREFUSED ENOENT); +use POSIX qw(setsid); +use IO::Socket::UNIX; +use IO::Handle (); +use Sys::Syslog qw(syslog openlog); +use PublicInbox::Config; +use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT); +use PublicInbox::Sigfd; +use PublicInbox::DS qw(now); +use PublicInbox::Spawn qw(spawn); +use Text::Wrap qw(wrap); +use File::Path qw(mkpath); +use File::Spec; +our $quit = \&CORE::exit; +my $glp = Getopt::Long::Parser->new; +$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); +our %PATH2CFG; # persistent for socket daemon + +# TBD: this is a documentation mechanism to show a subcommand +# (may) pass options through to another command: +sub pass_through { () } + +# TODO: generate shell completion + help using %CMD and %OPTDESC +# command => [ positional_args, 1-line description, Getopt::Long option spec ] +our %CMD = ( # sorted in order of importance/use: +'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( + save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a + limit|n=i sort|s=s@ reverse|r offset=i remote local! extinbox! + since|after=s until|before=s) ], + +'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', + qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), + pass_through('git show') ], + +'add-extinbox' => [ 'URL_OR_PATHNAME', + 'add/set priority of a publicinbox|extindex for extra matches', + qw(prio=i) ], +'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations', + qw(format|f=s z local remote) ], +'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}', + 'exclude further results from a publicinbox|extindex', + qw(prune) ], + +'ls-query' => [ '[FILTER...]', 'list saved search queries', + qw(name-only format|f=s z) ], +'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ], +'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ], + +'plonk' => [ '--thread|--from=IDENT', + 'exclude mail matching From: or thread from non-Message-ID searches', + qw(stdin| thread|t from|f=s mid=s oid=s) ], +'mark' => [ 'MESSAGE_FLAGS...', + 'set/unset flags on message(s) from stdin', + qw(stdin| oid=s exact by-mid|mid:s) ], +'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', + 'exclude message(s) on stdin from query results', + qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], + +'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}', + 'remove imported messages from IMAP, Maildirs, and MH', + qw(exact! all jobs:i indexed) ], + +# code repos are used for `show' to solve blobs from patch mails +'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', + qw(prio=i) ], +'ls-coderepo' => [ '[FILTER_TERMS...]', + 'list known code repos', qw(format|f=s z) ], +'forget-coderepo' => [ 'PATHNAME', + 'stop using repo to solve blobs from patches', + qw(prune) ], + +'add-watch' => [ '[URL_OR_PATHNAME]', + 'watch for new messages and flag changes', + qw(import! flags! interval=s recursive|r exclude=s include=s) ], +'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', + qw(format|f=s z) ], +'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], +'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], +'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', + qw(prune) ], + +'import' => [ '{URL_OR_PATHNAME|--stdin}', + 'one-shot import/update from URL or filesystem', + qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags), + ], + +'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config', + pass_through('git config') ], +'init' => [ '[PATHNAME]', + 'initialize storage, default: ~/.local/share/lei/store', + qw(quiet|q) ], +'daemon-stop' => [ '', 'stop the lei-daemon' ], +'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], +'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment', + qw(clear| unset|u=s@ z|0) ], +'help' => [ '[SUBCOMMAND]', 'show help' ], + +# XXX do we need this? +# 'git' => [ '[ANYTHING...]', 'git(1) wrapper', pass_through('git') ], + +'reorder-local-store-and-break-history' => [ '[REFNAME]', + 'rewrite git history in an attempt to improve compression', + 'gc!' ] +); # @CMD + +# switch descriptions, try to keep consistent across commands +# $spec: Getopt::Long option specification +# $spec => [@ALLOWED_VALUES (default is first), $description], +# $spec => $description +# "$SUB_COMMAND TAB $spec" => as above +my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo', + 'specify message input format' ]; +my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; + +my %OPTDESC = ( +'help|h' => 'show this built-in help', +'quiet|q' => 'be quiet', +'solve!' => 'do not attempt to reconstruct blobs from emails', +'save-as=s' => ['NAME', 'save a search terms by given name'], + +'type=s' => [ 'any|mid|git', 'disambiguate type' ], + +'dedupe|d=s' => ['STRAT|content|oid|mid', + 'deduplication strategy'], +'show thread|t' => 'display entire thread a message belongs to', +'query thread|t' => + 'return all messages in the same thread as the actual match(es)', +'augment|a' => 'augment --output destination instead of clobbering', + +'output|o=s' => [ 'DEST', + "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ], + +'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl', + 'message/object output format' ], +'mark format|f=s' => $stdin_formats, +'forget format|f=s' => $stdin_formats, +'query format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid', + 'specify output format, default depends on --output'], +'ls-query format|f=s' => $ls_format, +'ls-extinbox format|f=s' => $ls_format, + +'limit|n=i' => ['NUM', + 'limit on number of matches (default: 10000)' ], +'offset=i' => ['OFF', 'search result offset (default: 0)'], + +'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', + "order of results `--output'-dependent"], + +'prio=i' => 'priority of query source', + +'local' => 'limit operations to the local filesystem', +'local!' => 'exclude results from the local filesystem', +'remote' => 'limit operations to those requiring network access', +'remote!' => 'prevent operations requiring network access', + +'mid=s' => 'specify the Message-ID of a message', +'oid=s' => 'specify the git object ID of a message', + +'recursive|r' => 'scan directories/mailboxes/newsgroups recursively', +'exclude=s' => 'exclude mailboxes/newsgroups based on pattern', +'include=s' => 'include mailboxes/newsgroups based on pattern', + +'exact' => 'operate on exact header matches only', +'exact!' => 'rely on content match instead of exact header matches', + +'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], +'jobs:i' => 'set parallelism level', + +# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere? +'z' => 'use NUL \\0 instead of newline (CR) to delimit lines', +'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', + +# note: no "--ignore-environment" / "-i" support like env(1) since that +# is one-shot and this is for a persistent daemon: +'clear|' => 'clear the daemon environment', +'unset|u=s@' => ['NAME', + 'unset matching NAME, may be specified multiple times'], +); # %OPTDESC + +sub x_it ($$) { # pronounced "exit" + my ($client, $code) = @_; + if (my $sig = ($code & 127)) { + kill($sig, $client->{pid} // $$); + } else { + $code >>= 8; + if (my $sock = $client->{sock}) { + say $sock "exit=$code"; + } else { # for oneshot + $quit->($code); + } + } +} + +sub emit { + my ($client, $channel) = @_; # $buf = $_[2] + print { $client->{$channel} } $_[2] or die "print FD[$channel]: $!"; +} + +sub err { + my ($client, $buf) = @_; + $buf .= "\n" unless $buf =~ /\n\z/s; + emit($client, 2, $buf); +} + +sub qerr { $_[0]->{opt}->{quiet} or err(@_) } + +sub fail ($$;$) { + my ($client, $buf, $exit_code) = @_; + err($client, $buf); + x_it($client, ($exit_code // 1) << 8); + undef; +} + +sub _help ($;$) { + my ($client, $errmsg) = @_; + my $cmd = $client->{cmd} // 'COMMAND'; + my @info = @{$CMD{$cmd} // [ '...', '...' ]}; + my @top = ($cmd, shift(@info) // ()); + my $cmd_desc = shift(@info); + my @opt_desc; + my $lpad = 2; + for my $sw (@info) { # qw(prio=s + my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; + my $arg_vals = ''; + ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; + + # lower-case is a keyword (e.g. `content', `oid'), + # ALL_CAPS is a string description (e.g. `PATH') + if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { + $desc .= "\ndefault: `$1'"; + } + my (@vals, @s, @l); + my $x = $sw; + if ($x =~ s/!\z//) { # solve! => --no-solve + $x = "no-$x"; + } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" + @vals = (' [', undef, ']'); + } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" + @vals = (' ', undef); + } # else: no args $x = 'thread|t' + for (split(/\|/, $x)) { # help|h + length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); + } + if (!scalar(@vals)) { # no args 'thread|t' + } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" + $vals[1] = $1; + } else { + $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" + } + if ($arg_vals =~ /([,\|])/) { + my $sep = $1; + my @allow = split(/\Q$sep\E/, $arg_vals); + my $must = $sep eq '|' ? 'Must' : 'Can'; + @allow = map { "`$_'" } @allow; + my $last = pop @allow; + $desc .= "\n$must be one of: " . + join(', ', @allow) . " or $last"; + } + my $lhs = join(', ', @s, @l) . join('', @vals); + if ($x =~ /\|\z/) { # "stdin|" or "clear|" + $lhs =~ s/\A--/- , --/; + } else { + $lhs =~ s/\A--/ --/; # pad if no short options + } + $lpad = length($lhs) if length($lhs) > $lpad; + push @opt_desc, $lhs, $desc; + } + my $msg = $errmsg ? "E: $errmsg\n" : ''; + $msg .= < failure + undef; +} + +sub optparse ($$$) { + my ($client, $cmd, $argv) = @_; + $client->{cmd} = $cmd; + my $opt = $client->{opt} = {}; + my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ]; + my ($proto, $desc, @spec) = @$info; + push @spec, qw(help|h); + my $lone_dash; + if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias + $lone_dash = $spec[0]; + $opt->{$spec[0]} = \(my $var); + push @spec, '' => \$var; + } + $glp->getoptionsfromarray($argv, $opt, @spec) or + return _help($client, "bad arguments or options for $cmd"); + return _help($client) if $opt->{help}; + + # "-" aliases "stdin" or "clear" + $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash; + + my $i = 0; + my $POS_ARG = '[A-Z][A-Z0-9_]+'; + my ($err, $inf); + my @args = split(/ /, $proto); + for my $var (@args) { + if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args; + $inf = defined($argv->[$i]) and last; + $var =~ s/\.\.\.\z//; + $err = "$var not supplied"; + } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i + $argv->[$i++] // ($err = "$var not supplied"); + } elsif ($var =~ /\.\.\.\]\z/) { # optional args start + $inf = 1; + last; + } elsif ($var =~ /\A\[$POS_ARG\]\z/) { # one optional arg + $i++; + } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin + my @or = split(/\|/, $var); + my $ok; + for my $o (@or) { + if ($o =~ /\A--([a-z0-9\-]+)/) { + $ok = defined($opt->{$1}); + last; + } elsif (defined($argv->[$i])) { + $ok = 1; + $i++; + last; + } # else continue looping + } + my $last = pop @or; + $err = join(', ', @or) . " or $last must be set"; + } else { + warn "BUG: can't parse `$var' in $proto"; + } + last if $err; + } + # warn "inf=$inf ".scalar(@$argv). ' '.scalar(@args)."\n"; + if (!$inf && scalar(@$argv) > scalar(@args)) { + $err //= 'too many arguments'; + } + $err ? fail($client, "usage: lei $cmd $proto\nE: $err") : 1; +} + +sub dispatch { + my ($client, $cmd, @argv) = @_; + local $SIG{__WARN__} = sub { err($client, "@_") }; + local $SIG{__DIE__} = 'DEFAULT'; + return _help($client, 'no command given') unless defined($cmd); + my $func = "lei_$cmd"; + $func =~ tr/-/_/; + if (my $cb = __PACKAGE__->can($func)) { + optparse($client, $cmd, \@argv) or return; + $cb->($client, @argv); + } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only + my $opt = {}; + $glp->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or + return _help($client, 'bad arguments or options'); + _help($client); + } else { + fail($client, "`$cmd' is not an lei command"); + } +} + +sub _lei_cfg ($;$) { + my ($client, $creat) = @_; + my $env = $client->{env}; + my $cfg_dir = File::Spec->canonpath(( $env->{XDG_CONFIG_HOME} // + ($env->{HOME} // '/nonexistent').'/.config').'/lei'); + my $f = "$cfg_dir/config"; + my @st = stat($f); + my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size + if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case + return ($client->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; + } + if (!@st) { + unless ($creat) { + delete $client->{cfg}; + return; + } + -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n"; + open my $fh, '>>', $f or die "open($f): $!\n"; + @st = stat($fh) or die "fstat($f): $!\n"; + $cur_st = pack('dd', $st[10], $st[7]); + qerr($client, "I: $f created"); + } + my $cfg = PublicInbox::Config::git_config_dump($f); + $cfg->{-st} = $cur_st; + $cfg->{'-f'} = $f; + $client->{cfg} = $PATH2CFG{$f} = $cfg; +} + +sub _lei_store ($;$) { + my ($client, $creat) = @_; + my $cfg = _lei_cfg($client, $creat); + $cfg->{-lei_store} //= do { + require PublicInbox::LeiStore; + PublicInbox::SearchIdx::load_xapian_writable(); + defined(my $dir = $cfg->{'leistore.dir'}) or return; + PublicInbox::LeiStore->new($dir, { creat => $creat }); + }; +} + +sub lei_show { + my ($client, @argv) = @_; +} + +sub lei_query { + my ($client, @argv) = @_; +} + +sub lei_mark { + my ($client, @argv) = @_; +} + +sub lei_config { + my ($client, @argv) = @_; + my $env = $client->{env}; + if (defined $env->{GIT_CONFIG}) { + my %copy = %$env; + delete $copy{GIT_CONFIG}; + $env = \%copy; + } + if (my @conflict = (grep(/\A-f=?\z/, @argv), + grep(/\A--(?:global|system| + file|config-file)=?\z/x, @argv))) { + return fail($client, "@conflict not supported by lei config"); + } + my $cfg = _lei_cfg($client, 1); + my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; + my %rdr = map { $_ => $client->{$_} } (0..2); + require PublicInbox::Import; + PublicInbox::Import::run_die($cmd, $env, \%rdr); +} + +sub lei_init { + my ($client, $dir) = @_; + my $cfg = _lei_cfg($client, 1); + my $cur = $cfg->{'leistore.dir'}; + my $env = $client->{env}; + $dir //= ( $env->{XDG_DATA_HOME} // + ($env->{HOME} // '/nonexistent').'/.local/share' + ) . '/lei/store'; + $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware + my @cur = stat($cur) if defined($cur); + $cur = File::Spec->canonpath($cur) if $cur; + my @dir = stat($dir); + my $exists = "I: leistore.dir=$cur already initialized" if @dir; + if (@cur) { + if ($cur eq $dir) { + _lei_store($client, 1)->done; + return qerr($client, $exists); + } + + # some folks like symlinks and bind mounts :P + if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") { + lei_config($client, 'leistore.dir', $dir); + _lei_store($client, 1)->done; + return qerr($client, "$exists (as $cur)"); + } + return fail($client, <<""); +E: leistore.dir=$cur already initialized and it is not $dir + + } + lei_config($client, 'leistore.dir', $dir); + _lei_store($client, 1)->done; + $exists //= "I: leistore.dir=$dir newly initialized"; + return qerr($client, $exists); +} + +sub lei_daemon_pid { emit($_[0], 1, "$$\n") } + +sub lei_daemon_stop { $quit->(0) } + +sub lei_daemon_env { + my ($client, @argv) = @_; + my $opt = $client->{opt}; + if (defined $opt->{clear}) { + %ENV = (); + } elsif (my $u = $opt->{unset}) { + delete @ENV{@$u}; + } + if (@argv) { + %ENV = (%ENV, map { split(/=/, $_, 2) } @argv); + } elsif (!defined($opt->{clear}) && !$opt->{unset}) { + my $eor = $opt->{z} ? "\0" : "\n"; + my $buf = ''; + while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" } + emit($client, 1, $buf) + } +} + +sub lei_help { _help($_[0]) } + +sub reap_exec { # dwaitpid callback + my ($client, $pid) = @_; + x_it($client, $?); +} + +sub lei_git { # support passing through random git commands + my ($client, @argv) = @_; + my %rdr = map { $_ => $client->{$_} } (0..2); + my $pid = spawn(['git', @argv], $client->{env}, \%rdr); + PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client); +} + +sub accept_dispatch { # Listener {post_accept} callback + my ($sock) = @_; # ignore other + $sock->blocking(1); + $sock->autoflush(1); + my $client = { sock => $sock }; + vec(my $rin = '', fileno($sock), 1) = 1; + # `say $sock' triggers "die" in lei(1) + for my $i (0..2) { + if (select(my $rout = $rin, undef, undef, 1)) { + my $fd = IO::FDPass::recv(fileno($sock)); + if ($fd >= 0) { + my $rdr = ($fd == 0 ? '<&=' : '>&='); + if (open(my $fh, $rdr, $fd)) { + $client->{$i} = $fh; + } else { + say $sock "open($rdr$fd) (FD=$i): $!"; + return; + } + } else { + say $sock "recv FD=$i: $!"; + return; + } + } else { + say $sock "timed out waiting to recv FD=$i"; + return; + } + } + # $ARGV_STR = join("]\0[", @ARGV); + # $ENV_STR = join('', map { "$_=$ENV{$_}\0" } keys %ENV); + # $line = "$$\0\0>$ARGV_STR\0\0>$ENV_STR\0\0"; + my ($client_pid, $argv, $env) = do { + local $/ = "\0\0\0"; # yes, 3 NULs at EOL, not 2 + chomp(my $line = <$sock>); + split(/\0\0>/, $line, 3); + }; + my %env = map { split(/=/, $_, 2) } split(/\0/, $env); + if (chdir($env{PWD})) { + $client->{env} = \%env; + $client->{pid} = $client_pid; + eval { dispatch($client, split(/\]\0\[/, $argv)) }; + say $sock $@ if $@; + } else { + say $sock "chdir($env{PWD}): $!"; # implicit close + } +} + +sub noop {} + +# lei(1) calls this when it can't connect +sub lazy_start { + my ($path, $err) = @_; + if ($err == ECONNREFUSED) { + unlink($path) or die "unlink($path): $!"; + } elsif ($err != ENOENT) { + die "connect($path): $!"; + } + require IO::FDPass; + umask(077) // die("umask(077): $!"); + my $l = IO::Socket::UNIX->new(Local => $path, + Listen => 1024, + Type => SOCK_STREAM) or + $err = $!; + $l or return die "bind($path): $err"; + my @st = stat($path) or die "stat($path): $!"; + my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino + pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; + my $oldset = PublicInbox::Sigfd::block_signals(); + my $pid = fork // die "fork: $!"; + return if $pid; + openlog($path, 'pid', 'user'); + local $SIG{__DIE__} = sub { + syslog('crit', "@_"); + exit $! if $!; + exit $? >> 8 if $? >> 8; + exit 255; + }; + local $SIG{__WARN__} = sub { syslog('warning', "@_") }; + open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; + open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; + open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; + setsid(); + $pid = fork // die "fork: $!"; + return if $pid; + $0 = "lei-daemon $path"; + local %PATH2CFG; + require PublicInbox::Listener; + require PublicInbox::EOFpipe; + $l->blocking(0); + $eof_w->blocking(0); + $eof_r->blocking(0); + my $listener = PublicInbox::Listener->new($l, \&accept_dispatch, $l); + my $exit_code; + local $quit = sub { + $exit_code //= shift; + my $tmp = $listener or exit($exit_code); + unlink($path) if defined($path); + syswrite($eof_w, '.'); + $l = $listener = $path = undef; + $tmp->close if $tmp; # DS::close + PublicInbox::DS->SetLoopTimeout(1000); + }; + PublicInbox::EOFpipe->new($eof_r, sub {}, undef); + my $sig = { + CHLD => \&PublicInbox::DS::enqueue_reap, + QUIT => $quit, + INT => $quit, + TERM => $quit, + HUP => \&noop, + USR1 => \&noop, + USR2 => \&noop, + }; + my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); + local %SIG = (%SIG, %$sig) if !$sigfd; + if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets + PublicInbox::DS->SetLoopTimeout(5000); + } else { + # wake up every second to accept signals if we don't + # have signalfd or IO::KQueue: + PublicInbox::Sigfd::sig_setmask($oldset); + PublicInbox::DS->SetLoopTimeout(1000); + } + PublicInbox::DS->SetPostLoopCallback(sub { + my ($dmap, undef) = @_; + if (@st = defined($path) ? stat($path) : ()) { + if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) { + warn "$path dev/ino changed, quitting\n"; + $path = undef; + } + } elsif (defined($path)) { + warn "stat($path): $!, quitting ...\n"; + undef $path; # don't unlink + $quit->(); + } + return 1 if defined($path); + my $now = now(); + my $n = 0; + for my $s (values %$dmap) { + $s->can('busy') or next; + if ($s->busy($now)) { + ++$n; + } else { + $s->close; + } + } + $n; # true: continue, false: stop + }); + PublicInbox::DS->EventLoop; + exit($exit_code // 0); +} + +# for users w/o IO::FDPass +sub oneshot { + my ($main_pkg) = @_; + my $exit = $main_pkg->can('exit'); # caller may override exit() + local $quit = $exit if $exit; + local %PATH2CFG; + umask(077) // die("umask(077): $!"); + dispatch({ + 0 => *STDIN{IO}, + 1 => *STDOUT{IO}, + 2 => *STDERR{IO}, + env => \%ENV + }, @ARGV); +} + +1; diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm deleted file mode 100644 index 56f4aa7d..00000000 --- a/lib/PublicInbox/LeiDaemon.pm +++ /dev/null @@ -1,692 +0,0 @@ -# Copyright (C) 2020 all contributors -# License: AGPL-3.0+ - -# Backend for `lei' (local email interface). Unlike the C10K-oriented -# PublicInbox::Daemon, this is designed exclusively to handle trusted -# local clients with read/write access to the FS and use as many -# system resources as the local user has access to. -package PublicInbox::LeiDaemon; -use strict; -use v5.10.1; -use parent qw(PublicInbox::DS); -use Getopt::Long (); -use Errno qw(EAGAIN ECONNREFUSED ENOENT); -use POSIX qw(setsid); -use IO::Socket::UNIX; -use IO::Handle (); -use Sys::Syslog qw(syslog openlog); -use PublicInbox::Config; -use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT); -use PublicInbox::Sigfd; -use PublicInbox::DS qw(now); -use PublicInbox::Spawn qw(spawn); -use Text::Wrap qw(wrap); -use File::Path qw(mkpath); -use File::Spec; -our $quit = \&CORE::exit; -my $glp = Getopt::Long::Parser->new; -$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); -our %PATH2CFG; # persistent for socket daemon - -# TBD: this is a documentation mechanism to show a subcommand -# (may) pass options through to another command: -sub pass_through { () } - -# TODO: generate shell completion + help using %CMD and %OPTDESC -# command => [ positional_args, 1-line description, Getopt::Long option spec ] -our %CMD = ( # sorted in order of importance/use: -'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( - save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a - limit|n=i sort|s=s@ reverse|r offset=i remote local! extinbox! - since|after=s until|before=s) ], - -'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', - qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), - pass_through('git show') ], - -'add-extinbox' => [ 'URL_OR_PATHNAME', - 'add/set priority of a publicinbox|extindex for extra matches', - qw(prio=i) ], -'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations', - qw(format|f=s z local remote) ], -'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}', - 'exclude further results from a publicinbox|extindex', - qw(prune) ], - -'ls-query' => [ '[FILTER...]', 'list saved search queries', - qw(name-only format|f=s z) ], -'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ], -'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ], - -'plonk' => [ '--thread|--from=IDENT', - 'exclude mail matching From: or thread from non-Message-ID searches', - qw(stdin| thread|t from|f=s mid=s oid=s) ], -'mark' => [ 'MESSAGE_FLAGS...', - 'set/unset flags on message(s) from stdin', - qw(stdin| oid=s exact by-mid|mid:s) ], -'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', - 'exclude message(s) on stdin from query results', - qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], - -'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}', - 'remove imported messages from IMAP, Maildirs, and MH', - qw(exact! all jobs:i indexed) ], - -# code repos are used for `show' to solve blobs from patch mails -'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', - qw(prio=i) ], -'ls-coderepo' => [ '[FILTER_TERMS...]', - 'list known code repos', qw(format|f=s z) ], -'forget-coderepo' => [ 'PATHNAME', - 'stop using repo to solve blobs from patches', - qw(prune) ], - -'add-watch' => [ '[URL_OR_PATHNAME]', - 'watch for new messages and flag changes', - qw(import! flags! interval=s recursive|r exclude=s include=s) ], -'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', - qw(format|f=s z) ], -'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], -'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], -'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', - qw(prune) ], - -'import' => [ '{URL_OR_PATHNAME|--stdin}', - 'one-shot import/update from URL or filesystem', - qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags), - ], - -'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config', - pass_through('git config') ], -'init' => [ '[PATHNAME]', - 'initialize storage, default: ~/.local/share/lei/store', - qw(quiet|q) ], -'daemon-stop' => [ '', 'stop the lei-daemon' ], -'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], -'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment', - qw(clear| unset|u=s@ z|0) ], -'help' => [ '[SUBCOMMAND]', 'show help' ], - -# XXX do we need this? -# 'git' => [ '[ANYTHING...]', 'git(1) wrapper', pass_through('git') ], - -'reorder-local-store-and-break-history' => [ '[REFNAME]', - 'rewrite git history in an attempt to improve compression', - 'gc!' ] -); # @CMD - -# switch descriptions, try to keep consistent across commands -# $spec: Getopt::Long option specification -# $spec => [@ALLOWED_VALUES (default is first), $description], -# $spec => $description -# "$SUB_COMMAND TAB $spec" => as above -my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo', - 'specify message input format' ]; -my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; - -my %OPTDESC = ( -'help|h' => 'show this built-in help', -'quiet|q' => 'be quiet', -'solve!' => 'do not attempt to reconstruct blobs from emails', -'save-as=s' => ['NAME', 'save a search terms by given name'], - -'type=s' => [ 'any|mid|git', 'disambiguate type' ], - -'dedupe|d=s' => ['STRAT|content|oid|mid', - 'deduplication strategy'], -'show thread|t' => 'display entire thread a message belongs to', -'query thread|t' => - 'return all messages in the same thread as the actual match(es)', -'augment|a' => 'augment --output destination instead of clobbering', - -'output|o=s' => [ 'DEST', - "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ], - -'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl', - 'message/object output format' ], -'mark format|f=s' => $stdin_formats, -'forget format|f=s' => $stdin_formats, -'query format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid', - 'specify output format, default depends on --output'], -'ls-query format|f=s' => $ls_format, -'ls-extinbox format|f=s' => $ls_format, - -'limit|n=i' => ['NUM', - 'limit on number of matches (default: 10000)' ], -'offset=i' => ['OFF', 'search result offset (default: 0)'], - -'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', - "order of results `--output'-dependent"], - -'prio=i' => 'priority of query source', - -'local' => 'limit operations to the local filesystem', -'local!' => 'exclude results from the local filesystem', -'remote' => 'limit operations to those requiring network access', -'remote!' => 'prevent operations requiring network access', - -'mid=s' => 'specify the Message-ID of a message', -'oid=s' => 'specify the git object ID of a message', - -'recursive|r' => 'scan directories/mailboxes/newsgroups recursively', -'exclude=s' => 'exclude mailboxes/newsgroups based on pattern', -'include=s' => 'include mailboxes/newsgroups based on pattern', - -'exact' => 'operate on exact header matches only', -'exact!' => 'rely on content match instead of exact header matches', - -'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], -'jobs:i' => 'set parallelism level', - -# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere? -'z' => 'use NUL \\0 instead of newline (CR) to delimit lines', -'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', - -# note: no "--ignore-environment" / "-i" support like env(1) since that -# is one-shot and this is for a persistent daemon: -'clear|' => 'clear the daemon environment', -'unset|u=s@' => ['NAME', - 'unset matching NAME, may be specified multiple times'], -); # %OPTDESC - -sub x_it ($$) { # pronounced "exit" - my ($client, $code) = @_; - if (my $sig = ($code & 127)) { - kill($sig, $client->{pid} // $$); - } else { - $code >>= 8; - if (my $sock = $client->{sock}) { - say $sock "exit=$code"; - } else { # for oneshot - $quit->($code); - } - } -} - -sub emit { - my ($client, $channel) = @_; # $buf = $_[2] - print { $client->{$channel} } $_[2] or die "print FD[$channel]: $!"; -} - -sub err { - my ($client, $buf) = @_; - $buf .= "\n" unless $buf =~ /\n\z/s; - emit($client, 2, $buf); -} - -sub qerr { $_[0]->{opt}->{quiet} or err(@_) } - -sub fail ($$;$) { - my ($client, $buf, $exit_code) = @_; - err($client, $buf); - x_it($client, ($exit_code // 1) << 8); - undef; -} - -sub _help ($;$) { - my ($client, $errmsg) = @_; - my $cmd = $client->{cmd} // 'COMMAND'; - my @info = @{$CMD{$cmd} // [ '...', '...' ]}; - my @top = ($cmd, shift(@info) // ()); - my $cmd_desc = shift(@info); - my @opt_desc; - my $lpad = 2; - for my $sw (@info) { # qw(prio=s - my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; - my $arg_vals = ''; - ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; - - # lower-case is a keyword (e.g. `content', `oid'), - # ALL_CAPS is a string description (e.g. `PATH') - if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { - $desc .= "\ndefault: `$1'"; - } - my (@vals, @s, @l); - my $x = $sw; - if ($x =~ s/!\z//) { # solve! => --no-solve - $x = "no-$x"; - } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" - @vals = (' [', undef, ']'); - } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" - @vals = (' ', undef); - } # else: no args $x = 'thread|t' - for (split(/\|/, $x)) { # help|h - length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); - } - if (!scalar(@vals)) { # no args 'thread|t' - } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" - $vals[1] = $1; - } else { - $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" - } - if ($arg_vals =~ /([,\|])/) { - my $sep = $1; - my @allow = split(/\Q$sep\E/, $arg_vals); - my $must = $sep eq '|' ? 'Must' : 'Can'; - @allow = map { "`$_'" } @allow; - my $last = pop @allow; - $desc .= "\n$must be one of: " . - join(', ', @allow) . " or $last"; - } - my $lhs = join(', ', @s, @l) . join('', @vals); - if ($x =~ /\|\z/) { # "stdin|" or "clear|" - $lhs =~ s/\A--/- , --/; - } else { - $lhs =~ s/\A--/ --/; # pad if no short options - } - $lpad = length($lhs) if length($lhs) > $lpad; - push @opt_desc, $lhs, $desc; - } - my $msg = $errmsg ? "E: $errmsg\n" : ''; - $msg .= < failure - undef; -} - -sub optparse ($$$) { - my ($client, $cmd, $argv) = @_; - $client->{cmd} = $cmd; - my $opt = $client->{opt} = {}; - my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ]; - my ($proto, $desc, @spec) = @$info; - push @spec, qw(help|h); - my $lone_dash; - if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias - $lone_dash = $spec[0]; - $opt->{$spec[0]} = \(my $var); - push @spec, '' => \$var; - } - $glp->getoptionsfromarray($argv, $opt, @spec) or - return _help($client, "bad arguments or options for $cmd"); - return _help($client) if $opt->{help}; - - # "-" aliases "stdin" or "clear" - $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash; - - my $i = 0; - my $POS_ARG = '[A-Z][A-Z0-9_]+'; - my ($err, $inf); - my @args = split(/ /, $proto); - for my $var (@args) { - if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args; - $inf = defined($argv->[$i]) and last; - $var =~ s/\.\.\.\z//; - $err = "$var not supplied"; - } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i - $argv->[$i++] // ($err = "$var not supplied"); - } elsif ($var =~ /\.\.\.\]\z/) { # optional args start - $inf = 1; - last; - } elsif ($var =~ /\A\[$POS_ARG\]\z/) { # one optional arg - $i++; - } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin - my @or = split(/\|/, $var); - my $ok; - for my $o (@or) { - if ($o =~ /\A--([a-z0-9\-]+)/) { - $ok = defined($opt->{$1}); - last; - } elsif (defined($argv->[$i])) { - $ok = 1; - $i++; - last; - } # else continue looping - } - my $last = pop @or; - $err = join(', ', @or) . " or $last must be set"; - } else { - warn "BUG: can't parse `$var' in $proto"; - } - last if $err; - } - # warn "inf=$inf ".scalar(@$argv). ' '.scalar(@args)."\n"; - if (!$inf && scalar(@$argv) > scalar(@args)) { - $err //= 'too many arguments'; - } - $err ? fail($client, "usage: lei $cmd $proto\nE: $err") : 1; -} - -sub dispatch { - my ($client, $cmd, @argv) = @_; - local $SIG{__WARN__} = sub { err($client, "@_") }; - local $SIG{__DIE__} = 'DEFAULT'; - return _help($client, 'no command given') unless defined($cmd); - my $func = "lei_$cmd"; - $func =~ tr/-/_/; - if (my $cb = __PACKAGE__->can($func)) { - optparse($client, $cmd, \@argv) or return; - $cb->($client, @argv); - } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only - my $opt = {}; - $glp->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or - return _help($client, 'bad arguments or options'); - _help($client); - } else { - fail($client, "`$cmd' is not an lei command"); - } -} - -sub _lei_cfg ($;$) { - my ($client, $creat) = @_; - my $env = $client->{env}; - my $cfg_dir = File::Spec->canonpath(( $env->{XDG_CONFIG_HOME} // - ($env->{HOME} // '/nonexistent').'/.config').'/lei'); - my $f = "$cfg_dir/config"; - my @st = stat($f); - my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size - if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case - return ($client->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; - } - if (!@st) { - unless ($creat) { - delete $client->{cfg}; - return; - } - -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n"; - open my $fh, '>>', $f or die "open($f): $!\n"; - @st = stat($fh) or die "fstat($f): $!\n"; - $cur_st = pack('dd', $st[10], $st[7]); - qerr($client, "I: $f created"); - } - my $cfg = PublicInbox::Config::git_config_dump($f); - $cfg->{-st} = $cur_st; - $cfg->{'-f'} = $f; - $client->{cfg} = $PATH2CFG{$f} = $cfg; -} - -sub _lei_store ($;$) { - my ($client, $creat) = @_; - my $cfg = _lei_cfg($client, $creat); - $cfg->{-lei_store} //= do { - require PublicInbox::LeiStore; - PublicInbox::SearchIdx::load_xapian_writable(); - defined(my $dir = $cfg->{'leistore.dir'}) or return; - PublicInbox::LeiStore->new($dir, { creat => $creat }); - }; -} - -sub lei_show { - my ($client, @argv) = @_; -} - -sub lei_query { - my ($client, @argv) = @_; -} - -sub lei_mark { - my ($client, @argv) = @_; -} - -sub lei_config { - my ($client, @argv) = @_; - my $env = $client->{env}; - if (defined $env->{GIT_CONFIG}) { - my %copy = %$env; - delete $copy{GIT_CONFIG}; - $env = \%copy; - } - if (my @conflict = (grep(/\A-f=?\z/, @argv), - grep(/\A--(?:global|system| - file|config-file)=?\z/x, @argv))) { - return fail($client, "@conflict not supported by lei config"); - } - my $cfg = _lei_cfg($client, 1); - my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; - my %rdr = map { $_ => $client->{$_} } (0..2); - require PublicInbox::Import; - PublicInbox::Import::run_die($cmd, $env, \%rdr); -} - -sub lei_init { - my ($client, $dir) = @_; - my $cfg = _lei_cfg($client, 1); - my $cur = $cfg->{'leistore.dir'}; - my $env = $client->{env}; - $dir //= ( $env->{XDG_DATA_HOME} // - ($env->{HOME} // '/nonexistent').'/.local/share' - ) . '/lei/store'; - $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware - my @cur = stat($cur) if defined($cur); - $cur = File::Spec->canonpath($cur) if $cur; - my @dir = stat($dir); - my $exists = "I: leistore.dir=$cur already initialized" if @dir; - if (@cur) { - if ($cur eq $dir) { - _lei_store($client, 1)->done; - return qerr($client, $exists); - } - - # some folks like symlinks and bind mounts :P - if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") { - lei_config($client, 'leistore.dir', $dir); - _lei_store($client, 1)->done; - return qerr($client, "$exists (as $cur)"); - } - return fail($client, <<""); -E: leistore.dir=$cur already initialized and it is not $dir - - } - lei_config($client, 'leistore.dir', $dir); - _lei_store($client, 1)->done; - $exists //= "I: leistore.dir=$dir newly initialized"; - return qerr($client, $exists); -} - -sub lei_daemon_pid { emit($_[0], 1, "$$\n") } - -sub lei_daemon_stop { $quit->(0) } - -sub lei_daemon_env { - my ($client, @argv) = @_; - my $opt = $client->{opt}; - if (defined $opt->{clear}) { - %ENV = (); - } elsif (my $u = $opt->{unset}) { - delete @ENV{@$u}; - } - if (@argv) { - %ENV = (%ENV, map { split(/=/, $_, 2) } @argv); - } elsif (!defined($opt->{clear}) && !$opt->{unset}) { - my $eor = $opt->{z} ? "\0" : "\n"; - my $buf = ''; - while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" } - emit($client, 1, $buf) - } -} - -sub lei_help { _help($_[0]) } - -sub reap_exec { # dwaitpid callback - my ($client, $pid) = @_; - x_it($client, $?); -} - -sub lei_git { # support passing through random git commands - my ($client, @argv) = @_; - my %rdr = map { $_ => $client->{$_} } (0..2); - my $pid = spawn(['git', @argv], $client->{env}, \%rdr); - PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client); -} - -sub accept_dispatch { # Listener {post_accept} callback - my ($sock) = @_; # ignore other - $sock->blocking(1); - $sock->autoflush(1); - my $client = { sock => $sock }; - vec(my $rin = '', fileno($sock), 1) = 1; - # `say $sock' triggers "die" in lei(1) - for my $i (0..2) { - if (select(my $rout = $rin, undef, undef, 1)) { - my $fd = IO::FDPass::recv(fileno($sock)); - if ($fd >= 0) { - my $rdr = ($fd == 0 ? '<&=' : '>&='); - if (open(my $fh, $rdr, $fd)) { - $client->{$i} = $fh; - } else { - say $sock "open($rdr$fd) (FD=$i): $!"; - return; - } - } else { - say $sock "recv FD=$i: $!"; - return; - } - } else { - say $sock "timed out waiting to recv FD=$i"; - return; - } - } - # $ARGV_STR = join("]\0[", @ARGV); - # $ENV_STR = join('', map { "$_=$ENV{$_}\0" } keys %ENV); - # $line = "$$\0\0>$ARGV_STR\0\0>$ENV_STR\0\0"; - my ($client_pid, $argv, $env) = do { - local $/ = "\0\0\0"; # yes, 3 NULs at EOL, not 2 - chomp(my $line = <$sock>); - split(/\0\0>/, $line, 3); - }; - my %env = map { split(/=/, $_, 2) } split(/\0/, $env); - if (chdir($env{PWD})) { - $client->{env} = \%env; - $client->{pid} = $client_pid; - eval { dispatch($client, split(/\]\0\[/, $argv)) }; - say $sock $@ if $@; - } else { - say $sock "chdir($env{PWD}): $!"; # implicit close - } -} - -sub noop {} - -# lei(1) calls this when it can't connect -sub lazy_start { - my ($path, $err) = @_; - if ($err == ECONNREFUSED) { - unlink($path) or die "unlink($path): $!"; - } elsif ($err != ENOENT) { - die "connect($path): $!"; - } - require IO::FDPass; - umask(077) // die("umask(077): $!"); - my $l = IO::Socket::UNIX->new(Local => $path, - Listen => 1024, - Type => SOCK_STREAM) or - $err = $!; - $l or return die "bind($path): $err"; - my @st = stat($path) or die "stat($path): $!"; - my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino - pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; - my $oldset = PublicInbox::Sigfd::block_signals(); - my $pid = fork // die "fork: $!"; - return if $pid; - openlog($path, 'pid', 'user'); - local $SIG{__DIE__} = sub { - syslog('crit', "@_"); - exit $! if $!; - exit $? >> 8 if $? >> 8; - exit 255; - }; - local $SIG{__WARN__} = sub { syslog('warning', "@_") }; - open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; - open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; - open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; - setsid(); - $pid = fork // die "fork: $!"; - return if $pid; - $0 = "lei-daemon $path"; - local %PATH2CFG; - require PublicInbox::Listener; - require PublicInbox::EOFpipe; - $l->blocking(0); - $eof_w->blocking(0); - $eof_r->blocking(0); - my $listener = PublicInbox::Listener->new($l, \&accept_dispatch, $l); - my $exit_code; - local $quit = sub { - $exit_code //= shift; - my $tmp = $listener or exit($exit_code); - unlink($path) if defined($path); - syswrite($eof_w, '.'); - $l = $listener = $path = undef; - $tmp->close if $tmp; # DS::close - PublicInbox::DS->SetLoopTimeout(1000); - }; - PublicInbox::EOFpipe->new($eof_r, sub {}, undef); - my $sig = { - CHLD => \&PublicInbox::DS::enqueue_reap, - QUIT => $quit, - INT => $quit, - TERM => $quit, - HUP => \&noop, - USR1 => \&noop, - USR2 => \&noop, - }; - my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); - local %SIG = (%SIG, %$sig) if !$sigfd; - if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets - PublicInbox::DS->SetLoopTimeout(5000); - } else { - # wake up every second to accept signals if we don't - # have signalfd or IO::KQueue: - PublicInbox::Sigfd::sig_setmask($oldset); - PublicInbox::DS->SetLoopTimeout(1000); - } - PublicInbox::DS->SetPostLoopCallback(sub { - my ($dmap, undef) = @_; - if (@st = defined($path) ? stat($path) : ()) { - if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) { - warn "$path dev/ino changed, quitting\n"; - $path = undef; - } - } elsif (defined($path)) { - warn "stat($path): $!, quitting ...\n"; - undef $path; # don't unlink - $quit->(); - } - return 1 if defined($path); - my $now = now(); - my $n = 0; - for my $s (values %$dmap) { - $s->can('busy') or next; - if ($s->busy($now)) { - ++$n; - } else { - $s->close; - } - } - $n; # true: continue, false: stop - }); - PublicInbox::DS->EventLoop; - exit($exit_code // 0); -} - -# for users w/o IO::FDPass -sub oneshot { - my ($main_pkg) = @_; - my $exit = $main_pkg->can('exit'); # caller may override exit() - local $quit = $exit if $exit; - local %PATH2CFG; - umask(077) // die("umask(077): $!"); - dispatch({ - 0 => *STDIN{IO}, - 1 => *STDOUT{IO}, - 2 => *STDERR{IO}, - env => \%ENV - }, @ARGV); -} - -1; -- cgit v1.2.3-24-ge0c7 From 6fd9985615542a4d39569956c9de0b9e99b76ab8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 05:16:16 +0000 Subject: lei: support pass-through for `lei config' This will be a handy wrapper for "git config" for manipulating ~/.config/lei/config. Since we'll have many commands, start breaking up t/lei.t into more distinct sections for ease-of-testing. --- lib/PublicInbox/LEI.pm | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index b5ba1f71..dbd2875d 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -24,13 +24,16 @@ use Text::Wrap qw(wrap); use File::Path qw(mkpath); use File::Spec; our $quit = \&CORE::exit; -my $glp = Getopt::Long::Parser->new; -$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); +my $GLP = Getopt::Long::Parser->new; +$GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev)); +my $GLP_PASS = Getopt::Long::Parser->new; +$GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through)); + our %PATH2CFG; # persistent for socket daemon # TBD: this is a documentation mechanism to show a subcommand # (may) pass options through to another command: -sub pass_through { () } +sub pass_through { $GLP_PASS } # TODO: generate shell completion + help using %CMD and %OPTDESC # command => [ positional_args, 1-line description, Getopt::Long option spec ] @@ -97,7 +100,8 @@ our %CMD = ( # sorted in order of importance/use: ], 'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config', - pass_through('git config') ], + qw(config-file|system|global|file|f=s), # conflict detection + pass_through('git config') ], 'init' => [ '[PATHNAME]', 'initialize storage, default: ~/.local/share/lei/store', qw(quiet|q) ], @@ -231,7 +235,7 @@ sub _help ($;$) { my $cmd_desc = shift(@info); my @opt_desc; my $lpad = 2; - for my $sw (@info) { # qw(prio=s + for my $sw (grep { !ref($_) } @info) { # ("prio=s", "z", $GLP_PASS) my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; my $arg_vals = ''; ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; @@ -305,6 +309,7 @@ sub optparse ($$$) { my $opt = $client->{opt} = {}; my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ]; my ($proto, $desc, @spec) = @$info; + my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS push @spec, qw(help|h); my $lone_dash; if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias @@ -374,7 +379,7 @@ sub dispatch { $cb->($client, @argv); } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only my $opt = {}; - $glp->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or + $GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or return _help($client, 'bad arguments or options'); _help($client); } else { @@ -402,7 +407,7 @@ sub _lei_cfg ($;$) { open my $fh, '>>', $f or die "open($f): $!\n"; @st = stat($fh) or die "fstat($f): $!\n"; $cur_st = pack('dd', $st[10], $st[7]); - qerr($client, "I: $f created"); + qerr($client, "I: $f created") if $client->{cmd} ne 'config'; } my $cfg = PublicInbox::Config::git_config_dump($f); $cfg->{-st} = $cur_st; @@ -435,17 +440,10 @@ sub lei_mark { sub lei_config { my ($client, @argv) = @_; + $client->{opt}->{'config-file'} and return fail $client, + "config file switches not supported by `lei config'"; my $env = $client->{env}; - if (defined $env->{GIT_CONFIG}) { - my %copy = %$env; - delete $copy{GIT_CONFIG}; - $env = \%copy; - } - if (my @conflict = (grep(/\A-f=?\z/, @argv), - grep(/\A--(?:global|system| - file|config-file)=?\z/x, @argv))) { - return fail($client, "@conflict not supported by lei config"); - } + delete local $env->{GIT_CONFIG}; my $cfg = _lei_cfg($client, 1); my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; my %rdr = map { $_ => $client->{$_} } (0..2); -- cgit v1.2.3-24-ge0c7 From 553cb0506c798bc27494294107a0d9e45d5011f5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 05:47:57 +0000 Subject: lei: help: show actual paths being operated on This allows us to respect XDG_* environment variables to override HOME. We'll also make the $lei wrapper easier-to-use by auto-clearing $out/$err and reducing [] needed for common cases. --- lib/PublicInbox/LEI.pm | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index dbd2875d..667ef765 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -35,6 +35,20 @@ our %PATH2CFG; # persistent for socket daemon # (may) pass options through to another command: sub pass_through { $GLP_PASS } +sub _store_path ($) { + my ($env) = @_; + File::Spec->rel2abs(($env->{XDG_DATA_HOME} // + ($env->{HOME} // '/nonexistent').'/.local/share') + .'/lei/store', $env->{PWD}); +} + +sub _config_path ($) { + my ($env) = @_; + File::Spec->rel2abs(($env->{XDG_CONFIG_HOME} // + ($env->{HOME} // '/nonexistent').'/.config') + .'/lei/config', $env->{PWD}); +} + # TODO: generate shell completion + help using %CMD and %OPTDESC # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: @@ -99,12 +113,13 @@ our %CMD = ( # sorted in order of importance/use: qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags), ], -'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config', - qw(config-file|system|global|file|f=s), # conflict detection +'config' => [ '[...]', sub { + 'git-config(1) wrapper for '._config_path($_[0]); + }, qw(config-file|system|global|file|f=s), # for conflict detection pass_through('git config') ], -'init' => [ '[PATHNAME]', - 'initialize storage, default: ~/.local/share/lei/store', - qw(quiet|q) ], +'init' => [ '[PATHNAME]', sub { + 'initialize storage, default: '._store_path($_[0]); + }, qw(quiet|q) ], 'daemon-stop' => [ '', 'stop the lei-daemon' ], 'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], 'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment', @@ -233,9 +248,10 @@ sub _help ($;$) { my @info = @{$CMD{$cmd} // [ '...', '...' ]}; my @top = ($cmd, shift(@info) // ()); my $cmd_desc = shift(@info); + $cmd_desc = $cmd_desc->($client->{env}) if ref($cmd_desc) eq 'CODE'; my @opt_desc; my $lpad = 2; - for my $sw (grep { !ref($_) } @info) { # ("prio=s", "z", $GLP_PASS) + for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS) my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; my $arg_vals = ''; ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; @@ -307,8 +323,8 @@ sub optparse ($$$) { my ($client, $cmd, $argv) = @_; $client->{cmd} = $cmd; my $opt = $client->{opt} = {}; - my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ]; - my ($proto, $desc, @spec) = @$info; + my $info = $CMD{$cmd} // [ '[...]' ]; + my ($proto, undef, @spec) = @$info; my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS push @spec, qw(help|h); my $lone_dash; @@ -389,10 +405,7 @@ sub dispatch { sub _lei_cfg ($;$) { my ($client, $creat) = @_; - my $env = $client->{env}; - my $cfg_dir = File::Spec->canonpath(( $env->{XDG_CONFIG_HOME} // - ($env->{HOME} // '/nonexistent').'/.config').'/lei'); - my $f = "$cfg_dir/config"; + my $f = _config_path($client->{env}); my @st = stat($f); my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case @@ -403,6 +416,7 @@ sub _lei_cfg ($;$) { delete $client->{cfg}; return; } + my (undef, $cfg_dir, undef) = File::Spec->splitpath($f); -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n"; open my $fh, '>>', $f or die "open($f): $!\n"; @st = stat($fh) or die "fstat($f): $!\n"; @@ -456,9 +470,7 @@ sub lei_init { my $cfg = _lei_cfg($client, 1); my $cur = $cfg->{'leistore.dir'}; my $env = $client->{env}; - $dir //= ( $env->{XDG_DATA_HOME} // - ($env->{HOME} // '/nonexistent').'/.local/share' - ) . '/lei/store'; + $dir //= _store_path($env); $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware my @cur = stat($cur) if defined($cur); $cur = File::Spec->canonpath($cur) if $cur; -- cgit v1.2.3-24-ge0c7 From 193e86164403dc0d43e7dc44ee68272897727f81 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 06:20:17 +0000 Subject: lei: rename $client => $self and bless lei will get bigger, so follow existing OO conventions to make it easy to call methods in PublicInbox::LEI from other packages. --- lib/PublicInbox/LEI.pm | 146 ++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 73 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 667ef765..f5824c59 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -209,12 +209,12 @@ my %OPTDESC = ( ); # %OPTDESC sub x_it ($$) { # pronounced "exit" - my ($client, $code) = @_; + my ($self, $code) = @_; if (my $sig = ($code & 127)) { - kill($sig, $client->{pid} // $$); + kill($sig, $self->{pid} // $$); } else { $code >>= 8; - if (my $sock = $client->{sock}) { + if (my $sock = $self->{sock}) { say $sock "exit=$code"; } else { # for oneshot $quit->($code); @@ -223,32 +223,32 @@ sub x_it ($$) { # pronounced "exit" } sub emit { - my ($client, $channel) = @_; # $buf = $_[2] - print { $client->{$channel} } $_[2] or die "print FD[$channel]: $!"; + my ($self, $channel) = @_; # $buf = $_[2] + print { $self->{$channel} } $_[2] or die "print FD[$channel]: $!"; } sub err { - my ($client, $buf) = @_; + my ($self, $buf) = @_; $buf .= "\n" unless $buf =~ /\n\z/s; - emit($client, 2, $buf); + emit($self, 2, $buf); } sub qerr { $_[0]->{opt}->{quiet} or err(@_) } sub fail ($$;$) { - my ($client, $buf, $exit_code) = @_; - err($client, $buf); - x_it($client, ($exit_code // 1) << 8); + my ($self, $buf, $exit_code) = @_; + err($self, $buf); + x_it($self, ($exit_code // 1) << 8); undef; } sub _help ($;$) { - my ($client, $errmsg) = @_; - my $cmd = $client->{cmd} // 'COMMAND'; + my ($self, $errmsg) = @_; + my $cmd = $self->{cmd} // 'COMMAND'; my @info = @{$CMD{$cmd} // [ '...', '...' ]}; my @top = ($cmd, shift(@info) // ()); my $cmd_desc = shift(@info); - $cmd_desc = $cmd_desc->($client->{env}) if ref($cmd_desc) eq 'CODE'; + $cmd_desc = $cmd_desc->($self->{env}) if ref($cmd_desc) eq 'CODE'; my @opt_desc; my $lpad = 2; for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS) @@ -314,15 +314,15 @@ EOF $msg .= "\n"; } my $channel = $errmsg ? 2 : 1; - emit($client, $channel, $msg); - x_it($client, $errmsg ? 1 << 8 : 0); # stderr => failure + emit($self, $channel, $msg); + x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure undef; } sub optparse ($$$) { - my ($client, $cmd, $argv) = @_; - $client->{cmd} = $cmd; - my $opt = $client->{opt} = {}; + my ($self, $cmd, $argv) = @_; + $self->{cmd} = $cmd; + my $opt = $self->{opt} = {}; my $info = $CMD{$cmd} // [ '[...]' ]; my ($proto, undef, @spec) = @$info; my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS @@ -334,8 +334,8 @@ sub optparse ($$$) { push @spec, '' => \$var; } $glp->getoptionsfromarray($argv, $opt, @spec) or - return _help($client, "bad arguments or options for $cmd"); - return _help($client) if $opt->{help}; + return _help($self, "bad arguments or options for $cmd"); + return _help($self) if $opt->{help}; # "-" aliases "stdin" or "clear" $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash; @@ -380,40 +380,40 @@ sub optparse ($$$) { if (!$inf && scalar(@$argv) > scalar(@args)) { $err //= 'too many arguments'; } - $err ? fail($client, "usage: lei $cmd $proto\nE: $err") : 1; + $err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1; } sub dispatch { - my ($client, $cmd, @argv) = @_; - local $SIG{__WARN__} = sub { err($client, "@_") }; + my ($self, $cmd, @argv) = @_; + local $SIG{__WARN__} = sub { err($self, "@_") }; local $SIG{__DIE__} = 'DEFAULT'; - return _help($client, 'no command given') unless defined($cmd); + return _help($self, 'no command given') unless defined($cmd); my $func = "lei_$cmd"; $func =~ tr/-/_/; if (my $cb = __PACKAGE__->can($func)) { - optparse($client, $cmd, \@argv) or return; - $cb->($client, @argv); + optparse($self, $cmd, \@argv) or return; + $cb->($self, @argv); } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only my $opt = {}; $GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or - return _help($client, 'bad arguments or options'); - _help($client); + return _help($self, 'bad arguments or options'); + _help($self); } else { - fail($client, "`$cmd' is not an lei command"); + fail($self, "`$cmd' is not an lei command"); } } sub _lei_cfg ($;$) { - my ($client, $creat) = @_; - my $f = _config_path($client->{env}); + my ($self, $creat) = @_; + my $f = _config_path($self->{env}); my @st = stat($f); my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case - return ($client->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; + return ($self->{cfg} = $cfg) if $cur_st eq $cfg->{-st}; } if (!@st) { unless ($creat) { - delete $client->{cfg}; + delete $self->{cfg}; return; } my (undef, $cfg_dir, undef) = File::Spec->splitpath($f); @@ -421,17 +421,17 @@ sub _lei_cfg ($;$) { open my $fh, '>>', $f or die "open($f): $!\n"; @st = stat($fh) or die "fstat($f): $!\n"; $cur_st = pack('dd', $st[10], $st[7]); - qerr($client, "I: $f created") if $client->{cmd} ne 'config'; + qerr($self, "I: $f created") if $self->{cmd} ne 'config'; } my $cfg = PublicInbox::Config::git_config_dump($f); $cfg->{-st} = $cur_st; $cfg->{'-f'} = $f; - $client->{cfg} = $PATH2CFG{$f} = $cfg; + $self->{cfg} = $PATH2CFG{$f} = $cfg; } sub _lei_store ($;$) { - my ($client, $creat) = @_; - my $cfg = _lei_cfg($client, $creat); + my ($self, $creat) = @_; + my $cfg = _lei_cfg($self, $creat); $cfg->{-lei_store} //= do { require PublicInbox::LeiStore; PublicInbox::SearchIdx::load_xapian_writable(); @@ -441,35 +441,35 @@ sub _lei_store ($;$) { } sub lei_show { - my ($client, @argv) = @_; + my ($self, @argv) = @_; } sub lei_query { - my ($client, @argv) = @_; + my ($self, @argv) = @_; } sub lei_mark { - my ($client, @argv) = @_; + my ($self, @argv) = @_; } sub lei_config { - my ($client, @argv) = @_; - $client->{opt}->{'config-file'} and return fail $client, + my ($self, @argv) = @_; + $self->{opt}->{'config-file'} and return fail $self, "config file switches not supported by `lei config'"; - my $env = $client->{env}; + my $env = $self->{env}; delete local $env->{GIT_CONFIG}; - my $cfg = _lei_cfg($client, 1); + my $cfg = _lei_cfg($self, 1); my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; - my %rdr = map { $_ => $client->{$_} } (0..2); + my %rdr = map { $_ => $self->{$_} } (0..2); require PublicInbox::Import; PublicInbox::Import::run_die($cmd, $env, \%rdr); } sub lei_init { - my ($client, $dir) = @_; - my $cfg = _lei_cfg($client, 1); + my ($self, $dir) = @_; + my $cfg = _lei_cfg($self, 1); my $cur = $cfg->{'leistore.dir'}; - my $env = $client->{env}; + my $env = $self->{env}; $dir //= _store_path($env); $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware my @cur = stat($cur) if defined($cur); @@ -478,24 +478,24 @@ sub lei_init { my $exists = "I: leistore.dir=$cur already initialized" if @dir; if (@cur) { if ($cur eq $dir) { - _lei_store($client, 1)->done; - return qerr($client, $exists); + _lei_store($self, 1)->done; + return qerr($self, $exists); } # some folks like symlinks and bind mounts :P if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") { - lei_config($client, 'leistore.dir', $dir); - _lei_store($client, 1)->done; - return qerr($client, "$exists (as $cur)"); + lei_config($self, 'leistore.dir', $dir); + _lei_store($self, 1)->done; + return qerr($self, "$exists (as $cur)"); } - return fail($client, <<""); + return fail($self, <<""); E: leistore.dir=$cur already initialized and it is not $dir } - lei_config($client, 'leistore.dir', $dir); - _lei_store($client, 1)->done; + lei_config($self, 'leistore.dir', $dir); + _lei_store($self, 1)->done; $exists //= "I: leistore.dir=$dir newly initialized"; - return qerr($client, $exists); + return qerr($self, $exists); } sub lei_daemon_pid { emit($_[0], 1, "$$\n") } @@ -503,8 +503,8 @@ sub lei_daemon_pid { emit($_[0], 1, "$$\n") } sub lei_daemon_stop { $quit->(0) } sub lei_daemon_env { - my ($client, @argv) = @_; - my $opt = $client->{opt}; + my ($self, @argv) = @_; + my $opt = $self->{opt}; if (defined $opt->{clear}) { %ENV = (); } elsif (my $u = $opt->{unset}) { @@ -516,29 +516,29 @@ sub lei_daemon_env { my $eor = $opt->{z} ? "\0" : "\n"; my $buf = ''; while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" } - emit($client, 1, $buf) + emit($self, 1, $buf) } } sub lei_help { _help($_[0]) } sub reap_exec { # dwaitpid callback - my ($client, $pid) = @_; - x_it($client, $?); + my ($self, $pid) = @_; + x_it($self, $?); } sub lei_git { # support passing through random git commands - my ($client, @argv) = @_; - my %rdr = map { $_ => $client->{$_} } (0..2); - my $pid = spawn(['git', @argv], $client->{env}, \%rdr); - PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client); + my ($self, @argv) = @_; + my %rdr = map { $_ => $self->{$_} } (0..2); + my $pid = spawn(['git', @argv], $self->{env}, \%rdr); + PublicInbox::DS::dwaitpid($pid, \&reap_exec, $self); } sub accept_dispatch { # Listener {post_accept} callback my ($sock) = @_; # ignore other $sock->blocking(1); $sock->autoflush(1); - my $client = { sock => $sock }; + my $self = bless { sock => $sock }, __PACKAGE__; vec(my $rin = '', fileno($sock), 1) = 1; # `say $sock' triggers "die" in lei(1) for my $i (0..2) { @@ -547,7 +547,7 @@ sub accept_dispatch { # Listener {post_accept} callback if ($fd >= 0) { my $rdr = ($fd == 0 ? '<&=' : '>&='); if (open(my $fh, $rdr, $fd)) { - $client->{$i} = $fh; + $self->{$i} = $fh; } else { say $sock "open($rdr$fd) (FD=$i): $!"; return; @@ -571,9 +571,9 @@ sub accept_dispatch { # Listener {post_accept} callback }; my %env = map { split(/=/, $_, 2) } split(/\0/, $env); if (chdir($env{PWD})) { - $client->{env} = \%env; - $client->{pid} = $client_pid; - eval { dispatch($client, split(/\]\0\[/, $argv)) }; + $self->{env} = \%env; + $self->{pid} = $client_pid; + eval { dispatch($self, split(/\]\0\[/, $argv)) }; say $sock $@ if $@; } else { say $sock "chdir($env{PWD}): $!"; # implicit close @@ -691,12 +691,12 @@ sub oneshot { local $quit = $exit if $exit; local %PATH2CFG; umask(077) // die("umask(077): $!"); - dispatch({ + dispatch((bless { 0 => *STDIN{IO}, 1 => *STDOUT{IO}, 2 => *STDERR{IO}, env => \%ENV - }, @ARGV); + }, __PACKAGE__), @ARGV); } 1; -- cgit v1.2.3-24-ge0c7 From 50ce71e698e038e643d81d9f5948e002384b5898 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 06:54:41 +0000 Subject: lei: micro-optimize startup time We'll use lower-level Socket and avoid IO::Socket::UNIX, use Cwd::fastcwd(*), avoid IO::Handle->autoflush by using the select operator, and reuse buffer for reading the socket while avoiding unnecessary $/ localization in a tiny script. All these things adds up to ~5-10 ms savings on my loaded system. (*) caveats about fastcwd won't apply since lei won't work in removed directories. --- lib/PublicInbox/LEI.pm | 13 ++++++------- lib/PublicInbox/TestCommon.pm | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index f5824c59..5399fade 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -10,9 +10,9 @@ use strict; use v5.10.1; use parent qw(PublicInbox::DS); use Getopt::Long (); +use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un); use Errno qw(EAGAIN ECONNREFUSED ENOENT); use POSIX qw(setsid); -use IO::Socket::UNIX; use IO::Handle (); use Sys::Syslog qw(syslog openlog); use PublicInbox::Config; @@ -585,18 +585,17 @@ sub noop {} # lei(1) calls this when it can't connect sub lazy_start { my ($path, $err) = @_; + require IO::FDPass; # require this early so caller sees it if ($err == ECONNREFUSED) { unlink($path) or die "unlink($path): $!"; } elsif ($err != ENOENT) { + $! = $err; # allow interpolation to stringify in die die "connect($path): $!"; } - require IO::FDPass; umask(077) // die("umask(077): $!"); - my $l = IO::Socket::UNIX->new(Local => $path, - Listen => 1024, - Type => SOCK_STREAM) or - $err = $!; - $l or return die "bind($path): $err"; + socket(my $l, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!"; + bind($l, pack_sockaddr_un($path)) or die "bind($path): $!"; + listen($l, 1024) or die "listen $!"; my @st = stat($path) or die "stat($path): $!"; my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index c236c589..338e760c 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -261,6 +261,7 @@ sub run_script ($;$$) { my $orig_io = _prepare_redirects($fhref); _run_sub($sub, $key, \@argv); _undo_redirects($orig_io); + select STDOUT; } # slurp the redirects back into user-supplied strings -- cgit v1.2.3-24-ge0c7 From 2b3e0878f79efcfce1f572f5f57eb51dc3a0b370 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 08:13:16 +0000 Subject: lei_store: relax GIT_COMMITTER_IDENT check It's pretty meaningless, since probably nobody notices committer info we extract author info from individual emails, anyways. --- lib/PublicInbox/LeiStore.pm | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index d3667d29..c95df785 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -52,6 +52,14 @@ sub git_epoch_max { } } +sub git_ident ($) { + my ($git) = @_; + chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); + warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; + $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) : + ('lei user', 'x@example.com') +} + sub importer { my ($self) = @_; my $max; @@ -79,10 +87,7 @@ sub importer { $max++; next; } - chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); - die "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; - my ($n, $e) = ($i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/g) - or die "could not extract name/email from `$i'\n"; + my ($n, $e) = git_ident($git); $self->{im} = $im = PublicInbox::Import->new($git, $n, $e); $im->{bytes_added} = int($packed_bytes / $self->packing_factor); $im->{lock_path} = undef; -- cgit v1.2.3-24-ge0c7 From 68fea5b055787c65f0e7164cbd5463f140382ea9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 09:20:29 +0000 Subject: lei_store: keyword extraction from mbox and Maildir Dovecot, mutt, and likely much other software support mbox Status/X-Status headers. Ensure we have a way to extract these headers as JMAP-compatible keywords before removing them for git storage. ->add_eml now accepts setting keywords at import time, and will probably be called like this: $lst->add_eml($eml, $lst->mbox_keywords($eml)); $lst->add_eml($eml, $lst->maildir_keywords($fn)); --- lib/PublicInbox/LeiStore.pm | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index c95df785..553adbc8 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -162,8 +162,27 @@ sub remove_eml_keywords { \@docids; } +# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/ +my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft'); +# O (old/non-recent), and D (deleted) aren't in JMAP, +# so probably won't be supported by us. +sub mbox_keywords { + my $eml = $_[-1]; + my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}"; + my %kw; + $s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge; + sort(keys %kw); +} + +# cf: https://cr.yp.to/proto/maildir.html +my %c2kw = ('D' => 'draft', F => 'flagged', R => 'answered', S => 'seen'); +sub maildir_keywords { + $_[-1] =~ /:2,([A-Z]+)\z/i ? + sort(map { $c2kw{$_} // () } split(//, $1)) : (); +} + sub add_eml { - my ($self, $eml) = @_; + my ($self, $eml, @kw) = @_; my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; @@ -178,6 +197,7 @@ sub add_eml { my $idx = $eidx->idx_shard($docid); $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); $idx->shard_add_eidx_info($docid, '.', $eml); # List-Id + $idx->shard_add_keywords($docid, @kw) if @kw; } } else { $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); @@ -185,6 +205,7 @@ sub add_eml { $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_raw($msgref, $eml, $smsg); + $idx->shard_add_keywords($smsg->{num}, @kw) if @kw; } $smsg->{blob} } -- cgit v1.2.3-24-ge0c7 From 7e7f4bfca5f2ef0d123445e074280f5e65cdfb85 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 10:45:12 +0000 Subject: on_destroy: generic localized END This is a localized version of the process-wide END{}, but runs at the end of variable scope. A subroutine ref and arguments may be passed, which allows us to avoid anonymous subs and problems they cause. It's similar to `defer' or `ensure' in other languages; Perl can rely on deterministic destructors due to refcounting. --- lib/PublicInbox/OnDestroy.pm | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 lib/PublicInbox/OnDestroy.pm (limited to 'lib') diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm new file mode 100644 index 00000000..841f87d4 --- /dev/null +++ b/lib/PublicInbox/OnDestroy.pm @@ -0,0 +1,16 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +package PublicInbox::OnDestroy; + +sub new { + shift; # ($class, $cb, @args) + bless [ @_ ], __PACKAGE__; +} + +sub DESTROY { + my ($cb, @args) = @{$_[0]}; + $cb->(@args) if $cb; +} + +1; -- cgit v1.2.3-24-ge0c7 From cf731a1422064344f25c214670fb0007ab1d4c2c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 11:23:57 +0000 Subject: lei: restore default __DIE__ handler for event loop The kqueue code paths will trigger exceptions which are caught by eval{}, so we can't be calling exit() from the __DIE__ handler and expect eval to catch it. We only need the __DIE__ handler to deal with fork or open failures at startup (since stderr is pointed to /dev/null). After that we can rely on OnDestroy writing errors to syslog when it goes out of scope. --- lib/PublicInbox/LEI.pm | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 5399fade..95b48095 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -20,6 +20,7 @@ use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT); use PublicInbox::Sigfd; use PublicInbox::DS qw(now); use PublicInbox::Spawn qw(spawn); +use PublicInbox::OnDestroy; use Text::Wrap qw(wrap); use File::Path qw(mkpath); use File::Spec; @@ -386,7 +387,6 @@ sub optparse ($$$) { sub dispatch { my ($self, $cmd, @argv) = @_; local $SIG{__WARN__} = sub { err($self, "@_") }; - local $SIG{__DIE__} = 'DEFAULT'; return _help($self, 'no command given') unless defined($cmd); my $func = "lei_$cmd"; $func =~ tr/-/_/; @@ -602,12 +602,12 @@ sub lazy_start { my $oldset = PublicInbox::Sigfd::block_signals(); my $pid = fork // die "fork: $!"; return if $pid; + require PublicInbox::Listener; + require PublicInbox::EOFpipe; openlog($path, 'pid', 'user'); local $SIG{__DIE__} = sub { syslog('crit', "@_"); - exit $! if $!; - exit $? >> 8 if $? >> 8; - exit 255; + die; # calls the default __DIE__ handler }; local $SIG{__WARN__} = sub { syslog('warning', "@_") }; open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; @@ -616,10 +616,13 @@ sub lazy_start { setsid(); $pid = fork // die "fork: $!"; return if $pid; + $SIG{__DIE__} = 'DEFAULT'; + my $on_destroy = PublicInbox::OnDestroy->new(sub { + my ($owner_pid) = @_; + syslog('crit', "$@") if $@ && $$ == $owner_pid; + }, $$); $0 = "lei-daemon $path"; local %PATH2CFG; - require PublicInbox::Listener; - require PublicInbox::EOFpipe; $l->blocking(0); $eof_w->blocking(0); $eof_r->blocking(0); @@ -680,6 +683,7 @@ sub lazy_start { $n; # true: continue, false: stop }); PublicInbox::DS->EventLoop; + $@ = undef if $on_destroy; # quiet OnDestroy if we got here exit($exit_code // 0); } -- cgit v1.2.3-24-ge0c7 From f2c7b911a1c4a7520091ba7224773c30e409c337 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 23:54:04 +0000 Subject: lei: drop $SIG{__DIE__}, add oneshot fallbacks We'll force stdout+stderr to be a pipe the spawning client controls, thus there's no need to lose error reporting by prematurely redirecting stdout+stderr to /dev/null. We can now rely exclusively on OnDestroy to write to syslog() on uncaught die failures. Also support falling back to oneshot mode on socket and cwd failures, since some commands may still be useful if the current working directory goes missing :P --- lib/PublicInbox/LEI.pm | 67 ++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 95b48095..fd412324 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -12,7 +12,7 @@ use parent qw(PublicInbox::DS); use Getopt::Long (); use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un); use Errno qw(EAGAIN ECONNREFUSED ENOENT); -use POSIX qw(setsid); +use POSIX (); use IO::Handle (); use Sys::Syslog qw(syslog openlog); use PublicInbox::Config; @@ -584,60 +584,44 @@ sub noop {} # lei(1) calls this when it can't connect sub lazy_start { - my ($path, $err) = @_; - require IO::FDPass; # require this early so caller sees it - if ($err == ECONNREFUSED) { + my ($path, $errno) = @_; + if ($errno == ECONNREFUSED) { unlink($path) or die "unlink($path): $!"; - } elsif ($err != ENOENT) { - $! = $err; # allow interpolation to stringify in die + } elsif ($errno != ENOENT) { + $! = $errno; # allow interpolation to stringify in die die "connect($path): $!"; } umask(077) // die("umask(077): $!"); socket(my $l, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!"; bind($l, pack_sockaddr_un($path)) or die "bind($path): $!"; - listen($l, 1024) or die "listen $!"; + listen($l, 1024) or die "listen: $!"; my @st = stat($path) or die "stat($path): $!"; my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino pipe(my ($eof_r, $eof_w)) or die "pipe: $!"; my $oldset = PublicInbox::Sigfd::block_signals(); - my $pid = fork // die "fork: $!"; - return if $pid; + require IO::FDPass; require PublicInbox::Listener; require PublicInbox::EOFpipe; - openlog($path, 'pid', 'user'); - local $SIG{__DIE__} = sub { - syslog('crit', "@_"); - die; # calls the default __DIE__ handler - }; - local $SIG{__WARN__} = sub { syslog('warning', "@_") }; - open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n"; - open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; - open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; - setsid(); - $pid = fork // die "fork: $!"; + (-p STDOUT && -p STDERR) or die "E: stdout+stderr must be pipes\n"; + open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!"; + POSIX::setsid() > 0 or die "setsid: $!"; + my $pid = fork // die "fork: $!"; return if $pid; - $SIG{__DIE__} = 'DEFAULT'; - my $on_destroy = PublicInbox::OnDestroy->new(sub { - my ($owner_pid) = @_; - syslog('crit', "$@") if $@ && $$ == $owner_pid; - }, $$); $0 = "lei-daemon $path"; local %PATH2CFG; - $l->blocking(0); - $eof_w->blocking(0); - $eof_r->blocking(0); - my $listener = PublicInbox::Listener->new($l, \&accept_dispatch, $l); + $_->blocking(0) for ($l, $eof_r, $eof_w); + $l = PublicInbox::Listener->new($l, \&accept_dispatch, $l); my $exit_code; local $quit = sub { $exit_code //= shift; - my $tmp = $listener or exit($exit_code); + my $listener = $l or exit($exit_code); unlink($path) if defined($path); - syswrite($eof_w, '.'); - $l = $listener = $path = undef; - $tmp->close if $tmp; # DS::close + # closing eof_w triggers \&noop wakeup + $eof_w = $l = $path = undef; + $listener->close; # DS::close PublicInbox::DS->SetLoopTimeout(1000); }; - PublicInbox::EOFpipe->new($eof_r, sub {}, undef); + PublicInbox::EOFpipe->new($eof_r, \&noop, undef); my $sig = { CHLD => \&PublicInbox::DS::enqueue_reap, QUIT => $quit, @@ -682,8 +666,21 @@ sub lazy_start { } $n; # true: continue, false: stop }); + + # STDIN was redirected to /dev/null above, closing STDOUT and + # STDERR will cause the calling `lei' client process to finish + # reading <$daemon> pipe. + open STDOUT, '>&STDIN' or die "redirect stdout failed: $!"; + openlog($path, 'pid', 'user'); + local $SIG{__WARN__} = sub { syslog('warning', "@_") }; + my $owner_pid = $$; + my $on_destroy = PublicInbox::OnDestroy->new(sub { + syslog('crit', "$@") if $@ && $$ == $owner_pid; + }); + open STDERR, '>&STDIN' or die "redirect stderr failed: $!"; + # $daemon pipe to `lei' closed, main loop begins: PublicInbox::DS->EventLoop; - $@ = undef if $on_destroy; # quiet OnDestroy if we got here + @$on_destroy = (); # cancel on_destroy if we get here exit($exit_code // 0); } -- cgit v1.2.3-24-ge0c7 From 2fe6af26d737773e0a7cafa5902360ab1309c807 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Dec 2020 05:05:09 +0000 Subject: lei: start working on bash completion Much work still needs to be done, but that goes for this entire project :P --- lib/PublicInbox/LEI.pm | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index fd412324..7004e9d7 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -132,7 +132,11 @@ our %CMD = ( # sorted in order of importance/use: 'reorder-local-store-and-break-history' => [ '[REFNAME]', 'rewrite git history in an attempt to improve compression', - 'gc!' ] + 'gc!' ], + +# internal commands are prefixed with '_' +'_complete' => [ '[...]', 'internal shell completion helper', + pass_through('everything') ], ); # @CMD # switch descriptions, try to keep consistent across commands @@ -209,6 +213,10 @@ my %OPTDESC = ( 'unset matching NAME, may be specified multiple times'], ); # %OPTDESC +my %CONFIG_KEYS = ( + 'leistore.dir' => 'top-level storage location', +); + sub x_it ($$) { # pronounced "exit" my ($self, $code) = @_; if (my $sig = ($code & 127)) { @@ -223,6 +231,8 @@ sub x_it ($$) { # pronounced "exit" } } +sub puts ($;@) { print { shift->{1} } map { "$_\n" } @_ } + sub emit { my ($self, $channel) = @_; # $buf = $_[2] print { $self->{$channel} } $_[2] or die "print FD[$channel]: $!"; @@ -522,6 +532,55 @@ sub lei_daemon_env { sub lei_help { _help($_[0]) } +# Shell completion helper. Used by lei-completion.bash and hopefully +# other shells. Try to do as much here as possible to avoid redundancy +# and improve maintainability. +sub lei__complete { + my ($self, @argv) = @_; # argv = qw(lei and any other args...) + shift @argv; # ignore "lei", the entire command is sent + @argv or return puts $self, grep(!/^_/, keys %CMD); + my $cmd = shift @argv; + my $info = $CMD{$cmd} // do { # filter matching commands + @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD); + return; + }; + my ($proto, undef, @spec) = @$info; + my $cur = pop @argv; + my $re = defined($cur) ? qr/\A\Q$cur\E/ : qr/./; + if (substr($cur // '-', 0, 1) eq '-') { # --switches + # gross special case since the only git-config options + # Consider moving to a table if we need more special cases + # we use Getopt::Long for are the ones we reject, so these + # are the ones we don't reject: + if ($cmd eq 'config') { + puts $self, grep(/$re/, keys %CONFIG_KEYS); + @spec = qw(add z|null get get-all unset unset-all + replace-all get-urlmatch + remove-section rename-section + name-only list|l edit|e + get-color-name get-colorbool); + # fall-through + } + # TODO: arg support + puts $self, grep(/$re/, map { # generate short/long names + my $eq = ''; + if (s/=.+\z//) { # required arg, e.g. output|o=i + $eq = '='; + } elsif (s/:.+\z//) { # optional arg, e.g. mid:s + } else { # negation: solve! => no-solve|solve + s/\A(.+)!\z/no-$1|$1/; + } + map { + length > 1 ? "--$_$eq" : "-$_" + } split(/\|/, $_, -1) # help|h + } grep { !ref } @spec); # filter out $GLP_PASS ref + } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) { + puts $self, grep(/$re/, keys %CONFIG_KEYS); + } + # TODO: URLs, pathnames, OIDs, MIDs, etc... See optparse() for + # proto parsing. +} + sub reap_exec { # dwaitpid callback my ($self, $pid) = @_; x_it($self, $?); -- cgit v1.2.3-24-ge0c7 From b28dcf96700a79616e7338bb8610f89155dd57e4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Dec 2020 07:16:58 +0000 Subject: lei: support for -$DIGIT and -$SIG CLI switches I'm a bit spoiled by using single-dash digit options from common tools: ("git log -$DIGIT", "kill -9", "tail -1", ...), so we'll support it for limiting query results. But first, make it easier to send arbitrary signals to the daemon via "daemon-kill". "daemon-stop" is redundant, now, and removed, since the default for "daemon-kill" is SIGTERM to match kill(1) behavior. --- lib/PublicInbox/LEI.pm | 55 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 7004e9d7..c28c9b59 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -36,6 +36,21 @@ our %PATH2CFG; # persistent for socket daemon # (may) pass options through to another command: sub pass_through { $GLP_PASS } +my $OPT; +sub opt_dash { + my ($spec, $re_str) = @_; # 'limit|n=i', '([0-9]+)' + my ($key) = ($spec =~ m/\A([a-z]+)/g); + my $cb = sub { # Getopt::Long "<>" catch-all handler + my ($arg) = @_; + if ($arg =~ /\A-($re_str)\z/) { + $OPT->{$key} = $1; + } else { + die "bad argument for --$key: $arg\n"; + } + }; + ($spec, '<>' => $cb, $GLP_PASS) +} + sub _store_path ($) { my ($env) = @_; File::Spec->rel2abs(($env->{XDG_DATA_HOME} // @@ -55,8 +70,8 @@ sub _config_path ($) { our %CMD = ( # sorted in order of importance/use: 'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a - limit|n=i sort|s=s@ reverse|r offset=i remote local! extinbox! - since|after=s until|before=s) ], + sort|s=s@ reverse|r offset=i remote local! extinbox! + since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ], 'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!), @@ -111,7 +126,7 @@ our %CMD = ( # sorted in order of importance/use: 'import' => [ '{URL_OR_PATHNAME|--stdin}', 'one-shot import/update from URL or filesystem', - qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags), + qw(stdin| offset=i recursive|r exclude=s include=s !flags), ], 'config' => [ '[...]', sub { @@ -121,7 +136,8 @@ our %CMD = ( # sorted in order of importance/use: 'init' => [ '[PATHNAME]', sub { 'initialize storage, default: '._store_path($_[0]); }, qw(quiet|q) ], -'daemon-stop' => [ '', 'stop the lei-daemon' ], +'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon', + opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ], 'daemon-pid' => [ '', 'show the PID of the lei-daemon' ], 'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment', qw(clear| unset|u=s@ z|0) ], @@ -175,8 +191,7 @@ my %OPTDESC = ( 'ls-query format|f=s' => $ls_format, 'ls-extinbox format|f=s' => $ls_format, -'limit|n=i' => ['NUM', - 'limit on number of matches (default: 10000)' ], +'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ], 'offset=i' => ['OFF', 'search result offset (default: 0)'], 'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', @@ -211,6 +226,8 @@ my %OPTDESC = ( 'clear|' => 'clear the daemon environment', 'unset|u=s@' => ['NAME', 'unset matching NAME, may be specified multiple times'], + +'signal|s=s' => [ 'SIG', 'signal to send lei-daemon (default: TERM)' ], ); # %OPTDESC my %CONFIG_KEYS = ( @@ -333,23 +350,23 @@ EOF sub optparse ($$$) { my ($self, $cmd, $argv) = @_; $self->{cmd} = $cmd; - my $opt = $self->{opt} = {}; + $OPT = $self->{opt} = {}; my $info = $CMD{$cmd} // [ '[...]' ]; my ($proto, undef, @spec) = @$info; - my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS + my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP; push @spec, qw(help|h); my $lone_dash; if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias $lone_dash = $spec[0]; - $opt->{$spec[0]} = \(my $var); + $OPT->{$spec[0]} = \(my $var); push @spec, '' => \$var; } - $glp->getoptionsfromarray($argv, $opt, @spec) or + $glp->getoptionsfromarray($argv, $OPT, @spec) or return _help($self, "bad arguments or options for $cmd"); - return _help($self) if $opt->{help}; + return _help($self) if $OPT->{help}; # "-" aliases "stdin" or "clear" - $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash; + $OPT->{$lone_dash} = ${$OPT->{$lone_dash}} if defined $lone_dash; my $i = 0; my $POS_ARG = '[A-Z][A-Z0-9_]+'; @@ -365,14 +382,14 @@ sub optparse ($$$) { } elsif ($var =~ /\.\.\.\]\z/) { # optional args start $inf = 1; last; - } elsif ($var =~ /\A\[$POS_ARG\]\z/) { # one optional arg + } elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg $i++; } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin my @or = split(/\|/, $var); my $ok; for my $o (@or) { if ($o =~ /\A--([a-z0-9\-]+)/) { - $ok = defined($opt->{$1}); + $ok = defined($OPT->{$1}); last; } elsif (defined($argv->[$i])) { $ok = 1; @@ -510,7 +527,11 @@ E: leistore.dir=$cur already initialized and it is not $dir sub lei_daemon_pid { emit($_[0], 1, "$$\n") } -sub lei_daemon_stop { $quit->(0) } +sub lei_daemon_kill { + my ($self) = @_; + my $sig = $self->{opt}->{signal} // 'TERM'; + kill($sig, $$) or fail($self, "kill($sig, $$): $!"); +} sub lei_daemon_env { my ($self, @argv) = @_; @@ -538,7 +559,7 @@ sub lei_help { _help($_[0]) } sub lei__complete { my ($self, @argv) = @_; # argv = qw(lei and any other args...) shift @argv; # ignore "lei", the entire command is sent - @argv or return puts $self, grep(!/^_/, keys %CMD); + @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h); my $cmd = shift @argv; my $info = $CMD{$cmd} // do { # filter matching commands @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD); @@ -573,7 +594,7 @@ sub lei__complete { map { length > 1 ? "--$_$eq" : "-$_" } split(/\|/, $_, -1) # help|h - } grep { !ref } @spec); # filter out $GLP_PASS ref + } grep { $OPTDESC{"$cmd\t$_"} || $OPTDESC{$_} } @spec); } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) { puts $self, grep(/$re/, keys %CONFIG_KEYS); } -- cgit v1.2.3-24-ge0c7 From 7ab46690f51a7f1f22299e4fd385a56e5bcddef7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 22:23:48 +0000 Subject: lei: revise output routines Drop emit(), since we hard code the channel (client FD) 99% of the time and use prototypes to avoid parentheses because my hands are tired. --- lib/PublicInbox/LEI.pm | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index c28c9b59..97c5d91b 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -250,18 +250,13 @@ sub x_it ($$) { # pronounced "exit" sub puts ($;@) { print { shift->{1} } map { "$_\n" } @_ } -sub emit { - my ($self, $channel) = @_; # $buf = $_[2] - print { $self->{$channel} } $_[2] or die "print FD[$channel]: $!"; -} +sub out ($;@) { print { shift->{1} } @_ } -sub err { - my ($self, $buf) = @_; - $buf .= "\n" unless $buf =~ /\n\z/s; - emit($self, 2, $buf); +sub err ($;@) { + print { shift->{2} } @_, (substr($_[-1], -1, 1) eq "\n" ? () : "\n"); } -sub qerr { $_[0]->{opt}->{quiet} or err(@_) } +sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) } sub fail ($$;$) { my ($self, $buf, $exit_code) = @_; @@ -341,8 +336,7 @@ EOF $msg .= $rhs; $msg .= "\n"; } - my $channel = $errmsg ? 2 : 1; - emit($self, $channel, $msg); + print { $self->{$errmsg ? 2 : 1} } $msg; x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure undef; } @@ -404,7 +398,6 @@ sub optparse ($$$) { } last if $err; } - # warn "inf=$inf ".scalar(@$argv). ' '.scalar(@args)."\n"; if (!$inf && scalar(@$argv) > scalar(@args)) { $err //= 'too many arguments'; } @@ -413,7 +406,7 @@ sub optparse ($$$) { sub dispatch { my ($self, $cmd, @argv) = @_; - local $SIG{__WARN__} = sub { err($self, "@_") }; + local $SIG{__WARN__} = sub { err($self, @_) }; return _help($self, 'no command given') unless defined($cmd); my $func = "lei_$cmd"; $func =~ tr/-/_/; @@ -525,7 +518,7 @@ E: leistore.dir=$cur already initialized and it is not $dir return qerr($self, $exists); } -sub lei_daemon_pid { emit($_[0], 1, "$$\n") } +sub lei_daemon_pid { puts shift, $$ } sub lei_daemon_kill { my ($self) = @_; @@ -547,7 +540,7 @@ sub lei_daemon_env { my $eor = $opt->{z} ? "\0" : "\n"; my $buf = ''; while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" } - emit($self, 1, $buf) + out $self, $buf; } } -- cgit v1.2.3-24-ge0c7 From 12583f45f29f3acd6cd704df9a7e5aaff5acc3f7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Dec 2020 11:34:38 +0000 Subject: lei: extinbox: start implementing in config file They need to be indexed by MiscIdx, but MiscIdx still needs more work to support faster config loading when dealing with ~100K data sources. --- lib/PublicInbox/LEI.pm | 19 ++++++++-------- lib/PublicInbox/LeiExtinbox.pm | 51 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 lib/PublicInbox/LeiExtinbox.pm (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 97c5d91b..b254e2c5 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -8,7 +8,7 @@ package PublicInbox::LEI; use strict; use v5.10.1; -use parent qw(PublicInbox::DS); +use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox); use Getopt::Long (); use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un); use Errno qw(EAGAIN ECONNREFUSED ENOENT); @@ -79,12 +79,12 @@ our %CMD = ( # sorted in order of importance/use: 'add-extinbox' => [ 'URL_OR_PATHNAME', 'add/set priority of a publicinbox|extindex for extra matches', - qw(prio=i) ], + qw(boost=i quiet|q) ], 'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations', - qw(format|f=s z local remote) ], + qw(format|f=s z|0 local remote quiet|q) ], 'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}', 'exclude further results from a publicinbox|extindex', - qw(prune) ], + qw(prune quiet|q) ], 'ls-query' => [ '[FILTER...]', 'list saved search queries', qw(name-only format|f=s z) ], @@ -107,7 +107,7 @@ our %CMD = ( # sorted in order of importance/use: # code repos are used for `show' to solve blobs from patch mails 'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', - qw(prio=i) ], + qw(boost=i) ], 'ls-coderepo' => [ '[FILTER_TERMS...]', 'list known code repos', qw(format|f=s z) ], 'forget-coderepo' => [ 'PATHNAME', @@ -197,7 +197,7 @@ my %OPTDESC = ( 'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', "order of results `--output'-dependent"], -'prio=i' => 'priority of query source', +'boost=i' => 'increase/decrease priority of results (default: 0)', 'local' => 'limit operations to the local filesystem', 'local!' => 'exclude results from the local filesystem', @@ -217,8 +217,7 @@ my %OPTDESC = ( 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], 'jobs:i' => 'set parallelism level', -# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere? -'z' => 'use NUL \\0 instead of newline (CR) to delimit lines', +# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere 'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', # note: no "--ignore-environment" / "-i" support like env(1) since that @@ -455,7 +454,9 @@ sub _lei_store ($;$) { $cfg->{-lei_store} //= do { require PublicInbox::LeiStore; PublicInbox::SearchIdx::load_xapian_writable(); - defined(my $dir = $cfg->{'leistore.dir'}) or return; + my $dir = $cfg->{'leistore.dir'}; + $dir //= _store_path($self->{env}) if $creat; + return unless $dir; PublicInbox::LeiStore->new($dir, { creat => $creat }); }; } diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm new file mode 100644 index 00000000..c2de7735 --- /dev/null +++ b/lib/PublicInbox/LeiExtinbox.pm @@ -0,0 +1,51 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# *-extinbox commands of lei +package PublicInbox::LeiExtinbox; +use strict; +use v5.10.1; +use parent qw(Exporter); +our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox); + +sub lei_ls_extinbox { + my ($self, @argv) = @_; + my $stor = $self->_lei_store(0); + my $cfg = $self->_lei_cfg(0); + my $out = $self->{1}; + my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n"); + my (%boost, @loc); + for my $sec (grep(/\Aextinbox\./, @{$cfg->{-section_order}})) { + my $loc = substr($sec, length('extinbox.')); + $boost{$loc} = $cfg->{"$sec.boost"}; + push @loc, $loc; + } + use sort 'stable'; + # highest boost first, but stable for alphabetic tie break + for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) { + # TODO: use miscidx and show docid so forget/set is easier + print $out $_, $OFS, 'boost=', $boost{$_}, $ORS; + } +} + +sub lei_add_extinbox { + my ($self, $url_or_dir) = @_; + my $cfg = $self->_lei_cfg(1); + if ($url_or_dir !~ m!\Ahttps?://!) { + $url_or_dir = File::Spec->canonpath($url_or_dir); + } + my $new_boost = $self->{opt}->{boost} // 0; + my $key = "extinbox.$url_or_dir.boost"; + my $cur_boost = $cfg->{$key}; + return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent + $self->lei_config($key, $new_boost); + my $stor = $self->_lei_store(1); + # TODO: add to MiscIdx + $stor->done; +} + +sub lei_forget_extinbox { + # TODO +} + +1; -- cgit v1.2.3-24-ge0c7 From 08de05443804120a2663aa3611c47c84a18e0c35 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Dec 2020 20:02:51 +0000 Subject: lei_xsearch: cross-(inbox|extindex) search While a single extindex combines multiple inboxes into a single search index, extindex still requires up-front indexing on items which can be searched. XSearch has no on-disk footprint itself and uses Xapian DBs of existing publicinbox and extindex ("extinbox") exclusively. XSearch still suffers from the multi-shard Xapian scalability problems which led to the creation of extindex, but I expect the number of shards to remain relatively low. I envision users hosting public-inbox instances on their workstations will only have two extindex combined by this, one read-only extindex for serving public archives, and one read-write extindex managed by LeiStore for private mail. --- lib/PublicInbox/LeiSearch.pm | 14 ++++----- lib/PublicInbox/LeiXSearch.pm | 72 +++++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/Search.pm | 19 +++++------- 3 files changed, 85 insertions(+), 20 deletions(-) create mode 100644 lib/PublicInbox/LeiXSearch.pm (limited to 'lib') diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 66c16e04..0b962b11 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -7,20 +7,18 @@ use v5.10.1; use parent qw(PublicInbox::ExtSearch); use PublicInbox::Search; -sub combined_docid ($$) { +# get combined docid from over.num: +# (not generic Xapian, only works with our sharding scheme) +sub num2docid ($$) { my ($self, $num) = @_; - ($num - 1) * $self->{nshard} + 1; + my $nshard = $self->{nshard}; + ($num - 1) * $nshard + $num % $nshard + 1; } sub msg_keywords { my ($self, $num) = @_; # num_or_mitem my $xdb = $self->xdb; # set {nshard}; - my $docid = ref($num) ? $num->get_docid : do { - # get combined docid from over.num: - # (not generic Xapian, only works with our sharding scheme) - my $nshard = $self->{nshard}; - ($num - 1) * $nshard + $num % $nshard + 1; - }; + my $docid = ref($num) ? $num->get_docid : num2docid($self, $num); my %kw; eval { my $end = $xdb->termlist_end($docid); diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm new file mode 100644 index 00000000..1a81b14a --- /dev/null +++ b/lib/PublicInbox/LeiXSearch.pm @@ -0,0 +1,72 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# Combine any combination of PublicInbox::Search, +# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects +# into one Xapian DB +package PublicInbox::LeiXSearch; +use strict; +use v5.10.1; +use parent qw(PublicInbox::LeiSearch); + +sub new { + my ($class) = @_; + PublicInbox::Search::load_xapian(); + bless { + qp_flags => $PublicInbox::Search::QP_FLAGS | + PublicInbox::Search::FLAG_PURE_NOT(), + }, $class +} + +sub attach_extinbox { + my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox + if (!$ibxish->can('over')) { + push @{$self->{remotes}}, $ibxish + } + if (delete $self->{xdb}) { # XXX: do we need this? + # clobber existing {xdb} if amending + my $expect = delete $self->{nshard}; + my $shards = delete $self->{shards_flat}; + scalar(@$shards) == $expect or die + "BUG: {nshard}$expect != shards=".scalar(@$shards); + + my $prev = {}; + for my $old_ibxish (@{$self->{shard2ibx}}) { + next if $prev == $old_ibxish; + $prev = $old_ibxish; + my @shards = $old_ibxish->search->xdb_shards_flat; + push @{$self->{shards_flat}}, @shards; + } + my $nr = scalar(@{$self->{shards_flat}}); + $nr == $expect or die + "BUG: reloaded $nr shards, expected $expect" + } + my @shards = $ibxish->search->xdb_shards_flat; + push @{$self->{shards_flat}}, @shards; + push(@{$self->{shard2ibx}}, $ibxish) for (@shards); +} + +# called by PublicInbox::Search::xdb +sub xdb_shards_flat { @{$_[0]->{shards_flat}} } + +# like over->get_art +sub smsg_for { + my ($self, $mitem) = @_; + # cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID + my $nshard = $self->{nshard}; + my $docid = $mitem->get_docid; + my $shard = ($docid - 1) % $nshard; + my $num = int(($docid - 1) / $nshard) + 1; + my $smsg = $self->{shard2ibx}->[$shard]->over->get_art($num); + $smsg->{docid} = $docid; + $smsg; +} + +sub recent { + my ($self, $qstr, $opt) = @_; + $opt //= {}; + $opt->{relevance} //= -2; + $self->mset($qstr //= 'bytes:1..', $opt); +} + +1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index bbc5e32f..bca2036c 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -196,6 +196,7 @@ sub xdb_shards_flat ($) { my ($self) = @_; my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); + load_xapian(); if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { @xdb = ($X{Database}->new($xpfx)); $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; @@ -214,16 +215,6 @@ sub xdb_shards_flat ($) { @xdb; } -sub _xdb { - my ($self) = @_; - $self->{qp_flags} //= $QP_FLAGS; - my @xdb = xdb_shards_flat($self) or return; - $self->{nshard} = scalar(@xdb); - my $xdb = shift @xdb; - $xdb->add_database($_) for @xdb; - $xdb; -} - # v2 Xapian docids don't conflict, so they're identical to # NNTP article numbers and IMAP UIDs. # https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID @@ -242,8 +233,12 @@ sub mset_to_artnums { sub xdb ($) { my ($self) = @_; $self->{xdb} //= do { - load_xapian(); - $self->_xdb; + $self->{qp_flags} //= $QP_FLAGS; + my @xdb = $self->xdb_shards_flat or return; + $self->{nshard} = scalar(@xdb); + my $xdb = shift @xdb; + $xdb->add_database($_) for @xdb; + $xdb; }; } -- cgit v1.2.3-24-ge0c7 From 9d29ceda4eb8c9973749d74b928416f5c3cc78f8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 11:13:11 +0000 Subject: lei: rename proposed "query" command to "q", add JSON output Using "query" as a verb may be confusing when we'll also refer to them as nouns with the "-query" sub commands. "query" is also many characters to type without tab-completion on what I expect to be one of the most commonly used sub-commands Furthermore, "q" is also the common query parameter name used by our PSGI interface, as is the case with several major web search engines; so there's an element of familiarity there. The name "search" was disregarded because "show" could be a commonly used lei sub-command, too, and typing "se" for tab-completion may be slow since two-handed typists on QWERTY keyboards won't be able to use alternating hands. "f" or "find" could be a possibility here, too; but we're currently using the term "forget" as a weaker version of "remove" or "rm", though "ignore" could be substituted for "forget", perhaps... Kyle Meyer noted the lack of (proposed) JSON output support so that's been added to the proposed UI. --- lib/PublicInbox/LEI.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index b254e2c5..7002a1f7 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -68,7 +68,7 @@ sub _config_path ($) { # TODO: generate shell completion + help using %CMD and %OPTDESC # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: -'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( +'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a sort|s=s@ reverse|r offset=i remote local! extinbox! since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ], @@ -98,7 +98,7 @@ our %CMD = ( # sorted in order of importance/use: 'set/unset flags on message(s) from stdin', qw(stdin| oid=s exact by-mid|mid:s) ], 'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', - 'exclude message(s) on stdin from query results', + "exclude message(s) on stdin from `q' search results", qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], 'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}', @@ -175,7 +175,7 @@ my %OPTDESC = ( 'dedupe|d=s' => ['STRAT|content|oid|mid', 'deduplication strategy'], 'show thread|t' => 'display entire thread a message belongs to', -'query thread|t' => +'q thread|t' => 'return all messages in the same thread as the actual match(es)', 'augment|a' => 'augment --output destination instead of clobbering', @@ -186,7 +186,7 @@ my %OPTDESC = ( 'message/object output format' ], 'mark format|f=s' => $stdin_formats, 'forget format|f=s' => $stdin_formats, -'query format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid', +'q format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid|json', 'specify output format, default depends on --output'], 'ls-query format|f=s' => $ls_format, 'ls-extinbox format|f=s' => $ls_format, -- cgit v1.2.3-24-ge0c7