diff options
Diffstat (limited to 'Documentation')
54 files changed, 1219 insertions, 241 deletions
diff --git a/Documentation/RelNotes/v1.9.0.wip b/Documentation/RelNotes/v1.9.0.eml index c725e332..08e16a66 100644 --- a/Documentation/RelNotes/v1.9.0.wip +++ b/Documentation/RelNotes/v1.9.0.eml @@ -1,13 +1,24 @@ From: Eric Wong <e@80x24.org> To: meta@public-inbox.org -Subject: [WIP] public-inbox 1.9 +Subject: [ANNOUNCE] public-inbox 1.9.0 +Date: Sun, 21 Aug 2022 02:36:59 +0000 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline +Message-ID: <2022-08-21T023659Z-public-inbox-1.9.0-rele@sed> + +Upgrading: + + lei users need to "lei daemon-kill" after installation to load + new code. Normal daemons (read-only, and public-inbox-watch) + will also need restarts, of course, but there's no + backwards-incompatible data format changes so rolling back to + older versions is harmless. Major bugfixes: - * lei no longer deadlocks from inotify/EVFILT_VNODE handling + * lei no longer freezes from inotify/EVFILT_VNODE handling, + user interrupts (Ctrl-C), nor excessive errors/warnings * IMAP server fairness improved to avoid excessive blob prefetch @@ -24,6 +35,9 @@ New features: * SIGHUP reloads TLS certs and keys in addition to config and .psgi files + * "lei reindex" command for lei users to update personal index + in ~/.local/share/lei/store for search improvements below: + Search improvements: These will require --reindex with public-inbox-index and/or @@ -32,9 +46,12 @@ Search improvements: * patchid: prefix search support added to WWW and lei for "git patch-id --stable" support - * text inside base-85 binary patches are no longer indexed + * text inside base-85 binary patches is no longer indexed to avoid false positives + * for lei users, "lei reindex" now exists and is required + to take advantage of aforementioned indexing changes + Performance improvements: * IMAP server startup is faster with many mailboxes when using diff --git a/Documentation/RelNotes/v2.0.0.wip b/Documentation/RelNotes/v2.0.0.wip new file mode 100644 index 
00000000..f04d8144 --- /dev/null +++ b/Documentation/RelNotes/v2.0.0.wip @@ -0,0 +1,193 @@ +To: meta@public-inbox.org +Subject: [WIP] public-inbox 2.0.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline + +This release includes several new features and fixes; mostly +around improved integration between inboxes and coderepos for +solver. Portability and reliability are also improved, especially +in the internal process management of lei. + +public-inbox-cindex is a new command to index coderepos for +WWW search and perform automatic associations between +coderepos and inboxes. This makes solver vastly more useful +for the WWW UI as admins will no longer have to manually +associate coderepos with inboxes. + +public-inbox-clone gains the ability to mirror entire (or partial) +grokmirror-compatible manifests. + +Internal process and object management data structures are vastly +simplified throughout and error handling made more robust. + +git SHA-256 support remains a work-in-progress for inboxes and +extindex due to the need to interoperate with SHA-1 epochs. + +Upgrading: + + lei users need to "lei daemon-kill" after installation to load + new code. Normal daemons (read-only, and public-inbox-watch) + will also need restarts, of course, but there's no + backwards-incompatible data format changes so rolling back to + older versions is harmless. + +Compatibility: + + Uppercase newsgroup names were always broken with IMAP, POP3, and + -extindex. Uppercase names will now be lowercased by default and + warnings will be emitted. Conflicting newsgroup names (and `inboxdir' + entries if `newsgroup' isn't specified) will also generate warnings + since they break -extindex and the new -cindex (coderepo index). + +New users + hackers: + + The install/ directory includes tools to automate installation and + removal of dependencies for stripped-down or full setups. See + install/README for more details. 
+ +treewide + + * support raw UTF-8 headers from SMTPUTF8 hosts + + * standardize on `#' prefix for stderr diagnostics (previously `I:') + + * SHA-256 coderepos are fully supported (but not inboxes, yet) + + * for daemons serving public traffic, MALLOC_MMAP_THRESHOLD_=131072 is + recommended to reduce fragmentation in glibc malloc, while jemalloc + (tested as an LD_PRELOAD) is another option (at least for 64-bit). + +PublicInbox::WWW + + * support `+' in inbox names + + * support coderepo displays for systems without cgit + + * improve display of git tags, commits and trees in $INBOX/$OID/s/ endpoint + + * numerous memory usage reductions by avoiding Perl scratchpads + + * add #related anchor and search form to find related patches + based on blob OIDs (IOW, exposing `lei p2q' to the web) + + * fix footer in listing of >200 inboxes + + * support dumb HTTP clones of SHA-256 git repos + + * add /$INBOX/$MSGID/d/ endpoint to show diffs in reused Message-IDs + (`lei mail-diff' for the web) + + * support POST /$INBOX/$MSGID/?x=m&q= to limit mbox results to a thread + + * add topics_(new|active).(html|atom) endpoints + + * linkify peer public-inbox addresses in To/Cc headers + +public-inbox-watch: + + * watching MH folders is now supported + +lei + + * use http.proxy / http.<remote>.proxy from system-wide git-config if + unconfigured for lei + + * improve IMAP and NNTP error reporting + + * reduce default IMAP connections to avoid overloading servers + + * compatibility with SQLite <3.8.3 on CentOS 7.x + + * fix `lei q -tt' on locally indexed messages (still broken for remotes: + https://public-inbox.org/meta/20230226170931.M947721@dcvr/ ) + + * `lei import' now sets labels+keywords consistently on all + already imported messages + + * fix `lei up' on saved local queries which previously used -t/--threads + + * `lei convert' output to v2 public-inboxes is now idempotent + + * improved bash completion for labels (see contrib/completion) + + * support for reading (but not 
writing) MH folders + + * `lei index' accepts `+L:$LABEL' like `lei import' does + +solver (used by lei (rediff|blob), and PublicInbox::WWW) + + * handle copies in patches properly + + * no longer redundantly parallelized within each WWW process + +portability + + * SIGWINCH is handled properly on less common architectures and OSes + + * fix EINTR handling for kqueue users + + * various fixes for CentOS 7.x + + * fix excessive pipelining to `git cat-file' on systems with small + getdelim(3) buffers (mainly affects musl) + + * support Alpine Linux, Dragonfly, NetBSD and OpenBSD. This resulted + not only in bugfixes to our code, but also to Dragonfly and OpenBSD. + + * Inline::C||Socket::MsgHdr no longer required for SCM_RIGHTS + with sendmsg/recvmsg on supported *BSDs. + + * inotify support no longer requires Linux::Inotify2 XS package + for most architectures + +public-inbox-pop3d + + * support `limit=NUM' and `initial_limit=NUM' query parameters + in mailbox names to limit results + +public-inbox-nntpd + + * fix LISTGROUP with range (affects neomutt) + +public-inbox-clone / public-inbox-fetch / `lei add-external --mirror' + + * mtime of downloaded manifest preserved + +public-inbox-clone: + + * parallel mirroring of multiple inboxes/coderepos via manifest, + public-inbox-fetch is not used in this mode + + * new flags to support manifest mirroring include: + --dry-run, --inbox-config=, --project-list=, --prune, --purge, + --keep-going, --jobs, --include=, --exclude=, --objstore=, + --manifest=, --remote-manifest= + See public-inbox-clone(1) man page for more details. 
+ +PublicInbox::SaPlugin::ListMirror + + * List-ID handling special-cased according to RFC 2919 rules + +Search improvements (lei and PublicInbox::WWW) + + * quoted text inside base-85 binary patches is no longer indexed + + * `public-inbox-cindex --join' prefers using Xapian's C++ API + directly to avoid Perl method dispatch overhead to get usable + performance associating ~300 inboxes with over 1K coderepos + (and vice versa). Users requiring such performance will need + a C++ compiler, pkg-config, and the Xapian development files + (see INSTALL). + + This C++ helper will be used more heavily in the future + to enable query parser customizations and other functionality + unavailable from the Xapian SWIG or XS bindings. + +Thanks to all the bug reporters and users who made this release +possible, and thanks for bearing with my anxiety over making releases. + +Please report bugs via plain-text mail to: meta@public-inbox.org + +See archives at https://public-inbox.org/meta/ for all history. +See https://public-inbox.org/TODO for what the future holds. diff --git a/Documentation/dc-dlvr-spam-flow.txt b/Documentation/dc-dlvr-spam-flow.txt index d151d272..6210fc7d 100644 --- a/Documentation/dc-dlvr-spam-flow.txt +++ b/Documentation/dc-dlvr-spam-flow.txt @@ -39,7 +39,7 @@ delivery path as well as removing the message from the git tree. * incron - run commands based on filesystem events: http://incron.aiken.cz/ -* sendmail / MTA - we use and recommend use postfix, which includes a +* sendmail / MTA - we use and recommend postfix, which includes a sendmail-compatible wrapper: http://www.postfix.org/ * spamc / spamd - SpamAssassin: http://spamassassin.apache.org/ diff --git a/Documentation/design_notes.txt b/Documentation/design_notes.txt index 3df5af3e..95f02556 100644 --- a/Documentation/design_notes.txt +++ b/Documentation/design_notes.txt @@ -52,15 +52,15 @@ Why email? 
There is no need to ask the NSA for backups of your mail archives :) * git, one of the most widely-used version control systems, includes many - tools for for email, including: git-format-patch(1), git-send-email(1), + tools for email, including: git-format-patch(1), git-send-email(1), git-am(1), git-imap-send(1). Furthermore, the development of git itself is based on the git mailing list: https://public-inbox.org/git/ (or http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/git/ - for Tor users) + for Tor users). * Email is already the de-facto form of communication in many Free Software - communities.. + communities. * Fallback/transition to private email and other lists, in case the public-inbox host becomes unavailable, users may still directly email @@ -76,13 +76,13 @@ Why git? * As of 2016, git is widely used and known to nearly all Free Software developers. For non-developers it is packaged for all major GNU/Linux - and *BSD distributions. NNTP is not as widely-used nowadays, and + and *BSD distributions. NNTP is not as widely used nowadays, and most IMAP clients do not have good support for read-only mailboxes. Why perl 5? ----------- -* Perl 5 is widely available on modern *nix systems with good a history +* Perl 5 is widely available on modern *nix systems, with a good history of backwards and forward compatibility. 
* git and SpamAssassin both use it, so it should be one less thing for diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt index b1f916dd..a0003f99 100644 --- a/Documentation/design_www.txt +++ b/Documentation/design_www.txt @@ -7,7 +7,7 @@ URL and anchor naming /$INBOX/?r=$GIT_COMMIT -> HTML only /$INBOX/new.atom -> Atom feed -#### Optional, relies on Search::Xapian (or Xapian SWIG binding) +#### Optional, relies on Xapian /$INBOX/$MESSAGE_ID/t/ -> HTML content of thread (nested) /$INBOX/$MESSAGE_ID/T/ -> HTML content of thread (flat) anchors: @@ -102,7 +102,7 @@ We also set <title> to make window management easier. We favor <pre>-formatted text since public-inbox is intended as a place to share and discuss patches and code. Unfortunately, long paragraphs -tends to be less readable with fixed-width serif fonts which GUI +tend to be less readable with fixed-width serif fonts which GUI browsers default to. * No graphics, images, or icons at all. We tolerate, but do not @@ -122,12 +122,12 @@ browsers default to. avoided as they do not render well with some displays or user-chosen fonts. -* No JavaScript. JS is historically too buggy and insecure, and we will +* No JavaScript. JS is historically too buggy and insecure, and we will never expect our readers to do either of the following: - a) read and audit all our code for on every single page load - b) trust us and and run code without reading it + a) read and audit all our code on every single page load + b) trust us and run code without reading it -* We only use CSS for one reason: wrapping pre-formatted text +* We only use CSS for one reason: wrapping pre-formatted text. This is necessary because unfortunate GUI browsers tend to be prone to layout widening from unwrapped mailers. 
Do not expect CSS to be enabled, especially with scary things like: @@ -141,4 +141,4 @@ CSS classes (for user-supplied CSS) ----------------------------------- See examples in contrib/css/ and lib/PublicInbox/WwwText.pm -(or https://public-inbox.org/meta/_/text/color/ soon) +(or <https://public-inbox.org/meta/_/text/color/>) diff --git a/Documentation/flow.ge b/Documentation/flow.ge index 4308989a..5ad92fec 100644 --- a/Documentation/flow.ge +++ b/Documentation/flow.ge @@ -1,9 +1,11 @@ # public-inbox data flow # # Note: choose either "delivery tools" OR "git mirroring tools" -# for a given inboxdir. Combining them for the SAME inboxdir -# will cause conflicts. Of course, different inboxdirs may -# choose different means of getting mail into them. +# for a given inboxdir. Using them simultaneously for the +# SAME inboxdir will cause conflicts. Of course, different +# inboxdirs may choose different means of getting mail into them. +# You may fork any inbox by starting with "git mirroring tools", +# and switching to "delivery tools". graph { flow: down } @@ -13,6 +15,8 @@ graph { flow: down } public-inbox-learn] -> [inboxdir] [git mirroring tools:\n + public-inbox-clone,\n + public-inbox-fetch,\n grok-pull,\n various scripts ] -- git (clone|fetch) &&\n @@ -20,9 +24,10 @@ graph { flow: down } [inboxdir] -> [read-only daemons:\n + public-inbox-netd\n public-inbox-httpd\n public-inbox-imapd\n public-inbox-nntpd] -# Copyright 2020-2021 all contributors <meta@public-inbox.org> +# Copyright all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/flow.txt b/Documentation/flow.txt index 1116a917..ed2dd80b 100644 --- a/Documentation/flow.txt +++ b/Documentation/flow.txt @@ -1,9 +1,11 @@ # public-inbox data flow # # Note: choose either "delivery tools" OR "git mirroring tools" -# for a given inboxdir. Combining them for the SAME inboxdir -# will cause conflicts. 
Of course, different inboxdirs may -# choose different means of getting mail into them. +# for a given inboxdir. Using them simultaneously for the +# SAME inboxdir will cause conflicts. Of course, different +# inboxdirs may choose different means of getting mail into them. +# You may fork any inbox by starting with "git mirroring tools", +# and switching to "delivery tools". +--------------------+ | delivery tools: | @@ -15,8 +17,10 @@ | v +----------------------+ +--------------------+ -| git mirroring tools: | git (clone|fetch) && | | -| grok-pull, | public-inbox-index | inboxdir | +| git mirroring tools: | | | +| public-inbox-clone, | | | +| public-inbox-fetch, | git (clone|fetch) && | inboxdir | +| grok-pull, | public-inbox-index | | | various scripts | ----------------------> | | +----------------------+ +--------------------+ | @@ -24,10 +28,12 @@ v +--------------------+ | read-only daemons: | + | public-inbox-netd | | public-inbox-httpd | | public-inbox-imapd | | public-inbox-nntpd | +--------------------+ -# Copyright 2020-2021 all contributors <meta@public-inbox.org> +# Copyright all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# This file was generated from flow.txt using Graph::Easy diff --git a/Documentation/hosted.txt b/Documentation/hosted.txt deleted file mode 100644 index 188ad254..00000000 --- a/Documentation/hosted.txt +++ /dev/null @@ -1,41 +0,0 @@ -unofficially hosted mirrors at public-inbox.org - -In addition to eating our own dogfood at <https://public-inbox.org/meta/>, -public-inbox.org hosts unofficial archives for several other projects -to further test our own software. - -These mirrors are NOT to be considered reliable or permanent. -Interested parties are strongly encouraged to host their own mirrors. - -The presence of these archives does not imply these projects endorse -public-inbox or public-inbox.org in any way. 
- -* https://public-inbox.org/bug-gnulib/ - bug-gnulib@gnu.org - Discussion for Gnulib portability/common source project - https://lists.gnu.org/mailman/listinfo/bug-gnulib - -* https://public-inbox.org/git/ - git@vger.kernel.org - Mailing list for the git version control system - http://vger.kernel.org/majordomo-info.html - -* https://public-inbox.org/libc-alpha/ - libc-alpha@sourceware.org - Mailing list for GNU C library development - https://www.gnu.org/software/libc/involved.html - -* https://public-inbox.org/rack-devel/ - rack-devel@googlegroups.com - Development list for the Ruby webserver interface - https://groups.google.com/group/rack-devel - -* https://public-inbox.org/sox-users/ - sox-users@lists.sourceforge.net - Users' list for the SoX sound processing tool - https://lists.sourceforge.net/lists/listinfo/sox-users - -* https://public-inbox.org/sox-devel/ - sox-devel@lists.sourceforge.net - Developers' list for the SoX sound processing tool - https://lists.sourceforge.net/lists/listinfo/sox-devel diff --git a/Documentation/include.mk b/Documentation/include.mk index bfbc495f..86851376 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> all:: @@ -6,6 +6,8 @@ RSYNC = rsync RSYNC_DEST = public-inbox.org:/srv/public-inbox/ AWK = awk MAN = man + +# part of `man-db' on Debian, not sure about other distros LEXGROG = lexgrog # this is "xml" on FreeBSD and maybe some other distros: @@ -47,14 +49,14 @@ install-man: man doc_install :: install-man -check :: check-man +check : check-man check_man = $(AWK) \ '{gsub(/\b./,"")}$$0 !~ /\.onion/&&length>80{print;e=1}END{exit(e)}' \ >&2 -check-man :: $(check_80) +check-man : $(check_80) -check-lexgrog :: $(check_lexgrog) +check-lexgrog : $(check_lexgrog) all :: $(docs) @@ -67,13 +69,16 @@ 
Documentation/standards.txt : Documentation/standards.perl # flow.txt is checked into git since Graph::Easy isn't in many distros Documentation/flow.txt : Documentation/flow.ge - (sed -ne '1,/^$$/p' <Documentation/flow.ge; \ - $(GRAPH_EASY) Documentation/flow.ge || \ - cat Documentation/flow.txt; \ + +%.txt : %.ge + (sed -ne '1,/^$$/p' <$<; \ + $(GRAPH_EASY) $< || grep -v '^#' $@; \ echo; \ - sed -ne '/^# Copyright/,$$p' <Documentation/flow.ge \ + sed -ne '/^# Copyright/,$$p' <$< \ ) >$@+ - touch -r Documentation/flow.ge $@+ + echo >>$@+ \ + '# This file was generated from $(@F) using Graph::Easy' + touch -r $< $@+ mv $@+ $@ Documentation/lei-q.pod : lib/PublicInbox/Search.pm Documentation/common.perl @@ -83,7 +88,7 @@ NEWS NEWS.atom NEWS.html : $(news_deps) $(PERL) -I lib -w Documentation/mknews.perl $@ $(RELEASES) # check for internal API changes: -check :: NEWS .NEWS.atom.check NEWS.html +check : NEWS .NEWS.atom.check NEWS.html .NEWS.atom.check: NEWS.atom $(XMLSTARLET) val NEWS.atom || \ diff --git a/Documentation/lei-add-external.pod b/Documentation/lei-add-external.pod index 7afcad63..2a131b55 100644 --- a/Documentation/lei-add-external.pod +++ b/Documentation/lei-add-external.pod @@ -75,7 +75,9 @@ Default: C<auto> =item --inbox-version=NUM -Force a public-inbox version (must be C<1> or C<2>). +Force a remote public-inbox version (must be C<1> or C<2>). +This is auto-detected by default, and this option exists mainly +for testing. 
=back diff --git a/Documentation/lei-config.pod b/Documentation/lei-config.pod index 663404fe..699f45cb 100644 --- a/Documentation/lei-config.pod +++ b/Documentation/lei-config.pod @@ -4,7 +4,11 @@ lei-config - git-config wrapper for lei configuration file =head1 SYNOPSIS -lei config [OPTIONS] +lei config <name> [[<value>] [<value-pattern>]] + +lei config -l | --list + +lei config -e | --edit =head1 DESCRIPTION @@ -60,8 +64,8 @@ L<https://rt.cpan.org/Ticket/Display.html?id=129967> Enable debugging output of underlying IMAP and NNTP libraries, currently L<Mail::IMAPClient> and L<Net::NNTP>, respectively. -If using L<imap.proxy> or L<nntp.proxy> point to a SOCKS proxy, -debugging output for L<IO::Socket::Socks> will be enabled, as +If L<imap.proxy> or L<nntp.proxy> points to a SOCKS proxy, +debugging output for L<IO::Socket::Socks> will be enabled as well. Disabling L<imap.compress> may be required for readability. @@ -97,6 +101,27 @@ C<frag>, C<func>, and C<context>. =back +=head1 OPTIONS + +Most L<git-config(1)> command-line switches are accepted by C<lei config> +as is. The most frequently used options are expected to be: + +=over 4 + +=item -e + +=item --edit + +Open an editor to edit the lei config file. + +=item -l + +=item --list + +List all variables set in config file, along with their values. + +=back + =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> @@ -106,6 +131,6 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/lei-daemon-kill.pod b/Documentation/lei-daemon-kill.pod index 48c237b8..50f75f4c 100644 --- a/Documentation/lei-daemon-kill.pod +++ b/Documentation/lei-daemon-kill.pod @@ -26,12 +26,12 @@ so another L<lei-daemon(8)> process can take its place. 
=item SIGKILL Kills L<lei-daemon(8)> immediately. Some worker processes may -remain running after a short while after this takes effect. +remain running for a short while. =back =for comment -SIGQUIT and SIGINT currently do what SIGTERM do, may change... +SIGQUIT and SIGINT currently do what SIGTERM does, may change... =head1 CONTACT diff --git a/Documentation/lei-edit-search.pod b/Documentation/lei-edit-search.pod index 21cb11aa..7f447ca2 100644 --- a/Documentation/lei-edit-search.pod +++ b/Documentation/lei-edit-search.pod @@ -8,7 +8,9 @@ lei edit-search [OPTIONS] OUTPUT =head1 DESCRIPTION -Invoke C<git config --edit> to edit the saved search at C<OUTPUT>. +Invoke C<git config --edit> to edit the saved search at C<OUTPUT>, +where C<OUTPUT> was supplied as the argument of C<lei q -o OUTPUT ...>. +A listing of outputs is available via C<lei ls-search>. =head1 CONTACT @@ -19,7 +21,7 @@ and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta =head1 COPYRIGHT -Copyright 2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/lei-forget-search.pod b/Documentation/lei-forget-search.pod index adbe7638..5ff526f1 100644 --- a/Documentation/lei-forget-search.pod +++ b/Documentation/lei-forget-search.pod @@ -8,7 +8,9 @@ lei forget-search [OPTIONS] OUTPUT =head1 DESCRIPTION -Forget a saved search at C<OUTPUT>. +Forget a saved search at C<OUTPUT>, +where C<OUTPUT> was supplied as the argument of C<lei q -o OUTPUT ...>. +A listing of outputs is available via C<lei ls-search>. =head1 OPTIONS diff --git a/Documentation/lei-import.pod b/Documentation/lei-import.pod index 4ac7dccd..31d6db13 100644 --- a/Documentation/lei-import.pod +++ b/Documentation/lei-import.pod @@ -10,7 +10,8 @@ lei import [OPTIONS] (--stdin|-) =head1 DESCRIPTION -Import messages into the local storage of L<lei(1)>. 
C<LOCATION> is a +Import messages into the local storage of L<lei(1)> +(aka L<leiE<sol>store|lei-store-format(5)>). C<LOCATION> is a source of messages: a directory (Maildir), a file, or a URL (C<imap://>, C<imaps://>, C<nntp://>, or C<nntps://>). URLs requiring authentication use L<git-credential(1)> to @@ -85,8 +86,13 @@ Default: C<auto> Use the specified proxy (e.g., C<socks5h://0:9050>). +Consider L<imap.proxy> and L<nntp.proxy> which can be persistently +configured on a per-host basis in L<lei-config(1)>. + =back +See L<lei-config(1)> for various C<imap.*> and C<nntp.*> options. + =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> @@ -102,4 +108,4 @@ License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO -L<lei-index(1)> +L<lei-config(1)>, L<lei-index(1)>, L<lei-store-format(5)> diff --git a/Documentation/lei-inspect.pod b/Documentation/lei-inspect.pod index 19dd8ab5..82b9651a 100644 --- a/Documentation/lei-inspect.pod +++ b/Documentation/lei-inspect.pod @@ -26,7 +26,7 @@ An inboxdir, extindex topdir, or Xapian shard =item --pretty -Pretty print output. If stdout is opened to a tty, C<--pretty> is +Pretty-print output. If stdout is opened to a tty, C<--pretty> is enabled by default. =item - @@ -47,7 +47,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/lei-lcat.pod b/Documentation/lei-lcat.pod index e8073862..530b755e 100644 --- a/Documentation/lei-lcat.pod +++ b/Documentation/lei-lcat.pod @@ -31,7 +31,7 @@ C<-f text> when writing to stdout. Most commonly C<text> (the default) or C<reply> to display the message(s) in a format suitable for trimming -and sending as a email reply. +and sending as an email reply. 
=item --stdin diff --git a/Documentation/lei-ls-mail-source.pod b/Documentation/lei-ls-mail-source.pod index 59d14afe..0e485923 100644 --- a/Documentation/lei-ls-mail-source.pod +++ b/Documentation/lei-ls-mail-source.pod @@ -28,7 +28,7 @@ Format output as JSON and include more information. =item --pretty -Pretty print JSON output. If stdout is opened to a tty, C<--pretty> +Pretty-print JSON output. If stdout is opened to a tty, C<--pretty> is enabled by default. =item --ascii diff --git a/Documentation/lei-ls-search.pod b/Documentation/lei-ls-search.pod index a56611bf..367f4ad6 100644 --- a/Documentation/lei-ls-search.pod +++ b/Documentation/lei-ls-search.pod @@ -8,7 +8,8 @@ lei ls-search [OPTIONS] [PREFIX] =head1 DESCRIPTION -List saved search queries. If C<PREFIX> is given, restrict the output +List saved search queries (generated from C<lei q -o OUTPUT>). +If C<PREFIX> is given, restrict the output to entries that start with the specified value. =head1 OPTIONS @@ -25,7 +26,7 @@ C<jsonl>, or C<concatjson>. =item --pretty -Pretty print C<json> or C<concatjson> output. If stdout is opened to +Pretty-print C<json> or C<concatjson> output. If stdout is opened to a tty and used as the C<--output> destination, C<--pretty> is enabled by default. @@ -55,7 +56,7 @@ and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta =head1 COPYRIGHT -Copyright 2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/lei-mail-formats.pod b/Documentation/lei-mail-formats.pod index 930c5d76..618bada2 100644 --- a/Documentation/lei-mail-formats.pod +++ b/Documentation/lei-mail-formats.pod @@ -83,9 +83,19 @@ mbox. =head1 MH -Not yet supported, locking semantics (or lack thereof) appear to -make it unsuitable for parallel access. It is widely-supported -by a variety of MUAs and mailing list managers, however. 
+Preliminary support for reads as of 2.0.0. Locking semantics differ +incompatibly amongst existing writers: Python and nmh appear +compatible with each other, while mutt appears racy and unsuitable +for parallel access due to rename(2) potentially clobbering the +C<.mh_sequences> file. More info about other clients is greatly +appreciated. + +Sequence numbers may be packed and reused by some writers, so lei +users may need to run L<lei-refresh-mail-sync(1)> if inotify|kevent +missed packing while L<lei-daemon(8)> wasn't running. + +lei is safe for reading mlmmj archives as MH since mlmmj neither +packs nor uses a .mh_sequences file to store state. =head1 MMDF diff --git a/Documentation/lei-mail-sync-overview.pod b/Documentation/lei-mail-sync-overview.pod index e30674bb..7ae7e887 100644 --- a/Documentation/lei-mail-sync-overview.pod +++ b/Documentation/lei-mail-sync-overview.pod @@ -20,7 +20,7 @@ Future work will be done to improve it and add IMAP IDLE support. # dump "inbox" labeled files from the past week to a Maildir lei q L:inbox rt:last.week.. -o /tmp/results - # open /tmp/results in your favorite mail agent. If inotify or kevent + # Open /tmp/results in your favorite mail agent. If inotify or kevent # works, keyword changes (e.g. marking messages as `seen') are # synchronized automatically. diff --git a/Documentation/lei-overview.pod b/Documentation/lei-overview.pod index 7095b504..e9a97d64 100644 --- a/Documentation/lei-overview.pod +++ b/Documentation/lei-overview.pod @@ -119,11 +119,13 @@ code repository. =head1 PERFORMANCE NOTES -L<Inline::C> is required, lei runs as a background daemon to reduce -startup costs and can provide real-time L<kqueue(2)>/L<inotify(7)> -Maildir monitoring. L<IO::KQueue> (p5-IO-KQueue on FreeBSD) and -L<Linux::Inotify2> (liblinux-inotify2-perl and perl-Linux-Inotify2 in -.deb and .rpm-based distros, respectively) are recommended. +L<Inline::C> is required on BSDs and can speed things up on Linux. 
+ +lei runs as a background daemon to reduce startup costs and can +provide real-time L<kqueue(2)>/L<inotify(7)> Maildir monitoring. +L<IO::KQueue> (p5-IO-KQueue on FreeBSD) and L<Linux::Inotify2> +(liblinux-inotify2-perl and perl-Linux-Inotify2 in .deb and .rpm-based +distros, respectively) are recommended. L<Socket::MsgHdr> is optional (libsocket-msghdr-perl in Debian), and further improves startup performance. Its effect is most felt diff --git a/Documentation/lei-q.pod b/Documentation/lei-q.pod index 2f0c3bc6..79156750 100644 --- a/Documentation/lei-q.pod +++ b/Documentation/lei-q.pod @@ -12,9 +12,6 @@ lei q [OPTIONS] (--stdin|-) Search for messages across the lei/store and externals. -=for comment -TODO: Give common prefixes, or at least a description/reference. - =head1 OPTIONS =for comment @@ -50,6 +47,10 @@ A prefix can specify the format of the output: C<maildir>, C<mboxrd>, C<mboxcl2>, C<mboxcl>, C<mboxo>. For a description of mail formats, see L<lei-mail-formats(5)>. +C<v2:/path/to/inbox> may be used to create a new inbox of +L<public-inbox-v2-format(5)>. The new inbox will not be configured +in the L<public-inbox-config(5)> file. + C<maildir> is the default for an existing directory or non-existing path. Default: C<-> (stdout) @@ -76,7 +77,7 @@ Disable color (for C<-f reply> and C<-f text>). =item --pretty -Pretty print C<json> or C<concatjson> output. If stdout is opened to +Pretty-print C<json> or C<concatjson> output. If stdout is opened to a tty and used as the C<--output> destination, C<--pretty> is enabled by default. @@ -107,8 +108,9 @@ Augment output destination instead of clobbering it. =item --no-import-before -Do not import keywords before writing to an existing output -destination. +Do not import messages before writing to an existing output destination. +Be certain you do not need existing data in your output before using +this, it permanently erases data unless C<--augment> is used. =item --threads @@ -124,6 +126,37 @@ of the same thread. 
TODO: Warning: this flag may become persistent and saved in lei/store unless an MUA unflags it! (Behavior undecided) +Caveat: C<-tt> only works on locally indexed messages at the +moment, and not on remote (HTTP(S)) endpoints. + +=item --thread-id=MSGID + +=item -T MSGID + +Only search messages in the same thread as the given Message-ID. + +For HTTP(S) externals, this only works on instances running +public-inbox 2.0+ (UNRELEASED). + +=item --jobs=QUERY_WORKERS[,WRITE_WORKERS] + +=item --jobs=,WRITE_WORKERS + +=item -j QUERY_WORKERS[,WRITE_WORKERS] + +=item -j ,WRITE_WORKERS + +Set the number of query and write worker processes for parallelism. + +C<QUERY_WORKERS> defaults to the number of CPUs available, but 4 per +remote (HTTP/HTTPS) host. + +C<WRITE_WORKERS> defaults to 75% of the number of CPUs available for +Maildir and mbox* destinations, but 4 per IMAP/IMAPS host. + +Omitting C<QUERY_WORKERS> but leaving the comma (C<,>) allows +one to only set C<WRITE_WORKERS>. + =item --dedupe=STRATEGY =item -d STRATEGY @@ -194,7 +227,7 @@ Default: fcntl,dotlock =item -n NUMBER -Fuzzy limit the number of matches per-local external and lei/store. +Fuzzy-limit the number of matches per local external and lei/store. Messages added by the L<--threads> switch do not count towards this limit, and there is no limit on remote externals. diff --git a/Documentation/lei-reindex.pod b/Documentation/lei-reindex.pod new file mode 100644 index 00000000..3a5861c4 --- /dev/null +++ b/Documentation/lei-reindex.pod @@ -0,0 +1,47 @@ +=head1 NAME + +lei-reindex - reindex messages already in lei/store + +=head1 SYNOPSIS + +lei reindex [OPTIONS] + +=head1 DESCRIPTION + +Forces a re-index of all messages previously indexed by L<lei-import(1)> +or L<lei-index(1)>. This can be used for in-place upgrades and bugfixes +while other processes are querying the store. Keep in mind this roughly +doubles the size of the already-large Xapian database. 
+ +It does not re-index messages in externals, using the C<--reindex> +switch of L<public-inbox-index(1)> or L<public-inbox-extindex(1)> is +needed for that. + +=head1 OPTIONS + +=over + +=item -q + +=item --quiet + +Suppress feedback messages. + +=back + +=head1 CONTACT + +Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> + +The mail archives are hosted at L<https://public-inbox.org/meta/> and +L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> + +=head1 COPYRIGHT + +Copyright all contributors L<mailto:meta@public-inbox.org> + +License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> + +=head1 SEE ALSO + +L<lei-index(1)>, L<lei-import(1)> diff --git a/Documentation/lei-security.pod b/Documentation/lei-security.pod index 104bfb48..e54cae90 100644 --- a/Documentation/lei-security.pod +++ b/Documentation/lei-security.pod @@ -4,7 +4,7 @@ lei - security information =head1 SYNOPSIS -L<lei(1)> is intended for use with both publicly-archived +L<lei(1)> is intended for use with both publicly archived and "private" mail in personal mailboxes. This document is intended to give an overview of security implications and lower^Wmanage user expectations. @@ -66,7 +66,7 @@ other users on the local system. =head1 CORE DUMPS -In case any process crashes, a core dumps may contain passwords or +In case any process crashes, a core dump may contain passwords or contents of sensitive messages. Please report these so they can be fixed (see L</CONTACT>). @@ -74,7 +74,7 @@ fixed (see L</CONTACT>). lei currently uses the L<curl(1)> and L<git(1)> executables in C<$PATH> for HTTP and HTTPS network access. Interactive -authentication for HTTP and HTTPS is not-yet-supported since all +authentication for HTTP and HTTPS is not yet supported since all currently supported HTTP/HTTPS sources are L<PublicInbox::WWW> instances. @@ -83,7 +83,7 @@ L<Net::NNTP> (standard library) is used for NNTP and NNTPS. 
L<Mail::IMAPClient> and L<Net::NNTP> will use L<IO::Socket::SSL> for TLS if available. In turn, L<IO::Socket::SSL> uses the -widely-installed OpenSSL library. +widely installed OpenSSL library. STARTTLS will be attempted if advertised by the server unless IMAPS or NNTPS are used. C<-c imap.starttls=0> diff --git a/Documentation/lei-store-format.pod b/Documentation/lei-store-format.pod index 625c60f4..d4bb42d5 100644 --- a/Documentation/lei-store-format.pod +++ b/Documentation/lei-store-format.pod @@ -67,7 +67,7 @@ the "same" message. =head2 mail_sync.sqlite3 -This SQLite database maintained for bidirectional mapping of +This SQLite database is maintained for bidirectional mapping of git blobs to IMAP UIDs, Maildir file names, and NNTP article numbers. It is also used for retrieving messages from Maildirs indexed by diff --git a/Documentation/lei-up.pod b/Documentation/lei-up.pod index ac644a96..8c426942 100644 --- a/Documentation/lei-up.pod +++ b/Documentation/lei-up.pod @@ -32,8 +32,8 @@ delays or downtime. The time(s) of the last successful queries are the C<lastresult> values visible from L<lei-edit-search(1)>. -Date formats understood by L<git-rev-parse(1)> may be used. -e.g C<1.hour> or C<3.days> +Date formats understood by L<git-rev-parse(1)> may be used, +e.g., C<1.hour> or C<3.days>. Default: 2.days @@ -64,7 +64,9 @@ specified via C<lei q --only>. =item --mua=CMD -C<--lock>, C<--alert>, and C<--mua> are all supported and +=item --jobs QUERY_WORKERS[,WRITE_WORKERS] + +C<--lock>, C<--alert>, C<--mua>, and C<--jobs> are all supported and documented in L<lei-q(1)>. C<--mua> is incompatible with C<--all>. diff --git a/Documentation/lei.pod b/Documentation/lei.pod index f01f506a..2b10f490 100644 --- a/Documentation/lei.pod +++ b/Documentation/lei.pod @@ -126,7 +126,7 @@ Other subcommands include =head1 FILES -By default storage is located at C<$XDG_DATA_HOME/lei/store>. The +By default, storage is located at C<$XDG_DATA_HOME/lei/store>. 
The configuration for lei resides at C<$XDG_CONFIG_HOME/lei/config>. =head1 ERRORS diff --git a/Documentation/marketing.txt b/Documentation/marketing.txt index 385e5172..8e4aa3b5 100644 --- a/Documentation/marketing.txt +++ b/Documentation/marketing.txt @@ -3,7 +3,9 @@ marketing guide for public-inbox TL; DR: Don't market this. If you must: don't be pushy and annoying about it. Slow down. -Please no superlatives, hype or BS. +Please no superlatives, hype or BS. Please keep all marketing +materials text-only to be accessible to those on slow networks +and ancient hardware. It's online and public, so it already markets itself. Being informative is not a bad thing, being insistent is. @@ -25,3 +27,12 @@ than the adoption of any software. Every time somebody recognizes and rejects various forms of lock-in and centralization is already a victory for us. + +Please keep in mind: + +* Perl 5 is not a well-liked language +* AGPL is not a well-liked license +* maintainer is a shy introvert + +Be sure to mention these things in any marketing materials +to avoid wasting time of people who hate Perl and/or AGPL. 
diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl index 1936cea7..001ad310 100755 --- a/Documentation/mknews.perl +++ b/Documentation/mknews.perl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Generates NEWS, NEWS.atom, and NEWS.html files using release emails # this uses unstable internal APIs of public-inbox, and this script @@ -46,6 +46,11 @@ if ($dst eq 'NEWS') { ibx => $ibx, -upfx => "$base_url/", -hr => 1, + zfh => $out, + env => { + HTTP_HOST => 'public-inbox.org', + 'psgi.url_scheme' => 'https', + }, }; if ($dst eq 'NEWS.html') { html_start($out, $ctx); diff --git a/Documentation/public-inbox-cindex.pod b/Documentation/public-inbox-cindex.pod new file mode 100644 index 00000000..fdc2b82d --- /dev/null +++ b/Documentation/public-inbox-cindex.pod @@ -0,0 +1,136 @@ +=head1 NAME + +public-inbox-cindex - create and update code repository search indices + +=head1 SYNOPSIS + +public-inbox-cindex [OPTIONS] -g GIT_DIR [-g GIT_DIR]... + +public-inbox-cindex [OPTIONS] --update + +=head1 DESCRIPTION + +public-inbox-cindex creates and updates the Xapian search index for +git code repository (C<coderepo>) search. It can also associate +(fuzzy join) coderepos with Xapian-indexed inboxes. It only indexes +commit messages and diffs as they would show up in an email. It +does not index the contents of blobs directly. + +Like inbox indices, coderepo indices can either be internal or external +to a coderepo. Either way, they're both created and updated through +public-inbox-cindex. + +Once the initial indices are created by public-inbox-cindex, +the L</--update> switch will incrementally update them. + +=head1 OPTIONS + +=over + +=item -d EXTDIR + +Use the given directory as an external index. 
External indices are +generally recommended over internal indices since they do not need +write access to any code repositories themselves. They are highly +recommended when many repositories share a common history or if +there is an M:N relationship between inboxes and coderepos. + +=item -j JOBS + +=item --jobs=JOBS + +Influences the number of Xapian indexing shards. + +If the repo has not been indexed or initialized, C<JOBS - 1> +shards will be created. + +Default: the number of existing Xapian shards + +=item --reindex + +Forces a re-index of all commits. This can be used for in-place +upgrades and bugfixes while read-only processes are utilizing the index. + +=item --update + +=item -u + +Incrementally index all previously-indexed coderepos. + +=item --prune + +Removes commits which are no longer accessible via git. +Use this after L<git-gc(1)> (or L<git-prune(1)>). + +=item --no-fsync + +=item --dangerous + +=item --max-size SIZE + +=item --batch-size SIZE + +These affect the coderepo index the same way they affect +inbox indices. See L<public-inbox-index(1)>. + +=back + +=head1 FILES + +For internal indices, the Xapian DB is stored in +C<$GIT_DIR/public-inbox-cindex>. + +External indices are stored wherever L</-d> EXTDIR points. + +=head1 CONFIGURATION + +=over 8 + +=item publicinbox.indexMaxSize + +=item publicinbox.indexBatchSize + +These configuration knobs affect the coderepo index the same way +they affect inbox indices. See L<public-inbox-index(1)>. + +=back + +=head1 ENVIRONMENT + +=over 8 + +=item PI_CONFIG + +Used to override the default "~/.public-inbox/config" value. + +=item XAPIAN_FLUSH_THRESHOLD + +The number of documents to update before committing changes to +disk. This variable is handled directly by Xapian, refer to +Xapian API documentation for more details. + +Use C<publicinbox.indexBatchSize> instead.
+ +=back + +=head1 UPGRADING + +Occasionally, public-inbox will update its schema version and +require a full reindex by running this command with L</--reindex>. + +=head1 CONTACT + +Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> + +The mail archives are hosted at L<https://public-inbox.org/meta/> and +L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> + +=head1 COPYRIGHT + +Copyright all contributors L<mailto:meta@public-inbox.org> + +License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> + +=head1 SEE ALSO + +L<public-inbox-index(1)> diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod index c80c3c5f..64ee3138 100644 --- a/Documentation/public-inbox-clone.pod +++ b/Documentation/public-inbox-clone.pod @@ -4,7 +4,9 @@ public-inbox-clone - "git clone --mirror" wrapper =head1 SYNOPSIS -public-inbox-clone INBOX_URL [INBOX_DIR] +public-inbox-clone [OPTIONS] INBOX_URL [INBOX_DIR] + +public-inbox-clone [OPTIONS] ROOT_URL [DESTINATION] # public-inbox 2.0+ =head1 DESCRIPTION @@ -13,20 +15,29 @@ making the initial clone of a remote HTTP(S) public-inbox. It allows cloning multi-epoch v2 inboxes with a single command and zero configuration. +In public-inbox 2.0+, public-inbox-clone can create and maintain +a mirror of multiple inboxes or code repositories using manifest.js.gz +files like L<grok-pull(1)> from grokmirror. L<public-inbox-fetch(1)> is +NOT required when using this mode. + It does not run L<public-inbox-init(1)> nor L<public-inbox-index(1)>. Those commands must be run separately if serving/searching the mirror is required. As-is, -public-inbox-clone is suitable for creating a git-only backup. +public-inbox-clone is suitable for creating a git-only backup +without Xapian and SQLite indices. -public-inbox-clone creates a Makefile with handy targets to update the -inbox once indexed. 
This Makefile may be edited by the user; it will +When cloning a single inbox, public-inbox-clone creates a Makefile +with handy targets to update the inbox once indexed. +This Makefile may be edited by the user; it will not be rewritten by L<public-inbox-fetch(1)> unless it is removed completely. public-inbox-clone does not use nor require any extra -configuration files (not even C<~/.public-inbox/config>). +configuration files (not even C<~/.public-inbox/config>), +but it can download snippets suitable for adding to any +L<public-inbox-config(5)> file. -L<public-inbox-fetch(1)> may be used to keep C<INBOX_DIR> +L<public-inbox-fetch(1)> may be used to keep a single C<INBOX_DIR> up-to-date. For v2 inboxes, it will create a C<$INBOX_DIR/manifest.js.gz> @@ -51,6 +62,157 @@ C<--epoch=~2..> clones the three latest epochs. Default: C<0..~0> or C<0..> or C<..~0> (all epochs, all three examples are equivalent) +=item -I PATTERN + +=item --include=PATTERN + +When cloning a top-level with multiple inboxes via manifest, +only clone inboxes and repositories matching a given wildcard pattern +(using C<*?> and C<[]> is supported). + +This is a new option in public-inbox 2.0+ + +=item --exclude=PATTERN + +When cloning a top-level with multiple inboxes via manifest, +ignore inboxes and repositories matching the given wildcard pattern. +Supports the same wildcards as L</--include> + +This is a new option in public-inbox 2.0+ + +=item --inbox-config=always|v2|v1|never + +Whether or not to retrieve the C<$INBOX/_/text/config/raw> HTTP(S) +endpoint when cloning. + +Since we can't deduce v1 inboxes from code repositories, setting this +to C<v2> or C<never> can allow faster clones of code repositories if +no v1 inboxes are present. + +Default: C<always> + +This is a new option in public-inbox 2.0+ + +=item --inbox-version=NUM + +Force a remote public-inbox version (must be C<1> or C<2>). +This is auto-detected by default, and this option exists mainly +for testing. 
+ +This is a new option in public-inbox 2.0+ + +=item --objstore=DIR + +Enables space savings when the remote C<manifest.js.gz> +includes C<forkgroup> entries as generated by grokmirror 2.x. + +If C<DIR> does not start with C</>, C<./>, or C<../>, it is treated +as relative to the C<DESTINATION> directory. If only C<--objstore=> +is specified where C<DIR> is an empty string (C<"">), then C<objstore> +(C<$DESTINATION/objstore>) is the implied value of C<DIR>. + +This is a new option in public-inbox 2.0+ + +=item --manifest=FILE + +When incrementally updating an existing mirror, load the given +manifest (typically C<manifest.js.gz>) to speed up updates. + +By default, public-inbox writes the retrieved manifest to +C<$DESTINATION/manifest.js.gz>, this directive also +changes the destination to the specified C<FILE> + +If C<FILE> does not start with C</>, C<./>, or C<../>, it is treated +as relative to the C<DESTINATION> directory. If only C<--manifest=> +is specified where C<FILE> is an empty string (C<"">), then C<manifest.js.gz> +(C<$DESTINATION/manifest.js.gz>) is the implied value of C<FILE>. + +When updating manifests with many forks using the same objstore, +git 2.41+ is highly recommended for performance as we automatically +use the C<fetch.hideRefs> feature to speed up negotiation. + +C<--manifest=> is a new option in public-inbox 2.0+ + +=item --remote-manifest=URL|RELATIVE_PATH + +Use an alternate location for the remote manifest.js.gz file. +This may be specified as a full absolute URL (e.g +C<--remote-manifest=https://80x24.org/lore/pub/manifest.js.gz>), +or a pathname relative to the ROOT_URL (e.g +C<--remote-manifest=pub/manifest.js.gz> when ROOT_URL is +C<https://80x24.org/lore/>). + +By default, C<ROOT_URL/manifest.js.gz> is used. + +This is a new option in public-inbox 2.0+ + +=item --project-list=FILE + +When cloning code repos from a manifest, generate a cgit-compatible +project list.
+ +If C<FILE> does not start with C</>, C<./>, or C<../>, it is treated +as relative to the C<DESTINATION> directory. If only C<--project-list=> +is specified where C<FILE> is an empty string (C<"">), then C<projects.list> +(C<$DESTINATION/projects.list>) is the implied value of C<FILE>. + +This is a new option in public-inbox 2.0+ + +=item --post-update-hook=COMMAND + +Hooks to run after a repository is cloned or updated, C<COMMAND> will +have the bare git repository destination given as its first and only +argument. + +For v2 inboxes, this operates on a per-epoch basis. + +May be specified multiple times to run multiple commands in the +order specified on the command-line. + +This is a new option in public-inbox 2.0+ + +=item -p + +=item --prune + +Pass the C<--prune> and C<--prune-tags> flags to L<git-fetch(1)> +calls on incremental clones. + +This is a new option in public-inbox 2.0+ + +=item --purge + +Deletes entire repos which no longer exist in the remote manifest, +or are filtered out by C<--include=> or C<--exclude=>. + +This is only useful when using C<--manifest> + +This is a new option in public-inbox 2.0+ + +=item --exit-code + +Exit with C<127> if no updates are done when relying on a manifest. +Updates include fingerprint mismatches in the manifest, new symlinks, +new repositories, and removed repositories from the L<--project-list> + +This is a new option in public-inbox 2.0+ + +=item -k + +=item --keep-going + +Continue as much as possible after an error. + +This is a new option in public-inbox 2.0+ + +=item -n + +=item --dry-run + +Show what would be done, without making any changes. + +This is a new option in public-inbox 2.0+ + =item -q =item --quiet @@ -71,6 +233,40 @@ Whether to wrap L<git(1)> and L<curl(1)> commands with L<torsocks(1)>. Default: C<auto> +=item -j JOBS + +=item --jobs=JOBS + +The number of parallel processes to spawn at once for various network +operations using L<git(1)> and/or L<curl(1)>. 
+ +=back + +=head1 EXAMPLES + +=for comment +Sticking to smaller projects in examples to minimize load on servers + +=over + +=item To mirror the most recent epochs of dwarves and LTTng inboxes: + + public-inbox-clone --epoch=~0 \ + --include='*lttng*' --include='*dwarves' \ + https://80x24.org/lore/ /path/to/inbox-mirror + +C<https://lore.kernel.org/> may be used instead of C<https://80x24.org/lore/> + +=item To mirror all code repos of the sparse project: + + public-inbox-clone --objstore= --project-list= --prune \ + --include='*sparse*' --inbox-config=never \ + --remote-manifest=https://80x24.org/lore/pub/manifest.js.gz \ + https://80x24.org/lore/ /path/to/code-mirror + +C<https://git.kernel.org/> may be used instead of C<https://80x24.org/lore/> +and the C<--remote-manifest> option can be omitted. + =back =head1 CONTACT diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index d8504e61..b4a1d94d 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -74,6 +74,11 @@ Omitting this for a given inbox will prevent the inbox from being served by L<public-inbox-nntpd(1)>, L<public-inbox-imapd(1)>, and/or L<public-inbox-pop3d(1)> +Newsgroup names should be all lowercase. Uppercase characters are +converted to lowercase for compatibility with IMAP, POP3, and our +L<public-inbox-extindex(1)> and L<public-inbox-cindex(1)> tools +starting with public-inbox 2.0+ (they were unusable before). + Default: none, optional =item publicinbox.<name>.watch @@ -125,8 +130,8 @@ C<basic> only requires L<DBD::SQLite(3pm)> and provides all NNTP functionality along with thread-awareness in the WWW interface. -C<medium> requires L<Search::Xapian(3pm)> to provide full-text -term search functionality in the WWW UI. +C<medium> requires L<Xapian(3pm)> or L<Search::Xapian(3pm)> to provide +full-text term search functionality in the WWW UI. 
C<full> also includes positional information used by Xapian to allow for searching for phrases using quoted text. @@ -191,11 +196,28 @@ Default: :all The local path name of a CSS file for the PSGI web interface. May contain the attributes "media", "title" and "href" which match the associated attributes of the HTML <style> tag. -"href" may be specified to point to the URL of an remote CSS file +"href" may be specified to point to the URL of a remote CSS file and the path may be "/dev/null" or any empty file. Multiple files may be specified and will be included in the order specified. +=item publicinboxImport.dropUniqueUnsubscribe + +Drop C<List-Unsubscribe> headers if the message also includes +the C<List-Unsubscribe-Post: List-Unsubscribe=One-Click> header +to signal MUAs to support an instantaneous unsubscribe. This +is strongly recommended for users creating their own public +archives of mailing lists they subscribe to, otherwise any +archive reader can unsubscribe the archivist. + +This may break DKIM signatures if the C<List-Unsubscribe*> +headers are signed, but breaking DKIM signatures is the +lesser evil compared to allowing any reader to unsubscribe +the archivist. + +This affects L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, +and L<public-inbox-learn(1)> + =item publicinboxmda.spamcheck This may be set to C<none> to disable the use of SpamAssassin @@ -251,7 +273,9 @@ Default: 25 A comma-delimited list of listings to hide the inbox from. -Valid values are currently C<www> and C<manifest>. +Valid values are currently C<www> and C<manifest> for non-C<404> +values of L</publicinbox.wwwListing> and L</publicinbox.grokManifest>, +respectively Default: none @@ -265,6 +289,10 @@ The URL of the cgit instance associated with the coderepo. Default: none +=item coderepo.snapshots + +See C<snapshots> in L<cgitrc(5)> + =item publicinbox.cgitrc A path to a L<cgitrc(5)> file. 
"repo.url" directives in the cgitrc @@ -287,18 +315,50 @@ Default: /var/www/htdocs/cgit/cgit.cgi or /usr/lib/cgit/cgit.cgi =item publicinbox.cgitdata A path to the data directory used by cgit for storing static files. -Typically guessed based the location of C<cgit.cgi> (from -C<publicinbox.cgitbin>, but may be overridden. +Typically guessed based on the location of C<cgit.cgi> (from +C<publicinbox.cgitbin>), but may be overridden. -Default: basename of C<publicinbox.cgitbin>, /var/www/htdocs/cgit/ +Default: dirname of C<publicinbox.cgitbin>, /var/www/htdocs/cgit/ or /usr/share/cgit/ +=item publicinbox.cgit + +Controls whether or not and how C<cgit> is used for serving coderepos. +New in public-inbox 2.0.0 (PENDING). + +=over 8 + +=item * first + +Try using C<cgit> as the first choice, this is the default. + +=item * fallback + +Fall back to using C<cgit> only if our native, inbox-aware +git code repository viewer doesn't recognize the URL. + +=begin comment +=for comment rewrite is not yet implemented +=item * rewrite + +Rewrite C<cgit> URLs for our native, inbox-aware code repository viewer. +This implies C<fallback> for URLs the native viewer does not recognize. + +=end comment + +=back + +Default: C<first> (C<cgit> will be used iff C<publicinbox.cgitrc> +is set and the C<cgit> binary exists). + =item publicinbox.mailEditor See L<public-inbox-edit(1)> =item publicinbox.indexMaxSize + =item publicinbox.indexBatchSize + =item publicinbox.indexSequentialShard See L<public-inbox-index(1)> @@ -327,6 +387,21 @@ TODO support showing cgit listing Default: C<404> +=item publicinbox.nameIsUrl + +Treat the name of the public inbox as its unqualified URL when +using C<publicInbox.wwwListing=all>. That is, every +C<[publicinbox "foo"]> section implicitly sets C<publicinbox.foo.url=foo>. 
+ +This is a convenient alternative to specifying +C<publicinbox.E<lt>nameE<gt>.url> for every single inbox if +your inbox URLs are domain-agnostic when using +C<publicInbox.wwwListing=all> + +Default: false + +New in public-inbox 2.0.0 (PENDING). + =item publicinbox.grokmanifest Controls the generation of a grokmirror-compatible gzipped JSON file diff --git a/Documentation/public-inbox-daemon.pod b/Documentation/public-inbox-daemon.pod index 81a79a10..092be667 100644 --- a/Documentation/public-inbox-daemon.pod +++ b/Documentation/public-inbox-daemon.pod @@ -79,9 +79,9 @@ C<err=> may also be specified on a per-listener basis. Default: /dev/null with C<--daemonize>, inherited otherwise -=item -W +=item -W INTEGER -=item --worker-processes +=item --worker-processes INTEGER Set the number of worker processes. @@ -96,31 +96,81 @@ the master on crashes. Default: 1 +=item -X INTEGER + +=item --xapian-helpers INTEGER + +Enables the use of Xapian helper processes to handle expensive, +non-deterministic Xapian search queries asynchronously without +blocking simple requests. + +With positive values, there is an additional manager process +that can be signaled to control the number of Xapian helper workers. + +* C<-X0> one worker, no manager process +* C<-X1> one worker, one manager process +... +* C<-X8> eight workers, one manager process + +As with the public-facing public-inbox-* daemons, sending C<SIGTTIN> +or C<SIGTTOU> to the Xapian helper manager process will increment or +decrement the number of workers. + +Both Xapian helper workers and managers automatically respawn if they +crash or are explicitly killed, even with C<-X0>. + +A C++ compiler, L<pkg-config(1)>, and Xapian development files (e.g. +C<libxapian-dev> or C<xapian*-core-dev*>) are required to gain access to +some expensive queries and significant memory savings. + +Xapian helper workers are shared by all C<--worker-processes> of the +Perl daemon for additional memory savings. + +New in public-inbox 2.0.0. 
+ +Default: undefined, search queries are handled synchronously + =item --cert /path/to/cert The default TLS certificate for HTTPS, IMAPS, NNTPS, POP3S and/or STARTTLS support if the C<cert> option is not given with C<--listen>. -Well-known TCP ports automatically get TLS or STARTTLS support -If using systemd-compatible socket activation and a TCP listener -on port well-known ports (563 is inherited, it is automatically -NNTPS when this option is given. When a listener on port 119 is -inherited and this option is given, it automatically gets -STARTTLS support. +With this option, well-known TCP ports automatically get TLS or STARTTLS +support if using systemd-compatible socket activation. That is, ports +443, 563, 993, and 995 support HTTPS, NNTPS, IMAPS, and POP3S, +respectively; while ports 110, 119, and 143 support STARTTLS on POP3, +NNTP, and IMAP, respectively. =item --key /path/to/key The default TLS certificate key for the default C<--cert> or per-listener C<cert=> option. The private key may be -concatenated into the path used by the cert, in which case this +concatenated into the cert file itself, in which case this option is not needed. +=item --multi-accept INTEGER + +By default, each worker accepts one connection at a time to maximize +fairness and minimize contention across multiple processes on a +shared listen socket. Accepting multiple connections at once may be +useful in constrained deployments with few, heavily loaded workers. +Negative values enable a worker to accept all available clients at +once, possibly starving others in the process. C<-1> behaves like +C<multi_accept yes> in nginx; while C<0> (the default) is +C<multi_accept no> in nginx. Positive values allow +fine-tuning without the runaway behavior of C<-1>. + +This may be specified on a per-listener basis via the C<multi-accept=> +per-listener directive (e.g. C<-l http://127.0.0.1?multi-accept=1>).
+ +Default: 0 + =back =head1 SIGNALS Most of our signal handling behavior is copied from L<nginx(8)> -and/or L<starman(1)>; so it is possible to reuse common scripts +and/or L<starman(1)>, so it is possible to reuse common scripts for managing them. =over 8 @@ -141,7 +191,7 @@ Reload config files associated with the process. =item SIGTTIN -Increase the number of running workers processes by one. +Increase the number of running worker processes by one. =item SIGTTOU @@ -149,7 +199,7 @@ Decrease the number of running worker processes by one. =item SIGWINCH -Stop all running worker processes. SIGHUP or SIGTTIN +Stop all running worker processes. SIGHUP or SIGTTIN may be used to restart workers. =item SIGQUIT @@ -177,7 +227,7 @@ activation. See L<systemd.socket(5)> and L<sd_listen_fds(3)>. =item PERL_INLINE_DIRECTORY -Pointing this to point to a writable directory enables the use +Pointing this to a writable directory enables the use of L<Inline> and L<Inline::C> extensions which may provide platform-specific performance improvements. Currently, this enables the use of L<vfork(2)> which speeds up subprocess @@ -194,8 +244,8 @@ created by a user. See L<Inline> and L<Inline::C> for more details. There are two ways to upgrade a running process. Users of process management systems with socket activation -(L<systemd(1)> or similar) may rely on multiple instances For -systemd, this means using two (or more) '@' instances for each +(L<systemd(1)> or similar) may rely on multiple daemon instances. +For systemd, this means using two (or more) '@' instances for each service (e.g. C<SERVICENAME@INSTANCE>) as documented in L<systemd.unit(5)>. 
diff --git a/Documentation/public-inbox-extindex.pod b/Documentation/public-inbox-extindex.pod index f71a90e5..2db7d7e9 100644 --- a/Documentation/public-inbox-extindex.pod +++ b/Documentation/public-inbox-extindex.pod @@ -13,7 +13,7 @@ public-inbox-extindex [OPTIONS] [EXTINDEX_DIR] --all public-inbox-extindex creates and updates an external search and overview database used by the read-only public-inbox PSGI (HTTP), NNTP, and IMAP interfaces. This requires either the -L<Search::Xapian> XS bindings OR the L<Xapian> SWIG bindings, +L<Xapian> SWIG bindings OR L<Search::Xapian> XS bindings along with L<DBD::SQLite> and L<DBI> Perl modules. =head1 OPTIONS @@ -47,11 +47,26 @@ C<indexlevel> set to C<basic> and their respective Xapian public-inboxes where cross-posting is common, this allows significant space savings on Xapian indices. +=item --dedupe=MSGID + +=item --dedupe + +Rerun deduplication on messages with the given Message-ID or +all messages if no Message-ID is specified. Deduplication rules may +change and evolve over time, especially if filters are involved. + +C<--dedupe=MSGID> may be specified multiple times to deduplicate +multiple Message-IDs. + +Use this if you see C<W: BUG? $MSGID not deduplicated properly> +warnings from WWW logs. + =item --gc Perform garbage collection instead of indexing. Use this if -inboxes are removed from the extindex, or if messages are -purged or removed from some inboxes. +inboxes are removed from the extindex, a newsgroup name is +set or changed, or if messages are purged or removed from +some inboxes. =item --reindex @@ -60,15 +75,24 @@ used for in-place upgrades and bugfixes while read-only server processes are utilizing the index. Keep in mind this roughly doubles the size of the already-large Xapian database. -The extindex locks will be released roughly every 10s to -allow L<public-inbox-mda(1)> and L<public-inbox-watch(1)> -processes to write to the extindex.
- =item --fast Used with C<--reindex>, it will only look for new and stale entries and not touch already-indexed messages. +=item --no-multi-pack-index + +Disable writing a L<git-multi-pack-index(1)> file to save memory. +Normally, enabling multi-pack-index speeds up startup time of +subsequent L<git-cat-file(1)> processes by 3-4%, but generating +this file requires several GB of memory with large repos. + +Unlike the C<core.multiPackIndex> directive in git, it's still +possible to read existing multi-pack-index files if they are +created elsewhere. + +Available in public-inbox 2.0.0+ + =back =head1 FILES @@ -77,9 +101,9 @@ L<public-inbox-extindex-format(5)> =head1 CONFIGURATION -public-inbox-extindex does not currently write to the -L<public-inbox-config(5)> file, configuration may be entered -manually. The extindex name of C<all> is a special case which +public-inbox-extindex does not write to the L<public-inbox-config(5)> +file, it must be entered manually. +The extindex name of C<all> is a special case which corresponds to indexing C<--all> inboxes. An example for C<--all> is as follows: @@ -89,6 +113,16 @@ C<--all> is as follows: coderepo = foo coderepo = bar +Putting an C<extindex> entry in the config allows L<PublicInbox::WWW>. +You can have any number of C<extindex.$NAME> sections where C<$NAME> +is something other than C<all> to display a union of several inboxes. + +It is strongly recommended any public inboxes indexed by this command +have a stable C<publicinbox.$NAME.newsgroup> entry (regardless of +the presence of an NNTP or IMAP server). Otherwise, public-inbox-extindex +will use C<publicinbox.$NAME.inboxdir> as an internal key which can +cause needless reindexing and require L<--gc> if inboxes are relocated. + See L<public-inbox-config(5)> for more details.
=head1 ENVIRONMENT @@ -117,9 +151,17 @@ Default: none, uses C<publicinbox.indexBatchSize> =head1 UPGRADING -Occasionally, public-inbox will update it's schema version and +Occasionally, public-inbox will update its schema version and require a full index by running this command. +=head1 LOCKING + +It is safe to use C<--dedupe>, C<--gc> and C<--reindex> while +other processes are writing to covered inboxes or extindex. +The extindex locks will be released roughly every 10s to +allow L<public-inbox-mda(1)> and L<public-inbox-watch(1)> +processes to write to the extindex. + =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> diff --git a/Documentation/public-inbox-fetch.pod b/Documentation/public-inbox-fetch.pod index c78ffc0b..1ff0df44 100644 --- a/Documentation/public-inbox-fetch.pod +++ b/Documentation/public-inbox-fetch.pod @@ -61,6 +61,14 @@ there are no updates: public-inbox-fetch -q --exit-code && public-inbox-index test $? -eq 0 || exit $? +=item -p + +=item --prune + +Pass the C<--prune> and C<--prune-tags> flags to L<git-fetch(1)> calls. + +This is a new option in public-inbox 2.0+ + =item -v =item --verbose diff --git a/Documentation/public-inbox-glossary.pod b/Documentation/public-inbox-glossary.pod index 3c9e2bd2..d88539c8 100644 --- a/Documentation/public-inbox-glossary.pod +++ b/Documentation/public-inbox-glossary.pod @@ -25,7 +25,7 @@ C<over.sqlite3> =item tid, THREADID -A sequentially-assigned positive integer. These integers are +A sequentially assigned positive integer. These integers are per-inbox or per-extindex. In the future, this may be prefixed with C<T> for JMAP (RFC 8621) and RFC 8474. This may not be strictly compliant with RFC 8621 since inboxes and extindices @@ -40,7 +40,7 @@ RFC-(822|2822|5322) email message. =item IMAP EMAILID, JMAP Email Id -To-be-decided. This will likely be the git blob ID prefixed with C<g> +To be decided. 
This will likely be the git blob ID prefixed with C<g> rather than the numeric UID to accommodate the same blob showing up in both an extindex and inbox (or multiple extindices). @@ -87,7 +87,7 @@ but it imports drafts. For L<lei(1)> users only. This will allow lei users to place the same email into one or more virtual folders for -ease-of-filtering. This is NOT tied to public-inbox names, as +ease of filtering. This is NOT tied to public-inbox names, as messages stored by lei may not be public. These are similar in spirit to arbitrary freeform "tags" diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 011ade3c..f1a2180a 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -13,8 +13,8 @@ public-inbox-index [OPTIONS] --all public-inbox-index creates and updates the search, overview and NNTP article number database used by the read-only public-inbox HTTP and NNTP interfaces. Currently, this requires -L<DBD::SQLite> and L<DBI> Perl modules. L<Search::Xapian> -is optional, only to support the PSGI search interface. +L<DBD::SQLite> and L<DBI> Perl modules. L<Xapian> (or L<Search::Xapian>) +are optional, only to support the PSGI search interface. Once the initial indices are created by public-inbox-index, L<public-inbox-mda(1)> and L<public-inbox-watch(1)> will @@ -192,6 +192,13 @@ external indices are configured. Do not update the C<all> external index by default. This negates all uses of C<-E> / C<--update-extindex=> on the command-line. +=item --no-multi-pack-index + +Disables writing the multi-pack-index when using L</--update-extindex>. +See L<public-inbox-extindex(1)/--no-multi-pack-index> for details. 
+ +Available in public-inbox 2.0.0+ + =item --since=DATESTRING =item --after=DATESTRING @@ -319,7 +326,7 @@ Default: none, uses C<publicinbox.indexBatchSize> =head1 UPGRADING -Occasionally, public-inbox will update it's schema version and +Occasionally, public-inbox will update its schema version and require a full index by running this command. =head1 CONTACT diff --git a/Documentation/public-inbox-learn.pod b/Documentation/public-inbox-learn.pod index 3c92b1cc..b08e4bc8 100644 --- a/Documentation/public-inbox-learn.pod +++ b/Documentation/public-inbox-learn.pod @@ -54,7 +54,7 @@ This is similar to the C<spam> command above, but does not feed the message to L<spamc(1)> and only removes messages which match on any of the C<To:>, C<Cc:>, and C<List-ID:> headers. -The C<--all> option may be used match C<spam> semantics in removing +The C<--all> option may be used to match C<spam> semantics in removing the message from all configured inboxes. C<--all> is only available in public-inbox 1.6.0+. @@ -73,6 +73,25 @@ Default: ~/.public-inbox/config =back +=head1 CONFIGURATION + +These configuration knobs should be used in the +L<public-inbox-config(5)> file. 
+ +=over 8 + +=item publicinboxImport.dropUniqueUnsubscribe + +=item publicinbox.<name>.address + +=item publicinbox.<name>.listid + +=item publicinboxmda.spamcheck + +See L<public-inbox-config(5)> for descriptions of these options + +=back + =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> @@ -82,7 +101,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/public-inbox-mda.pod b/Documentation/public-inbox-mda.pod index 93cb0e9c..edc90287 100644 --- a/Documentation/public-inbox-mda.pod +++ b/Documentation/public-inbox-mda.pod @@ -68,6 +68,22 @@ Default: ~/.public-inbox/emergency/ =back +=head1 CONFIGURATION + +Various configuration knobs should be used in the +L<public-inbox-config(5)> file. + +=over 8 + +=item publicinboxImport.dropUniqueUnsubscribe + +=item publicinbox.<name>.address + +=item publicinbox.<name>.listid + +See L<public-inbox-config(5)> for descriptions of these options + +=back =head1 CONTACT @@ -78,7 +94,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/public-inbox-overview.pod b/Documentation/public-inbox-overview.pod index d4318576..35917ccc 100644 --- a/Documentation/public-inbox-overview.pod +++ b/Documentation/public-inbox-overview.pod @@ -48,7 +48,7 @@ that inbox. 
The instructions are roughly: # Optional but strongly recommended for hosting HTTP # (and required for NNTP) - # enable overview (requires DBD::SQLite) and, if Search::Xapian is + # enable overview (requires DBD::SQLite) and, if Xapian is # available, search: public-inbox-index INBOX_DIR diff --git a/Documentation/public-inbox-purge.pod b/Documentation/public-inbox-purge.pod index 945286c6..1223b577 100644 --- a/Documentation/public-inbox-purge.pod +++ b/Documentation/public-inbox-purge.pod @@ -31,7 +31,7 @@ leads to discontiguous git history. =item --all Purge the message in all inboxes configured in ~/.public-inbox/config. -This is an alternative to specifying individual inboxes directories +This is an alternative to specifying individual inbox directories on the command-line. =back @@ -74,7 +74,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/public-inbox-tuning.pod b/Documentation/public-inbox-tuning.pod index 53668ecc..b56c2b10 100644 --- a/Documentation/public-inbox-tuning.pod +++ b/Documentation/public-inbox-tuning.pod @@ -42,6 +42,14 @@ Other OS tuning knobs Scalability to many inboxes +=item 9 + +public-inbox-cindex --join performance + +=item 10 + +public-inbox-clone with shared object stores + =back =head2 New inboxes: public-inbox-init -V2 @@ -79,8 +87,8 @@ RAM. Attempts to parallelize random I/O on HDDs leads to pathological slowdowns as inboxes grow. 
While C<-V2> introduced Xapian shards as a parallelization -mechanism for SSDs; enabling C<publicInbox.indexSequentialShard> -repurposes sharding as mechanism to reduce the kernel page cache +mechanism for SSDs, enabling C<publicInbox.indexSequentialShard> +repurposes sharding as a mechanism to reduce the kernel page cache footprint when indexing on HDDs. Initializing a mirror with a high C<--jobs> count to create more @@ -108,7 +116,7 @@ indices on btrfs to achieve acceptable performance (even on SSD). Disabling copy-on-write also disables checksumming, thus C<raid1> (or higher) configurations may be corrupt after unsafe shutdowns. -Fortunately, these SQLite and Xapian indices are designed to +Fortunately, these SQLite and Xapian indices are designed to be recoverable from git if missing. Disabling CoW does not prevent all fragmentation. Large values @@ -125,7 +133,7 @@ C<btrfs filesystem defragment -fr $INBOX_DIR> may be necessary. Large filesystems benefit significantly from the C<space_cache=v2> mount option documented in L<btrfs(5)>. -Older, non-CoW filesystems are generally work well out-of-the-box +Older, non-CoW filesystems generally work well out of the box for our Xapian and SQLite indices. =head2 Performance on solid state drives @@ -152,9 +160,17 @@ C<LimitNOFILE=> in L<systemd.exec(5)>) may need to be raised to accommodate many concurrent clients. Transport Layer Security (IMAPS, NNTPS, or via STARTTLS) significantly -increases memory use of client sockets, sure to account for that in +increases memory use of client sockets, be sure to account for that in capacity planning. +Bursts of small object allocations late in process life contribute to +fragmentation of the heap due to arenas (slabs) used internally by Perl. +glibc malloc users should use C<MALLOC_MMAP_THRESHOLD_=131072> to reduce +fragmentation from the sliding mmap window. On 64-bit systems, jemalloc +(tested as an LD_PRELOAD on GNU/Linux) reduces fragmentation at the +expense of VM space. 
32-bit systems may be better off sticking with +glibc and MALLOC_MMAP_THRESHOLD_. + =head2 Other OS tuning knobs Linux users: the C<sys.vm.max_map_count> sysctl may need to be increased if @@ -168,13 +184,28 @@ Other OSes may have similar tuning knobs (patches appreciated). L<public-inbox-extindex(1)> allows any number of public-inboxes to share the same Xapian indices. -git 2.33+ startup time is orders-of-magnitude faster and uses +git 2.33+ startup time is orders of magnitude faster and uses less memory when dealing with thousands of alternates required for thousands of inboxes with L<public-inbox-extindex(1)>. Frequent packing (via L<git-gc(1)>) both improves performance and reduces the need to increase C<sys.vm.max_map_count>. +=head2 public-inbox-cindex --join performance + +A C++ compiler and the Xapian development files make C<--join> or +C<--join=aggressive> orders of magnitude faster in L<public-inbox-cindex(1)>. +On Debian-based systems this is C<libxapian-dev>. RPM-based distros have +these in C<xapian-core-devel> or C<xapian14-core-libs>. *BSDs typically +package development files together with runtime libraries, so the C<xapian> +or C<xapian-core> package will already have the development files. + +=head2 public-inbox-clone with shared object stores + +When mirroring manifests with many forks using the same objstore, +git 2.41+ is highly recommended for performance as we automatically +use the C<fetch.hideRefs> feature to speed up negotiation. + =head1 CONTACT Feedback encouraged via plain-text mail to L<mailto:meta@public-inbox.org> diff --git a/Documentation/public-inbox-v2-format.pod b/Documentation/public-inbox-v2-format.pod index e93d7fc7..de3b0bfd 100644 --- a/Documentation/public-inbox-v2-format.pod +++ b/Documentation/public-inbox-v2-format.pod @@ -30,7 +30,7 @@ databases for parallelism by "shards". 
- all.git # empty, alternates to $EPOCH.git - xap$SCHEMA_VERSION/$SHARD # per-shard Xapian DB - xap$SCHEMA_VERSION/over.sqlite3 # OVER-view DB for NNTP, threading - - msgmap.sqlite3 # same the v1 msgmap + - msgmap.sqlite3 # same as the v1 msgmap For blob lookups, the reader only needs to open the "all.git" repository with $GIT_DIR/objects/info/alternates which references @@ -89,7 +89,7 @@ After-the-fact invocations of L<public-inbox-index> will ignore messages written to 'd' after they are written to 'm'. Deltafication is not significantly improved over v1, but overall -storage for trees is made as as small as possible. Initial +storage for trees is made as small as possible. Initial statistics and benchmarks showing the benefits of this approach are documented at: @@ -97,7 +97,7 @@ L<https://public-inbox.org/meta/20180209205140.GA11047@dcvr/> =head2 XAPIAN SHARDS -Another second scalability problem in v1 was the inability to +Another scalability problem in v1 was the inability to utilize multiple CPU cores for Xapian indexing. This is addressed by using shards in Xapian to perform import indexing in parallel. diff --git a/Documentation/public-inbox-watch.pod b/Documentation/public-inbox-watch.pod index e8f97c80..6e2142fe 100644 --- a/Documentation/public-inbox-watch.pod +++ b/Documentation/public-inbox-watch.pod @@ -41,16 +41,18 @@ importing them into public-inbox git repositories and indices. public-inbox-watch is useful in situations when a user wishes to mirror an existing mailing list, but has no access to run L<public-inbox-mda(1)> on a server. Unlike public-inbox-mda -which is invoked once per-message, public-inbox-watch is a +which is invoked once per message, public-inbox-watch is a persistent process, making it faster for after-the-fact imports of large Maildirs. Upon startup, it scans the mailbox for new messages to be imported while it was not running. -As of public-inbox 1.6.0, Maildirs, IMAP folders, and NNTP -newsgroups are supported. 
Previous versions of public-inbox -only supported Maildirs. +All versions of public-inbox-watch support Maildirs. public-inbox +1.6.0 added support for IMAP folders and NNTP newsgroups. +public-inbox 2.0 adds support for MH directories. There are no +plans to support the mbox family since new messages are expensive +to detect in large mboxes. public-inbox-watch should be run inside a L<screen(1)> session or as a L<systemd(1)> service. Errors are emitted to stderr. @@ -62,10 +64,14 @@ public-inbox-watch takes no command-line options. =head1 CONFIGURATION These configuration knobs should be used in the -L<public-inbox-config(5)> file +L<public-inbox-config(5)> file. =over 8 +=item publicinboxImport.dropUniqueUnsubscribe + +See L<public-inbox-config(5)/publicinboxImport.dropUniqueUnsubscribe> + =item publicinbox.<name>.watch A location to watch. public-inbox 1.5.0 and earlier only supported @@ -74,17 +80,24 @@ C<maildir:> paths: [publicinbox "test"] watch = maildir:/path/to/maildirs/.INBOX.test/ -public-inbox 1.6.0 supports C<nntp://>, C<nntps://>, +public-inbox 1.6.0+ supports C<nntp://>, C<nntps://>, C<imap://> and C<imaps://> URLs: watch = nntp://news.example.com/inbox.test.group watch = imaps://user@mail.example.com/INBOX.test +2.0+ supports MH: + + watch = mh:/path/to/MH/inbox.test + This may be specified multiple times to combine several mailboxes into a single public-inbox. URLs requiring authentication will require L<netrc(5)> and/or L<git-credential(1)> (preferred) to fill in the username and password. +public-inbox 2.0+ also supports boolean C<false> to prevent the global +L</publicinboxwatch.watchspam> directive from writing to the inbox. + Default: none =item publicinbox.<name>.watchheader @@ -120,7 +133,7 @@ Messages without the (S)een flag are not considered for hiding. This hiding affects all configured public-inboxes in PI_CONFIG. As with C<publicinbox.$NAME.watch>, C<imap://> and C<imaps://> URLs -are supported in public-inbox 1.6.0+. 
+are supported in public-inbox 1.6.0+, and C<MH> in 2.0+. Default: none; only for L<public-inbox-watch(1)> users @@ -201,7 +214,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> diff --git a/Documentation/public-inbox.cgi.pod b/Documentation/public-inbox.cgi.pod index 71f8a6f5..58d59ba2 100644 --- a/Documentation/public-inbox.cgi.pod +++ b/Documentation/public-inbox.cgi.pod @@ -4,7 +4,7 @@ public-inbox.cgi - CGI wrapper for PublicInbox::WWW =head1 SYNOPSIS -You generally want to run public-inbox-httpd, instead +You generally want to run public-inbox-netd or public-inbox-httpd, instead =head1 DESCRIPTION @@ -12,9 +12,15 @@ public-inbox.cgi provides a CGI interface wrapper on top of the PSGI/Plack L<PublicInbox::WWW> module. It is only provided for compatibility reasons and NOT recommended. -CGI with Perl is slow due to code loading overhead and web servers lack -the scheduling fairness of L<public-inbox-httpd(1)> for handling git -clones and streaming large mbox downloads. +CGI with Perl is slow due to code loading overhead and +web servers lack the scheduling fairness of L<public-inbox-netd(1)> +and L<public-inbox-httpd(1)> for handling git clones and +streaming large mbox downloads. + +=head1 COMPATIBILITY NOTE + +When using the CGI with Apache, make sure to set AllowEncodedSlashes to On, as +public-inbox makes heavy use of encoded slashes. 
=head1 CONTACT @@ -25,10 +31,11 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT -Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> +Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO -L<public-inbox-httpd(1)>, L<PublicInbox::WWW>, L<public-inbox-daemon(8)>, +L<public-inbox-netd(1)>, L<public-inbox-httpd(1)>, +L<PublicInbox::WWW>, L<public-inbox-daemon(8)>, diff --git a/Documentation/reproducibility.txt b/Documentation/reproducibility.txt index 4e56ada4..3336de73 100644 --- a/Documentation/reproducibility.txt +++ b/Documentation/reproducibility.txt @@ -12,7 +12,7 @@ reproducible. Keeping all communications as email ensures the full history of the entire project can be mirrored by anyone with the resources to do so. Compact, low-complexity data requires -less resources to mirror, so sticking with plain-text +less resources to mirror, so sticking with plain text ensures more parties can mirror and potentially fork the project with all its data. @@ -26,4 +26,4 @@ If these things make power hungry project leaders and admins uncomfortable, good. That was the point. It's how checks and balances ought to work. -Comments, corrections, etc welcome: meta@public-inbox.org +Comments, corrections, etc. welcome: meta@public-inbox.org diff --git a/Documentation/standards.perl b/Documentation/standards.perl index c36afb5d..743cdee1 100755 --- a/Documentation/standards.perl +++ b/Documentation/standards.perl @@ -11,11 +11,11 @@ Non-exhaustive list of standards public-inbox software attempts or intends to implement. This list is intended to be a quick reference for hackers and users. -Given the goals of interoperability and accessibility; strict +Given the goals of interoperability and accessibility, strict conformance to standards is not always possible, but rather best-effort taking into account real-world cases. 
In particular, "obsolete" standards remain relevant as long as clients and -data exists. +data using them exist. IETF RFCs --------- diff --git a/Documentation/technical/data_structures.txt b/Documentation/technical/data_structures.txt index 4dcf9ce6..11f78041 100644 --- a/Documentation/technical/data_structures.txt +++ b/Documentation/technical/data_structures.txt @@ -32,19 +32,19 @@ Per-message classes Common abbreviation: $mime, $eml Used by: PublicInbox::WWW, PublicInbox::SearchIdx - An representation of an entire email, multipart or not. + A representation of an entire email, multipart or not. An option to use libgmime or libmailutils may be supported in the future for performance and memory use. This can be a memory hog with big messages and giant attachments, so our PublicInbox::WWW interface only keeps - one object of this class in memory at-a-time. + one object of this class in memory at a time. In other words, this is the "meat" of the message, whereas $smsg (below) is just the "skeleton". Our PublicInbox::V2Writable class may have two objects of this - type in memory at-a-time for deduplication. + type in memory at a time for deduplication. In public-inbox 1.4 and earlier, Email::MIME and its subclass, PublicInbox::MIME were used. Despite still slurping, @@ -61,10 +61,10 @@ Per-message classes This is loaded from either the overview DB (over.sqlite3) or the Xapian DB (docdata.glass), though the Xapian docdata - is won't hold NNTP-only fields (Cc:/To:) + won't hold NNTP-only fields (Cc:/To:). There may be hundreds or thousands of these objects in memory - at-a-time, so fields are pruned if unneeded. + at a time, so fields are pruned if unneeded. * PublicInbox::SearchThread::Msg - subclass of Smsg Common abbreviation: $cont or $node @@ -75,9 +75,9 @@ Per-message classes Nowadays, this is a re-blessed $smsg with additional fields. As with $smsg objects, there may be hundreds or thousands - of these objects in memory at-a-time. 
+ of these objects in memory at a time. - We also do not use a linked-list for storing children as JWZ + We also do not use a linked list for storing children as JWZ describes, but instead a Perl hashref for {children} which becomes an arrayref upon sorting. @@ -88,7 +88,7 @@ Per-inbox classes * PublicInbox::Inbox - represents a single public-inbox Common abbreviation: $ibx - Used everywhere + Used everywhere. This represents a "publicinbox" section in the config file, see public-inbox-config(5) for details. @@ -117,7 +117,7 @@ Per-inbox classes * PublicInbox::Search - Xapian read-only interface Common abbreviation: $srch, $ibx->search - Used everywhere if Search::Xapian (or Xapian.pm) is available. + Used everywhere if Xapian is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) @@ -152,7 +152,7 @@ ad-hoc structures shared across packages This holds the PSGI $env as well as any internal variables used by various modules of PublicInbox::WWW. - As with the PSGI $env, there is one per-active WWW + As with the PSGI $env, there is one per active WWW request+response cycle. It does not exist for idle HTTP clients. @@ -174,8 +174,8 @@ daemon classes Common abbreviation: $http Used by: PublicInbox::DS, public-inbox-httpd - Unlike PublicInbox::NNTP, this class no knowledge of any of - the email or git-specific parts of public-inbox, only PSGI. + Unlike PublicInbox::NNTP, this class has no knowledge of any of + the email- or git-specific parts of public-inbox, only PSGI. However, it supports APIs and behaviors (e.g. streaming large responses) which PublicInbox::WWW may take advantage of. @@ -188,7 +188,7 @@ daemon classes This class calls non-blocking accept(2) or accept4(2) on a listen socket to create new PublicInbox::HTTP and - PublicInbox::HTTP instances. + PublicInbox::NNTP instances. 
* PublicInbox::HTTPD Common abbreviation: $httpd @@ -197,9 +197,9 @@ daemon classes wrappers around client sockets accepted from PublicInbox::Listener. - Since the SERVER_NAME and SERVER_PORT PSGI variables needs to be + Since the SERVER_NAME and SERVER_PORT PSGI variables need to be exposed for HTTP/1.0 requests when Host: headers are missing, - this is per-Listener socket. + this is per Listener socket. * PublicInbox::HTTPD::Async Common abbreviation: $async diff --git a/Documentation/technical/ds.txt b/Documentation/technical/ds.txt index 5a1655a1..afead2f1 100644 --- a/Documentation/technical/ds.txt +++ b/Documentation/technical/ds.txt @@ -1,9 +1,14 @@ PublicInbox::DS - event loop and async I/O base class -Our PublicInbox::DS event loop which powers public-inbox-nntpd -and public-inbox-httpd diverges significantly from the -unmaintained Danga::Socket package we forked from. In fact, -it's probably different from most other event loops out there. +Our PublicInbox::DS event loop which powers most of our long-lived +processes(*) diverges significantly from the unmaintained Danga::Socket +package we forked from. In fact, it's probably different from most +other event loops out there. + +Most notably, it uses one-shot, level-trigger, and edge-trigger +modes of kqueue|epoll depending on the situation. + +(*) public-inbox-netd,(-httpd,-imapd,-nntpd,-pop3d,-watch) + lei-daemon Most notably: @@ -14,7 +19,7 @@ Most notably: triggers a call. The lack of read/write callback distinction is driven by the - fact TLS libraries (e.g. OpenSSL via IO::Socket::SSL) may + fact that TLS libraries (e.g. OpenSSL via IO::Socket::SSL) may declare SSL_WANT_READ on SSL_write(), and SSL_WANT_WRITE on SSL_read(). So we end up having to let each user object decide whether it wants to make read or write calls depending on its @@ -30,7 +35,7 @@ Most notably: Reducing the user-supplied code down to a single callback allows subclasses to keep their logic self-contained. 
The combination of this change and one-shot wakeups (see below) for bidirectional - data flows make asynchronous code easier to reason about. + data flows makes asynchronous code easier to reason about. Other divergences: @@ -48,7 +53,7 @@ Other divergences: Augmented features: -* obj->write(CODEREF) passes the object itself to the CODEREF +* obj->write(CODEREF) passes the object itself to the CODEREF. Being able to enqueue subroutine calls is a powerful feature in Danga::Socket for keeping linear logic in an asynchronous environment. Unfortunately, each subroutine takes several kilobytes of memory. @@ -81,7 +86,7 @@ New features * IO::Socket::SSL support (for NNTPS, STARTTLS+NNTP, HTTPS) -* dwaitpid (waitpid wrapper) support for reaping dead children +* awaitpid (waitpid wrapper) support for reaping dead children * reliable signal wakeups are supported via signalfd on Linux, EVFILT_SIGNAL on *BSDs via IO::KQueue. diff --git a/Documentation/technical/memory.txt b/Documentation/technical/memory.txt index ea7fb7b7..039694c3 100644 --- a/Documentation/technical/memory.txt +++ b/Documentation/technical/memory.txt @@ -8,7 +8,7 @@ memory-efficient. We strive to keep processes small to improve locality, allow the kernel to cache more files, and to be a good neighbor to other processes running on the machine. Taking advantage of -automatic reference counting (ARC) in Perl allows us +automatic reference counting (ARC) in Perl allows us to deterministically release memory back to the heap. We start with a simple data model with few circular @@ -48,3 +48,9 @@ In the future, our internal data model will be further flattened and simplified to reduce the overhead imposed by small objects. Large allocations may also be avoided by optionally using Inline::C. 
+ +Finally, the mwrap-perl LD_PRELOAD wrapper was ported to Perl 5 +and enhanced to provide live memory usage tracking on 64-bit systems +with minimal performance impact on production traffic: + + git clone https://80x24.org/mwrap-perl.git diff --git a/Documentation/technical/weird-stuff.txt b/Documentation/technical/weird-stuff.txt new file mode 100644 index 00000000..0c8d6891 --- /dev/null +++ b/Documentation/technical/weird-stuff.txt @@ -0,0 +1,22 @@ +There's a lot of weird code in public-inbox which may be daunting +to new hackers. + +* The event loop (PublicInbox::DS) is an evolution of a fairly standard + C10K event loop. See ds.txt in this directory for more. + +Things got weirder in 2021: + +* The lei command-line tool is backed by a daemon. This was done to + improve startup time for shell completion and manage git/SQLite/Xapian + single-writer during long, parallel imports. It may eventually become + a read-write IMAP/JMAP server. + +* SOCK_SEQPACKET is used extensively in lei, and will likely make its + way into more places, still. + +And even more so in 2022: + +* public-inbox-clone / PublicInbox::LeiMirror relies on ->DESTROY + for make-like dependency management while providing parallelism. + +More to come, lei will expose Maildirs via FUSE 3... diff --git a/Documentation/technical/whyperl.txt b/Documentation/technical/whyperl.txt index fbe2e1b1..db1d9793 100644 --- a/Documentation/technical/whyperl.txt +++ b/Documentation/technical/whyperl.txt @@ -21,7 +21,7 @@ Good Things Perl 5 is installed on many, if not most GNU/Linux and BSD-based servers and workstations. It is likely the most - widely-installed programming environment that offers a + widely installed programming environment that offers a significant amount of POSIX functionality. Users won't have to waste bandwidth or space with giant toolchains or architecture-specific binaries. 
@@ -47,8 +47,8 @@ Good Things * Predictable performance - While Perl is neither fast or memory-efficient, its - performance and memory use are predictable and does not + While Perl is neither fast nor memory-efficient, its + performance and memory use are predictable and do not require GC tuning by the user. public-inbox is developed for (and mostly on) old @@ -56,7 +56,7 @@ Good Things late 1990s, and any cheap VPS today has more than enough RAM and CPU for handling plain-text email. - Low hardware requirements increases the reach of our software + Low hardware requirements increase the reach of our software to more users, improving centralization resistance. * Compatibility @@ -86,7 +86,7 @@ Good Things There should be no need to rely on language-specific package managers such as cpan(1), those systems increase - the learning curve for users and systems administrators. + the learning curve for users and system administrators. * Compactness and terseness @@ -98,7 +98,7 @@ Good Things * Performance ceiling and escape hatch With optional Inline::C, we can be "as fast as C" in some - cases. Inline::C is widely-packaged by distros and it + cases. Inline::C is widely packaged by distros and it gives us an escape hatch for dealing with missing bindings or performance problems should they arise. Inline::C use (as opposed to XS) also preserves the software freedom and @@ -135,7 +135,7 @@ Bad Things (m//, substr(), index(), etc.) still require memory copies into userspace, negating a benefit of zero-copy. -* The XS/C API make it difficult to improve internals while +* The XS/C API makes it difficult to improve internals while preserving compatibility. * Lack of optional type checking. This may be a blessing in @@ -161,14 +161,14 @@ Red herrings to ignore when evaluating other runtimes ----------------------------------------------------- These don't discount a language or runtime from being -being used, they're just not interesting. +used, they're just not interesting. 
* Lightweight threading While lightweight threading implementations are - convenient, they tend to be significantly heavier than a + convenient, they tend to be significantly heavier than pure event-loop systems (or multi-threaded event-loop - systems) + systems). Lightweight threading implementations have stack overhead and growth typically measured in kilobytes. The userspace diff --git a/Documentation/txt2pre b/Documentation/txt2pre index 3ecd9100..b45c52e8 100755 --- a/Documentation/txt2pre +++ b/Documentation/txt2pre @@ -1,15 +1,15 @@ -#!/usr/bin/env perl -# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> +#!perl -w +# n.b. this is invoked via $(PERL) in makefiles +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Stupid script to make HTML from preformatted, utf-8 text versions, # only generating links for http(s). Markdown does too much # and requires indentation to output preformatted text. 
-use strict; -use warnings; +use v5.12; use PublicInbox::Linkify; use PublicInbox::Hval qw(ascii_html); -my %xurls; +my (%xurls, %lei); for (qw[lei(1) lei-add-external(1) lei-add-watch(1) @@ -42,6 +42,7 @@ for (qw[lei(1) lei-q(1) lei-rediff(1) lei-refresh-mail-sync(1) + lei-reindex(1) lei-rm(1) lei-rm-watch(1) lei-security(7) @@ -49,12 +50,13 @@ for (qw[lei(1) lei-tag(1) lei-up(1) public-inbox.cgi(1) + public-inbox-cindex(1) public-inbox-clone(1) public-inbox-config(5) - public-inbox-config(5) public-inbox-convert(1) public-inbox-daemon(8) public-inbox-edit(1) + public-inbox-extindex(1) public-inbox-fetch(1) public-inbox-glossary(7) public-inbox-httpd(1) @@ -63,8 +65,10 @@ for (qw[lei(1) public-inbox-init(1) public-inbox-learn(1) public-inbox-mda(1) + public-inbox-netd(1) public-inbox-nntpd(1) public-inbox-overview(7) + public-inbox-pop3d(1) public-inbox-purge(1) public-inbox-v1-format(5) public-inbox-v2-format(5) @@ -74,8 +78,11 @@ for (qw[lei(1) my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "$n.html"; $xurls{$n} = "$n.html"; + /\Alei-(.+?)\(1\)\z/ and $xurls{"lei $1"} = "$n.html"; } +$xurls{'lei/store'} = 'lei-store-format.html'; + for (qw[make(1) flock(2) setrlimit(2) vfork(2) tmpfs(5) inotify(7) unix(7) syslog(3)]) { my ($n, $s) = (/([\w\-]+)\((\d)\)/); @@ -141,6 +148,8 @@ $xurls{'copydatabase(1)'} = 'https://manpages.debian.org/stable/xapian-tools/copydatabase.1.en.html'; $xurls{'xapian-compact(1)'} = 'https://manpages.debian.org/stable/xapian-tools/xapian-compact.1.en.html'; +$xurls{'xapian-delve(1)'} = + 'https://manpages.debian.org/stable/xapian-tools/xapian-delve.1.en.html'; $xurls{'gzip(1)'} = 'https://manpages.debian.org/stable/gzip/gzip.1.en.html'; $xurls{'chmod(1)'} = 'https://manpages.debian.org/stable/coreutils/chmod.1.en.html'; @@ -158,6 +167,9 @@ if ($str =~ /^NAME\n\s+([^\n]+)/sm) { if ($title =~ /([\w\.\-]+)/) { delete $xurls{$1}; } + if ($title =~ /\blei-([\w\-]+)\b/) { + delete $xurls{"lei $1"}; + } } $title = ascii_html($title); my $l = 
PublicInbox::Linkify->new; |