diff options
author | Eric Wong <e@80x24.org> | 2016-08-18 04:44:07 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-08-18 09:31:55 +0000 |
commit | adecbd43d2eed046fe6e88a459d03567461fd84b (patch) | |
tree | 7900b8ee9e908daa6fa8ddbef3c7ad846ecd8c8b /lib/PublicInbox | |
parent | dfe55f5ee5bd6e3a12d933a6570eb94f294d1c54 (diff) | |
download | public-inbox-adecbd43d2eed046fe6e88a459d03567461fd84b.tar.gz |
Begin documenting some basic help functionality. I may tweak the anchor names of the various HTML endpoints to be more consistent with each other (old ones will be supported for a short while), so I'm not documenting those, for now. This may become part of a builtin key-value store for basic texts, but this probably shouldn't become a wiki engine, either.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Search.pm | 38 | ||||
-rw-r--r-- | lib/PublicInbox/WWW.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/WwwStream.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/WwwText.pm | 203 |
4 files changed, 258 insertions, 1 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 7561ef44..445c2d8a 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -51,6 +51,7 @@ my %bool_pfx_internal = ( thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) ); +# do we still need these? probably not.. my %bool_pfx_external = ( path => 'XPATH', mid => 'Q', # uniQue id (Message-ID) @@ -62,6 +63,29 @@ my %prob_prefix = ( m => 'Q', # 'mid' is exact, 'm' can do partial ); +# not documenting m: and mid: for now, the using the URLs works w/o Xapian +our @HELP = ( + 's:' => <<EOF, +match within Subject only e.g. s:"a quick brown fox" +This is a probabilistic search with support for stemming +and wildcards '*' +EOF + 'd:' => <<EOF, +date range as YYYYMMDD e.g. d:19931002..20101002 +Open-ended ranges such as d:19931002.. and d:..20101002 +are also supported. +EOF +); +# TODO: (from mairix, some of these are maybe) +# b (body), f (From:), c (Cc:), n (attachment), t (To:) +# tc (To:+Cc:), bs (body + Subject), tcf (To: +Cc: +From:) +# +# Non-mairix: +# df (filenames from diff) +# nq (non-quoted body) +# da (diff a/ removed lines) +# db (diff b/ added lines) + my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); sub xpfx { $all_pfx{$_[0]} } @@ -190,10 +214,14 @@ sub qp { # we do not actually create AltId objects, # just parse the spec to avoid the extra DB handles for now. if (my $altid = $self->{altid}) { + my $user_pfx = $self->{-user_pfx} ||= []; for (@$altid) { # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3' /\Aserial:(\w+):/ or next; my $pfx = $1; + push @$user_pfx, "$pfx:", <<EOF; +alternate serial number e.g. $pfx:12345 +EOF # gmane => XGMANE $qp->add_boolean_prefix($pfx, 'X'.uc($pfx)); } @@ -321,4 +349,14 @@ sub enquire { $self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb}); } +sub help { + my ($self) = @_; + $self->qp; # parse altids + my @ret = @HELP; + if (my $user_pfx = $self->{-user_pfx}) { + push @ret, @$user_pfx; + } + \@ret; +} + 1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 6f6a0033..4d599fc9 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -96,6 +96,8 @@ sub call { } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { + get_text($ctx, $1, $2); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { @@ -238,6 +240,18 @@ sub get_thread { PublicInbox::View::thread_html($ctx); } +# /$INBOX/_/text/$KEY/ +# /$INBOX/_/text/$KEY/raw +# KEY may contain slashes +sub get_text { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + + require PublicInbox::WwwText; + PublicInbox::WwwText::get_text($ctx, $key); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 9ed25e1d..c89e6de4 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -31,10 +31,12 @@ sub _html_top ($) { my $desc = ascii_html($obj->description); my $title = $ctx->{-title_html} || $desc; my $upfx = $ctx->{-upfx} || ''; + my $help = $upfx.'_/text/help'; my $atom = $ctx->{-atom} || $upfx.'new.atom'; my $tip = $ctx->{-html_tip} || ''; my $top = "<b>$desc</b>"; - my $links = "<a\nhref=\"$atom\">Atom feed</a>"; + my $links = "<a\nhref=\"$help\">help</a> / ". + "<a\nhref=\"$atom\">Atom feed</a>"; if ($obj->search) { my $q_val = $ctx->{-q_value_html}; if (defined $q_val && $q_val ne '') { diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm new file mode 100644 index 00000000..b8beb97c --- /dev/null +++ b/lib/PublicInbox/WwwText.pm @@ -0,0 +1,203 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# serves the /$INBOX/_/* endpoints from :text/* of the git tree +package PublicInbox::WwwText; +use strict; +use warnings; +use PublicInbox::Linkify; +use PublicInbox::WwwStream; +use PublicInbox::Hval qw(ascii_html); +our $QP_URL = 'https://xapian.org/docs/queryparser.html'; +our $WIKI_URL = 'https://en.wikipedia.org/wiki'; + +# /$INBOX/_/text/$KEY/ # KEY may contain slashes +# For now, "help" is the only supported $KEY +sub get_text { + my ($ctx, $key) = @_; + my $code = 200; + + $key = 'help' if !defined $key; # this 302s to _/text/help/ + + # get the raw text the same way we get mboxrds + my $raw = ($key =~ s!/raw\z!!); + my $have_tslash = ($key =~ s!/\z!!) if !$raw; + + my $txt = ''; + if (!_default_text($ctx, $key, \$txt)) { + $code = 404; + $txt = "404 Not Found ($key)\n"; + } + if ($raw) { + return [ $code, [ 'Content-Type', 'text/plain', + 'Content-Length', bytes::length($txt) ], + [ $txt ] ] + } + + # enforce trailing slash for "wget -r" compatibility + if (!$have_tslash && $code == 200) { + my $url = $ctx->{-inbox}->base_url($ctx->{env}); + $url .= "_/text/$key/"; + + return [ 302, [ 'Content-Type', 'text/plain', + 'Location', $url ], + [ "Redirecting to $url\n" ] ]; + } + + # Follow git commit message conventions, + # first line is the Subject/title + my ($title) = ($txt =~ /\A([^\n]*)/s); + _do_linkify($txt); + $ctx->{-title_html} = ascii_html($title); + + my $nslash = ($key =~ tr!/!/!); + $ctx->{-upfx} = '../../../' . ('../' x $nslash); + + PublicInbox::WwwStream->response($ctx, $code, sub { + my ($nr, undef) = @_; + $nr == 1 ? '<pre>'.$txt.'</pre>' : undef + }); +} + +sub _do_linkify { + my $l = PublicInbox::Linkify->new; + $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0]))); +} + +sub _srch_prefix ($$) { + my ($srch, $txt) = @_; + my $pad = 0; + my $htxt = ''; + my $help = $srch->help; + my $i; + for ($i = 0; $i < @$help; $i += 2) { + my $pfx = $help->[$i]; + my $n = length($pfx); + $pad = $n if $n > $pad; + $htxt .= $pfx . "\0"; + $htxt .= $help->[$i + 1]; + $htxt .= "\f\n"; + } + $pad += 2; + my $padding = ' ' x ($pad + 8); + $htxt =~ s/^/$padding/gms; + $htxt =~ s/^$padding(\S+)\0/" $1". + (' ' x ($pad - length($1)))/egms; + $htxt =~ s/\f\n/\n/gs; + $$txt .= $htxt; + 1; +} + + +sub _default_text ($$$) { + my ($ctx, $key, $txt) = @_; + return if $key ne 'help'; # TODO more keys? + + my $ibx = $ctx->{-inbox}; + my $base_url = $ibx->base_url($ctx->{env}); + $$txt .= "public-inbox help for $base_url\n"; + $$txt .= <<EOF; + +overview +-------- + + public-inbox uses Message-ID identifiers in URLs. + One may look up messages by substituting Message-IDs + (without the leading '<' or trailing '>') into the URL. + Forward slash ('/') characters in the Message-IDs + need to be escaped as "%2F" (without quotes). + + Thus, it is possible to retrieve any message by its + Message-ID by going to: + + $base_url<Message-ID>/ + + (without the '<' or '>') + + Message-IDs are described at: + + $WIKI_URL/Message-ID + +EOF + + # n.b. we use the Xapian DB for any regeneratable, + # order-of-arrival-independent data. + my $srch = $ibx->search; + if ($srch) { + $$txt .= <<EOF; +search +------ + + This public-inbox has search functionality provided by Xapian. + + It supports typical AND, OR, NOT, '+', '-' queries present + in other search engines. + + We also support search prefixes to limit the scope of the + search to certain fields using prefixes. + + Prefixes supported in this installation include: + +EOF + _srch_prefix($srch, $txt); + + $$txt .= <<EOF; + The upstream Xapian query parser documentation fully explains + the query syntax: + + $QP_URL + +message threading +----------------- + + Message threading is enabled for this public-inbox, + additional endpoints for message threads are available: + + * $base_url<Message-ID>/T/#u + + Loads the thread belonging to the given <Message-ID> + in flat chronological order. The "#u" anchor + focuses the browser on the given <Message-ID>. + + * $base_url<Message-ID>/t/#u + + Loads the thread belonging to the given <Message-ID> + in threaded order with nesting. For deep threads, + this requires a wide display or horizontal scrolling. + + Both of these HTML endpoints are suitable for offline reading + using the thread overview at the bottom of each page. + + Users of feed readers may follow a particular thread using: + + * $base_url<Message-ID>/t.atom + + Which loads the thread in Atom Syndication Standard + described at Wikipedia and RFC4287: + + $WIKI_URL/Atom_(standard) + https://tools.ietf.org/html/rfc4287 + + Finally, the gzipped mbox for a thread is available for + downloading and importing into your favorite mail client: + + * $base_url<Message-ID>/t.mbox.gz + + We use the mboxrd variant of the mbox format described + at: + + $WIKI_URL/Mbox + +contact +------- + + This help text is maintained by public-inbox developers + reachable via plain-text email at: meta\@public-inbox.org + +EOF + # TODO: support admin contact info in ~/.public-inbox/config + } + 1; +} + +1; |