From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B05F02018E; Thu, 18 Aug 2016 09:51:50 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Cc: Philip Oakley Subject: [PATCH 1/2] www: implement generic help text Date: Thu, 18 Aug 2016 09:51:49 +0000 Message-Id: <20160818095150.17118-2-e@80x24.org> In-Reply-To: <20160818095150.17118-1-e@80x24.org> References: <20160818095150.17118-1-e@80x24.org> List-Id: Begin documenting some basic help functionality. This will probably be edited/tweaked heavily. I may tweak the anchor names of the HTML thread overview to be more consistent with each other (old ones will be supported), so I'm not documenting those, for now... --- MANIFEST | 2 + lib/PublicInbox/Search.pm | 38 ++++++++ lib/PublicInbox/WWW.pm | 14 +++ lib/PublicInbox/WwwStream.pm | 4 +- lib/PublicInbox/WwwText.pm | 201 +++++++++++++++++++++++++++++++++++++++++++ t/psgi_text.t | 39 +++++++++ 6 files changed, 297 insertions(+), 1 deletion(-) create mode 100644 lib/PublicInbox/WwwText.pm create mode 100644 t/psgi_text.t diff --git a/MANIFEST b/MANIFEST index bed6050..306945a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -82,6 +82,7 @@ lib/PublicInbox/WWW.pm lib/PublicInbox/WatchMaildir.pm lib/PublicInbox/WwwAttach.pm lib/PublicInbox/WwwStream.pm +lib/PublicInbox/WwwText.pm sa_config/Makefile sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre @@ -141,6 +142,7 @@ t/plack.t t/precheck.t t/psgi_attach.t t/psgi_mount.t +t/psgi_text.t t/qspawn.t t/search.t t/spamcheck_spamc.t diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 7561ef4..445c2d8 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -51,6 +51,7 @@ my %bool_pfx_internal = ( thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) ); +# do we still need these? probably not.. my %bool_pfx_external = ( path => 'XPATH', mid => 'Q', # uniQue id (Message-ID) @@ -62,6 +63,29 @@ my %prob_prefix = ( m => 'Q', # 'mid' is exact, 'm' can do partial ); +# not documenting m: and mid: for now, the using the URLs works w/o Xapian +our @HELP = ( + 's:' => < <{altid}) { + my $user_pfx = $self->{-user_pfx} ||= []; for (@$altid) { # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3' /\Aserial:(\w+):/ or next; my $pfx = $1; + push @$user_pfx, "$pfx:", < XGMANE $qp->add_boolean_prefix($pfx, 'X'.uc($pfx)); } @@ -321,4 +349,14 @@ sub enquire { $self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb}); } +sub help { + my ($self) = @_; + $self->qp; # parse altids + my @ret = @HELP; + if (my $user_pfx = $self->{-user_pfx}) { + push @ret, @$user_pfx; + } + \@ret; +} + 1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 6f6a003..4d599fc 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -96,6 +96,8 @@ sub call { } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { + get_text($ctx, $1, $2); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { @@ -238,6 +240,18 @@ sub get_thread { PublicInbox::View::thread_html($ctx); } +# /$INBOX/_/text/$KEY/ +# /$INBOX/_/text/$KEY/raw +# KEY may contain slashes +sub get_text { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + + require PublicInbox::WwwText; + PublicInbox::WwwText::get_text($ctx, $key); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 9ed25e1..c89e6de 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -31,10 +31,12 @@ sub _html_top ($) { my $desc = ascii_html($obj->description); my $title = $ctx->{-title_html} || $desc; my $upfx = $ctx->{-upfx} || ''; + my $help = $upfx.'_/text/help'; my $atom = $ctx->{-atom} || $upfx.'new.atom'; my $tip = $ctx->{-html_tip} || ''; my $top = "$desc"; - my $links = "Atom feed"; + my $links = "help / ". + "Atom feed"; if ($obj->search) { my $q_val = $ctx->{-q_value_html}; if (defined $q_val && $q_val ne '') { diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm new file mode 100644 index 0000000..ef3446b --- /dev/null +++ b/lib/PublicInbox/WwwText.pm @@ -0,0 +1,201 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +# +# serves the /$INBOX/_/* endpoints from :text/* of the git tree +package PublicInbox::WwwText; +use strict; +use warnings; +use PublicInbox::Linkify; +use PublicInbox::WwwStream; +use PublicInbox::Hval qw(ascii_html); +our $QP_URL = 'https://xapian.org/docs/queryparser.html'; +our $WIKI_URL = 'https://en.wikipedia.org/wiki'; + +# /$INBOX/_/text/$KEY/ # KEY may contain slashes +# For now, "help" is the only supported $KEY +sub get_text { + my ($ctx, $key) = @_; + my $code = 200; + + $key = 'help' if !defined $key; # this 302s to _/text/help/ + + # get the raw text the same way we get mboxrds + my $raw = ($key =~ s!/raw\z!!); + my $have_tslash = ($key =~ s!/\z!!) if !$raw; + + my $txt = ''; + if (!_default_text($ctx, $key, \$txt)) { + $code = 404; + $txt = "404 Not Found ($key)\n"; + } + if ($raw) { + return [ $code, [ 'Content-Type', 'text/plain', + 'Content-Length', bytes::length($txt) ], + [ $txt ] ] + } + + # enforce trailing slash for "wget -r" compatibility + if (!$have_tslash && $code == 200) { + my $url = $ctx->{-inbox}->base_url($ctx->{env}); + $url .= "_/text/$key/"; + + return [ 302, [ 'Content-Type', 'text/plain', + 'Location', $url ], + [ "Redirecting to $url\n" ] ]; + } + + # Follow git commit message conventions, + # first line is the Subject/title + my ($title) = ($txt =~ /\A([^\n]*)/s); + _do_linkify($txt); + $ctx->{-title_html} = ascii_html($title); + + my $nslash = ($key =~ tr!/!/!); + $ctx->{-upfx} = '../../../' . ('../' x $nslash); + + PublicInbox::WwwStream->response($ctx, $code, sub { + my ($nr, undef) = @_; + $nr == 1 ? '
'.$txt.'
' : undef + }); +} + +sub _do_linkify { + my $l = PublicInbox::Linkify->new; + $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0]))); +} + +sub _srch_prefix ($$) { + my ($srch, $txt) = @_; + my $pad = 0; + my $htxt = ''; + my $help = $srch->help; + my $i; + for ($i = 0; $i < @$help; $i += 2) { + my $pfx = $help->[$i]; + my $n = length($pfx); + $pad = $n if $n > $pad; + $htxt .= $pfx . "\0"; + $htxt .= $help->[$i + 1]; + $htxt .= "\f\n"; + } + $pad += 2; + my $padding = ' ' x ($pad + 8); + $htxt =~ s/^/$padding/gms; + $htxt =~ s/^$padding(\S+)\0/" $1". + (' ' x ($pad - length($1)))/egms; + $htxt =~ s/\f\n/\n/gs; + $$txt .= $htxt; + 1; +} + + +sub _default_text ($$$) { + my ($ctx, $key, $txt) = @_; + return if $key ne 'help'; # TODO more keys? + + my $ibx = $ctx->{-inbox}; + my $base_url = $ibx->base_url($ctx->{env}); + $$txt .= "public-inbox help for $base_url\n"; + $$txt .= <') into the URL. + Forward slash ('/') characters in the Message-IDs + need to be escaped as "%2F" (without quotes). + + Thus, it is possible to retrieve any message by its + Message-ID by going to: + + $base_url/ + + (without the '<' or '>') + + Message-IDs are described at: + + $WIKI_URL/Message-ID + +EOF + + # n.b. we use the Xapian DB for any regeneratable, + # order-of-arrival-independent data. + my $srch = $ibx->search; + if ($srch) { + $$txt .= </T/#u + + Loads the thread belonging to the given + in flat chronological order. The "#u" anchor + focuses the browser on the given . + + * $base_url/t/#u + + Loads the thread belonging to the given + in threaded order with nesting. For deep threads, + this requires a wide display or horizontal scrolling. + + Both of these HTML endpoints are suitable for offline reading + using the thread overview at the bottom of each page. + + + Users of feed readers may follow a particular thread using + the Atom feed: + + * $base_url/t.atom + + https://tools.ietf.org/html/rfc4287 + $WIKI_URL/Atom_(standard) + + Finally, the gzipped mbox for a thread is available for + downloading and importing into your favorite mail client: + + * $base_url/t.mbox.gz + + We use the mboxrd variant of the mbox format described at: + + $WIKI_URL/Mbox + +contact +------- + + This help text is maintained by public-inbox developers + reachable via plain-text email at: meta\@public-inbox.org + +EOF + # TODO: support admin contact info in ~/.public-inbox/config + } + 1; +} + +1; diff --git a/t/psgi_text.t b/t/psgi_text.t new file mode 100644 index 0000000..bf565f8 --- /dev/null +++ b/t/psgi_text.t @@ -0,0 +1,39 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +my $tmpdir = tempdir('psgi-text-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $maindir = "$tmpdir/main.git"; +my $addr = 'test-public@example.com'; +my $cfgpfx = "publicinbox.test"; +my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for psgi_text.t" if $@; +} +use_ok $_ foreach @mods; +use PublicInbox::Import; +use PublicInbox::Git; +use PublicInbox::Config; +use PublicInbox::WWW; +use_ok 'PublicInbox::WwwText'; +use Plack::Builder; +my $config = PublicInbox::Config->new({ + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $maindir, +}); +is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); +my $www = PublicInbox::WWW->new($config); + +test_psgi(sub { $www->call(@_) }, sub { + my ($cb) = @_; + my $res; + $res = $cb->(GET('/test/_/text/help/')); + like($res->content, qr!public-inbox help.*!, + 'default help'); +}); + +done_testing(); -- EW