From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 6E8CC1F826; Sun, 16 Aug 2015 08:38:06 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Cc: Eric Wong Subject: [RFC 10/11] implement /s/$SUBJECT_PATH.html lookups Date: Sun, 16 Aug 2015 08:37:58 +0000 Message-Id: <1439714279-21923-11-git-send-email-e@80x24.org> In-Reply-To: <1439714279-21923-1-git-send-email-e@80x24.org> References: <1439714279-21923-1-git-send-email-e@80x24.org> List-Id: Quick-and-dirty wiring up of /s/$SUBJECT_PATH.html URLs to Subject: paths. This may prove more memorizable and easier-to-share than /t/$MESSAGE_ID.html links, but less strict. This changes our schema version to 1, since we now use lower-case subject paths. 
--- lib/PublicInbox/Search.pm | 15 ++++++++-- lib/PublicInbox/View.pm | 72 ++++++++++++++++++++++++++++++++++++++++------- lib/PublicInbox/WWW.pm | 20 ++++++++++++- t/search.t | 16 +++++------ 4 files changed, 101 insertions(+), 22 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 39b06b0..f4f00b2 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -12,7 +12,10 @@ use PublicInbox::MID qw/mid_clean mid_compressed/; use constant { TS => 0, - SCHEMA_VERSION => 0, + # SCHEMA_VERSION history + # 0 - initial + # 1 - subject_path is lower-cased + SCHEMA_VERSION => 1, LANG => 'english', QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, }; @@ -209,6 +212,12 @@ sub query { $self->do_enquire($query, $opts); } +sub get_subject_path { + my ($self, $path, $opts) = @_; + my $query = $self->qp->parse_query("path:$path", 0); + $self->do_enquire($query); +} + # given a message ID, get replies to a message sub get_replies { my ($self, $mid, $opts) = @_; @@ -461,13 +470,13 @@ sub merge_threads { # normalize subjects so they are suitable as pathnames for URLs sub subject_path { - my ($subj) = @_; + my $subj = pop; $subj =~ s/\A\s+//; $subj =~ s/\s+\z//; $subj =~ s/^(?:re|aw):\s*//i; # remove reply prefix (aw: German) $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g; - $subj; + lc($subj); } sub do_cat_mail { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index c40a2a7..696d7d5 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -31,7 +31,7 @@ sub msg_html { } else { $footer = ''; } - headers_to_html_header($mime, $full_pfx) . + headers_to_html_header($mime, $full_pfx, $srch) . multipart_text_as_html($mime, $full_pfx) . '
' . PRE_WRAP . html_footer($mime, 1, $full_pfx, $srch) . $footer . @@ -179,6 +179,52 @@ sub thread_html { $rv .= "
" . PRE_WRAP . $next . $foot . ""; } +sub subject_path_html { + my (undef, $ctx, $foot, $srch) = @_; + my $path = $ctx->{subject_path}; + my $res = $srch->get_subject_path($path); + my $rv = ''; + require PublicInbox::GitCatFile; + my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $nr = scalar @{$res->{msgs}}; + return $rv if $nr == 0; + my @msgs; + while (my $smsg = shift @{$res->{msgs}}) { + my $m = $smsg->mid; + + # Duplicated from WWW.pm + my ($x2, $x38) = ($m =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/); + + unless (defined $x38) { + require Digest::SHA; + $m = Digest::SHA::sha1_hex($m); + ($x2, $x38) = ($m =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/); + } + + # FIXME: duplicated code from Feed.pm + my $mime = eval { + my $str = $git->cat_file("HEAD:$x2/$x38"); + Email::MIME->new($str); + }; + unless ($@) { + my $t = eval { str2time($mime->header('Date')) }; + defined($t) or $t = 0; + $mime->header_set('X-PI-TS', $t); + push @msgs, $mime; + } + } + require PublicInbox::Thread; + my $th = PublicInbox::Thread->new(@msgs); + $th->thread; + $th->order(*PublicInbox::Thread::sort_ts); + my $state = [ undef, { root_anchor => 'dummy' }, undef, 0 ]; + thread_entry(\$rv, $state, $_, 0) for $th->rootset; + my $final_anchor = $state->[3]; + my $next = "end of thread\n"; + + $rv .= "
" . PRE_WRAP . $next . $foot . ""; +} + # only private functions below. sub index_walk { @@ -235,7 +281,7 @@ sub enc_for { } sub multipart_text_as_html { - my ($mime, $full_pfx) = @_; + my ($mime, $full_pfx, $srch) = @_; my $rv = ""; my $part_nr = 0; my $enc_msg = enc_for($mime->header("Content-Type")); @@ -339,7 +385,7 @@ sub add_text_body_full { } sub headers_to_html_header { - my ($mime, $full_pfx) = @_; + my ($mime, $full_pfx, $srch) = @_; my $rv = ""; my @title; @@ -347,18 +393,21 @@ sub headers_to_html_header { my $v = $mime->header($h); defined($v) && length($v) or next; $v = PublicInbox::Hval->new_oneline($v); - $rv .= "$h: " . $v->as_html . "\n"; if ($h eq 'From') { my @from = Email::Address->parse($v->raw); - $v = $from[0]->name; - unless (defined($v) && length($v)) { - $v = '<' . $from[0]->address . '>'; - } - $title[1] = ascii_html($v); + $title[1] = ascii_html($from[0]->name); } elsif ($h eq 'Subject') { $title[0] = $v->as_html; + if ($srch) { + my $path = $srch->subject_path($v->raw); + $rv .= "$h: "; + $rv .= $v->as_html . "\n"; + next; + } } + $rv .= "$h: " . $v->as_html . 
"\n"; + } my $header_obj = $mime->header_obj; @@ -510,6 +559,9 @@ sub hash_subj { sub thread_replies { my ($dst, $root, $res) = @_; my @msgs = map { $_->mini_mime } @{$res->{msgs}}; + foreach (@{$res->{msgs}}) { + print STDERR "smsg->path: <", $_->path, ">\n"; + } require PublicInbox::Thread; $root->header_set('X-PI-TS', '0'); my $th = PublicInbox::Thread->new($root, @msgs); @@ -532,7 +584,7 @@ sub thread_entry { my ($dst, $state, $node, $level) = @_; # $state = [ $search_res, $seen, undef, 0 (msg_nr) ]; # $seen is overloaded with 3 types of fields: - # 1) "root" => Message-ID, + # 1) "root_anchor" => anchor_for(Message-ID), # 2) seen subject hashes: sha1(subject) => 1 # 3) anchors hashes: "#$sha1_hex" (same as $seen in index_entry) if (my $mime = $node->message) { diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 52e51c4..7fe9b85 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -53,8 +53,13 @@ sub run { } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi); + # subject_path display + } elsif ($path_info =~ m!$LISTNAME_RE/s/(\S+)\.html\z!o) { + my $sp = $2; + invalid_list(\%ctx, $1) || get_subject_path(\%ctx, $cgi, $sp); + # convenience redirects, order matters - } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t)/(\S+)\z!o) { + } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) { my $pfx = $2; invalid_list_mid(\%ctx, $1, $3) || redirect_mid(\%ctx, $cgi, $2); @@ -211,6 +216,19 @@ sub get_thread { [ $body ] ]; } +# /$LISTNAME/s/$SUBJECT_PATH.html +sub get_subject_path { + my ($ctx, $cgi, $sp) = @_; + $ctx->{subject_path} = $sp; + my $srch = searcher($ctx) or return need_search($ctx); + require PublicInbox::View; + my $foot = footer($ctx); + my $body = PublicInbox::View->subject_path_html($ctx, $foot, $srch) or + return r404(); + [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], + [ $body ] ]; +} + sub self_url { my ($cgi) = @_; ref($cgi) eq 'CGI' ? 
$cgi->self_url : $cgi->uri->as_string; diff --git a/t/search.t b/t/search.t index 9de6d28..9bdd3ce 100644 --- a/t/search.t +++ b/t/search.t @@ -22,7 +22,7 @@ my $ro = PublicInbox::Search->new($git_dir); my $root = Email::MIME->create( header_str => [ Date => 'Fri, 02 Oct 1993 00:00:00 +0000', - Subject => 'hello world', + Subject => 'Hello world', 'Message-ID' => '', From => 'John Smith ', To => 'list@example.com', @@ -31,7 +31,7 @@ my $ro = PublicInbox::Search->new($git_dir); my $last = Email::MIME->create( header_str => [ Date => 'Sat, 02 Oct 2010 00:00:00 +0000', - Subject => 'Re: hello world', + Subject => 'Re: Hello world', 'In-Reply-To' => '', 'Message-ID' => '', From => 'John Smith ', @@ -70,19 +70,19 @@ sub filter_mids { is($res->{count}, 0, "path variant `$p' does not match"); } - $res = $ro->query('subject:(hello world)'); + $res = $ro->query('subject:(Hello world)'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for subject:() match'); - $res = $ro->query('subject:"hello world"'); + $res = $ro->query('subject:"Hello world"'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for subject:"" match'); - $res = $ro->query('subject:"hello world"', {limit => 1}); + $res = $ro->query('subject:"Hello world"', {limit => 1}); is(scalar @{$res->{msgs}}, 1, "limit works"); my $first = $res->{msgs}->[0]; - $res = $ro->query('subject:"hello world"', {offset => 1}); + $res = $ro->query('subject:"Hello world"', {offset => 1}); is(scalar @{$res->{msgs}}, 1, "offset works"); my $second = $res->{msgs}->[0]; @@ -207,7 +207,7 @@ sub filter_mids { $rw->add_message(Email::MIME->create( header_str => [ Date => 'Sat, 02 Oct 2010 00:00:01 +0000', - Subject => 'hello', + Subject => 'Hello', 'Message-ID' => '', From => 'Quoter ', To => 'list@example.com', @@ -217,7 +217,7 @@ sub filter_mids { $rw->add_message(Email::MIME->create( header_str => [ Date => 'Sat, 02 Oct 2010 00:00:02 +0000', - Subject => 'hello', + Subject => 'Hello', 
'Message-ID' => '', From => 'Non-Quoter', To => 'list@example.com', -- EW