From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS198093 171.25.193.0/24 X-Spam-Status: No, score=-2.4 required=3.0 tests=AWL,BAYES_00,RCVD_IN_XBL shortcircuit=no autolearn=no version=3.3.2 X-Original-To: meta@public-inbox.org Received: from 80x24.org (tor-exit3-readme.dfri.se [171.25.193.235]) by dcvr.yhbt.net (Postfix) with ESMTP id B7CC01FAE9 for ; Tue, 25 Aug 2015 02:03:25 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] search: implement subject summarization Date: Tue, 25 Aug 2015 02:03:16 +0000 Message-Id: <1440468196-12885-3-git-send-email-e@80x24.org> List-Id: We ought to summarize subjects to avoid exploding line lengths in the web interface. --- lib/PublicInbox/Search.pm | 25 +++++++++++++++++++++++++ lib/PublicInbox/SearchMsg.pm | 3 +-- t/search.t | 17 +++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index bcc5312..5ef380e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -237,6 +237,31 @@ sub subject_normalized { $subj; } +# for doc data +sub subject_summary { + my $subj = pop; + my $max = 68; + if (length($subj) > $max) { + my @subj = split(/\s+/, $subj); + $subj = ''; + my $l; + + while ($l = shift @subj) { + my $new = $subj . $l . ' '; + last if length($new) >= $max; + $subj = $new; + } + if (length $subj) { + my $r = scalar @subj ? ' ...' : ''; + $subj =~ s/ \z/$r/s; + } else { + @subj = ($l =~ /\A(.{1,72})/); + $subj = $subj[0] . ' ...'; + } + } + $subj; +} + sub enquire { my ($self) = @_; $self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb}); diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a8f99bd..a9f3180 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -94,9 +94,8 @@ sub date { sub to_doc_data { my ($self) = @_; - $self->mid . "\n" . - $self->subject . "\n" . + PublicInbox::Search::subject_summary($self->subject) . "\n" . $self->from_name . "\n". $self->date . "\n" . $self->references_sorted; diff --git a/t/search.t b/t/search.t index 17e9eaf..65539f1 100644 --- a/t/search.t +++ b/t/search.t @@ -16,6 +16,23 @@ is(0, system(qw(git init -q --bare), $git_dir), "git init (main)"); eval { PublicInbox::Search->new($git_dir) }; ok($@, "exception raised on non-existent DB"); +{ + my $orig = "FOO " x 30; + my $summ = PublicInbox::Search::subject_summary($orig); + + $summ = length($summ); + $orig = length($orig); + ok($summ < $orig && $summ > 0, "summary shortened ($orig => $summ)"); + + $orig = "FOO" x 30; + $summ = PublicInbox::Search::subject_summary($orig); + + $summ = length($summ); + $orig = length($orig); + ok($summ < $orig && $summ > 0, + "summary shortened but not empty: $summ"); +} + my $rw = PublicInbox::SearchIdx->new($git_dir, 1); my $ro = PublicInbox::Search->new($git_dir); my $rw_commit = sub { -- EW