user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] search: implement subject summarization
Date: Tue, 25 Aug 2015 02:03:16 +0000	[thread overview]
Message-ID: <1440468196-12885-3-git-send-email-e@80x24.org> (raw)

We ought to summarize subjects to avoid exploding
line lengths in the web interface.
---
 lib/PublicInbox/Search.pm    | 25 +++++++++++++++++++++++++
 lib/PublicInbox/SearchMsg.pm |  3 +--
 t/search.t                   | 17 +++++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index bcc5312..5ef380e 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -237,6 +237,31 @@ sub subject_normalized {
 	$subj;
 }
 
+# for doc data
+sub subject_summary {
+	my $subj = pop;
+	my $max = 68;
+	if (length($subj) > $max) {
+		my @subj = split(/\s+/, $subj);
+		$subj = '';
+		my $l;
+
+		while ($l = shift @subj) {
+			my $new = $subj . $l . ' ';
+			last if length($new) >= $max;
+			$subj = $new;
+		}
+		if (length $subj) {
+			my $r = scalar @subj ? ' ...' : '';
+			$subj =~ s/ \z/$r/s;
+		} else {
+			@subj = ($l =~ /\A(.{1,72})/);
+			$subj = $subj[0] . ' ...';
+		}
+	}
+	$subj;
+}
+
 sub enquire {
 	my ($self) = @_;
 	$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index a8f99bd..a9f3180 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -94,9 +94,8 @@ sub date {
 
 sub to_doc_data {
 	my ($self) = @_;
-
 	$self->mid . "\n" .
-	$self->subject . "\n" .
+	PublicInbox::Search::subject_summary($self->subject) . "\n" .
 	$self->from_name . "\n".
 	$self->date . "\n" .
 	$self->references_sorted;
diff --git a/t/search.t b/t/search.t
index 17e9eaf..65539f1 100644
--- a/t/search.t
+++ b/t/search.t
@@ -16,6 +16,23 @@ is(0, system(qw(git init -q --bare), $git_dir), "git init (main)");
 eval { PublicInbox::Search->new($git_dir) };
 ok($@, "exception raised on non-existent DB");
 
+{
+	my $orig = "FOO " x 30;
+	my $summ = PublicInbox::Search::subject_summary($orig);
+
+	$summ = length($summ);
+	$orig = length($orig);
+	ok($summ < $orig && $summ > 0, "summary shortened ($orig => $summ)");
+
+	$orig = "FOO" x 30;
+	$summ = PublicInbox::Search::subject_summary($orig);
+
+	$summ = length($summ);
+	$orig = length($orig);
+	ok($summ < $orig && $summ > 0,
+	   "summary shortened but not empty: $summ");
+}
+
 my $rw = PublicInbox::SearchIdx->new($git_dir, 1);
 my $ro = PublicInbox::Search->new($git_dir);
 my $rw_commit = sub {
-- 
EW


             reply	other threads:[~2015-08-25  2:03 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-25  2:03 Eric Wong [this message]
2015-08-25  3:56 ` [PATCH] search: implement subject summarization Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440468196-12885-3-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).