* [PATCH 09/11] search: bump schema version to 5 for subject_path
2015-08-20 2:57 6% ` [PATCH 05/11] index: simplify main landing page if search-enabled Eric Wong
@ 2015-08-20 2:57 7% ` Eric Wong
1 sibling, 0 replies; 2+ results
From: Eric Wong @ 2015-08-20 2:57 UTC (permalink / raw)
To: meta
In "index: simplify main landing page if search-enabled",
subject normalization went a little further and now drops trailing
'.' characters, so we will need to re-index.
---
lib/PublicInbox/Search.pm | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 0e63ee3..abd9db4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -22,7 +22,8 @@ use constant {
# 2 - subject_path is mid_compressed in the index, only
# 3 - message-ID is compressed if it includes '%' (hack!)
# 4 - change "Re: " normalization, avoid circular Reference ghosts
- SCHEMA_VERSION => 4,
+ # 5 - subject_path drops trailing '.'
+ SCHEMA_VERSION => 5,
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
--
EW
^ permalink raw reply related [relevance 7%]
* [PATCH 05/11] index: simplify main landing page if search-enabled
@ 2015-08-20 2:57 6% ` Eric Wong
2015-08-20 2:57 7% ` [PATCH 09/11] search: bump schema version to 5 for subject_path Eric Wong
1 sibling, 0 replies; 2+ results
From: Eric Wong @ 2015-08-20 2:57 UTC (permalink / raw)
To: meta
We can display /t/$MESSAGE_ID.html easily with a Xapian search
index, so rely on it instead of trying to display messages inline.
---
lib/PublicInbox/Feed.pm | 103 +++++++++++++++++++++++++++++++++++++++-------
lib/PublicInbox/Search.pm | 15 +++++--
2 files changed, 98 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 8bfd19e..40dfb45 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -9,11 +9,15 @@ use Date::Parse qw(strptime);
use PublicInbox::Hval;
use PublicInbox::GitCatFile;
use PublicInbox::View;
+use PublicInbox::MID qw/mid_clean mid_compressed/;
use constant {
DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
MAX_PER_PAGE => 25, # this needs to be tunable
};
+use Encode qw/find_encoding/;
+my $enc_utf8 = find_encoding('UTF-8');
+
# main function
sub generate {
my ($class, $ctx) = @_;
@@ -55,22 +59,30 @@ sub generate_html_index {
my $title = $feed_opts->{description} || '';
$title = PublicInbox::Hval->new_oneline($title)->as_html;
+ my $atom_url = $feed_opts->{atomurl};
my $html = "<html><head><title>$title</title>" .
- '<link rel="alternate" title="Atom feed"' . "\nhref=\"" .
- $feed_opts->{atomurl} . "\"\ntype=\"application/atom+xml\"/>" .
- '</head><body>';
+ "<link\nrel=alternate\ntitle=\"Atom feed\"\n".
+ "href=\"$atom_url\"\"\ntype=\"application/atom+xml\"/>" .
+ '</head><body>' . PublicInbox::View::PRE_WRAP;
my $state;
my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+ my $topics;
+ my $srch = $ctx->{srch};
+ $srch and $topics = [ [], {} ];
my (undef, $last) = each_recent_blob($ctx, sub {
- my ($path, $commit) = @_;
- unless (defined $state) {
- $state = [ $ctx->{srch}, {}, $commit, 0 ];
+ my ($path, $commit, $ts, $u, $subj) = @_;
+ $state ||= [ undef, {}, $commit, 0 ];
+
+ if ($srch) {
+ add_topic($git, $srch, $topics, $path, $ts, $u, $subj);
+ } else {
+ my $mime = do_cat_mail($git, $path) or return 0;
+ $html .=
+ PublicInbox::View->index_entry($mime, 0, $state);
+ 1;
}
- my $mime = do_cat_mail($git, $_[0]) or return 0;
- $html .= PublicInbox::View->index_entry($mime, 0, $state);
- 1;
});
Email::Address->purge_cache;
$git = undef; # destroy pipes.
@@ -81,6 +93,7 @@ sub generate_html_index {
$footer .= "\n" . $list_footer if $list_footer;
$footer = "<hr /><pre>$footer</pre>";
}
+ dump_topics(\$html, $topics) if $topics;
$html .= "$footer</body></html>";
}
@@ -92,6 +105,7 @@ sub nav_footer {
my $old_r = $cgi->param('r');
my $head = ' ';
my $next = ' ';
+ # $state = [ undef, {}, $first_commit, $last_anchor ];
my $first = $state->[2];
my $anchor = $state->[3];
@@ -128,7 +142,8 @@ sub each_recent_blob {
# leave us with filenames with spaces in them..
my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
qw/log --no-notes --no-color --raw -r
- --abbrev=16 --abbrev-commit/);
+ --abbrev=16 --abbrev-commit/,
+ "--format=%h%x00%ct%x00%an%x00%s%x00");
push @cmd, $range;
my $pid = open(my $log, '-|', @cmd) or
@@ -137,26 +152,29 @@ sub each_recent_blob {
my $last;
my $nr = 0;
my ($cur_commit, $first_commit, $last_commit);
- while (my $line = <$log>) {
+ my ($ts, $subj, $u);
+ while (defined(my $line = <$log>)) {
if ($line =~ /$addmsg/o) {
my $add = $1;
next if $deleted{$add}; # optimization-only
- $nr += $cb->($add, $cur_commit);
+ $nr += $cb->($add, $cur_commit, $ts, $u, $subj);
if ($nr >= $max) {
$last = 1;
last;
}
} elsif ($line =~ /$delmsg/o) {
$deleted{$1} = 1;
- } elsif ($line =~ /^commit (${hex}{7,40})/o) {
- $cur_commit = $1;
- $first_commit = $1 unless defined $first_commit;
+ } elsif ($line =~ /^${hex}{7,40}/o) {
+ ($cur_commit, $ts, $u, $subj) = split("\0", $line);
+ unless (defined $first_commit) {
+ $first_commit = $cur_commit;
+ }
}
}
if ($last) {
while (my $line = <$log>) {
- if ($line =~ /^commit (${hex}{7,40})/o) {
+ if ($line =~ /^(${hex}{7,40})/o) {
$last_commit = $1;
last;
}
@@ -279,4 +297,57 @@ sub do_cat_mail {
$@ ? undef : $mime;
}
+# accumulate recent topics if search is supported
+sub add_topic {
+ my ($git, $srch, $topics, $path, $ts, $u, $subj) = @_;
+ my ($order, $subjs) = @$topics;
+ my $header_obj;
+
+ # legacy ssoma did not set commit titles based on Subject
+ $subj = $enc_utf8->decode($subj);
+ if ($subj eq 'mda') {
+ my $mime = do_cat_mail($git, $path) or return 0;
+ $header_obj = $mime->header_obj;
+ $subj = mime_header($header_obj, 'Subject');
+ }
+
+ $subj = $srch->subject_normalized($subj);
+ if (++$subjs->{$subj} == 1) {
+ unless ($header_obj) {
+ my $mime = do_cat_mail($git, $path) or return 0;
+ $header_obj = $mime->header_obj;
+ }
+ my $mid = $header_obj->header_raw('Message-ID');
+ $mid = mid_compressed(mid_clean($mid));
+ $u = $enc_utf8->decode($u);
+ push @$order, [ $mid, $ts, $u, $subj ];
+ return 1;
+ }
+ 0; # old topic, continue going
+}
+
+sub dump_topics {
+ my ($dst, $topics) = @_;
+ my ($order, $subjs) = @$topics;
+ $$dst .= '[No recent topics]' unless (scalar @$order);
+ while (defined(my $info = shift @$order)) {
+ my ($mid, $ts, $u, $subj) = @$info;
+ my $n = delete $subjs->{$subj};
+ $mid = PublicInbox::Hval->new($mid)->as_href;
+ $subj = PublicInbox::Hval->new($subj)->as_html;
+ $u = PublicInbox::Hval->new($u)->as_html;
+ $$dst .= "<a\nhref=\"t/$mid.html#u\"><b>$subj</b></a>\n- ";
+ $ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+ if ($n == 1) {
+ $$dst .= "created by $u @ $ts UTC\n"
+ } else {
+ # $n isn't the total number of posts on the topic,
+ # just the number of posts in the current "git log"
+ # window, so leave it unlabeled
+ $$dst .= "updated by $u @ $ts UTC ($n)\n"
+ }
+ }
+ $$dst .= '</pre>'
+}
+
1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b9f283f..c28401b 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -492,14 +492,21 @@ sub merge_threads {
# normalize subjects so they are suitable as pathnames for URLs
sub subject_path {
my $subj = pop;
-
- $subj =~ s/\A\s+//;
- $subj =~ s/\s+\z//;
- $subj =~ s/$REPLY_RE//igo; # remove reply prefix
+ $subj = subject_normalized($subj);
$subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
lc($subj);
}
+sub subject_normalized {
+ my $subj = pop;
+ $subj =~ s/\A\s+//s; # no leading space
+ $subj =~ s/\s+\z//s; # no trailing space
+ $subj =~ s/\s+/ /gs; # no redundant spaces
+ $subj =~ s/\.+\z//; # no trailing '.'
+ $subj =~ s/$REPLY_RE//igo; # remove reply prefix
+ $subj;
+}
+
sub do_cat_mail {
my ($git, $blob) = @_;
my $mime = eval {
--
EW
^ permalink raw reply related [relevance 6%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-08-20 2:57 [PATCH 01/11] feed: remove threading from index Eric Wong
2015-08-20 2:57 6% ` [PATCH 05/11] index: simplify main landing page if search-enabled Eric Wong
2015-08-20 2:57 7% ` [PATCH 09/11] search: bump schema version to 5 for subject_path Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).