From 43238d3688a51d98ce47151173aa7971e231a3bb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 3 Sep 2015 03:00:28 +0000 Subject: get rid of Message-ID compression entirely Provide a fallback for legacy SHA-1 messages, but do not advertise shorter URLs anymore for data portability concerns. This fixes a regression introduced in commit 81a9c1b476987d845b340ab9013d26cf4487cb9a ("search: disable Message-ID compression in Xapian") which ended up breaking thread-related endpoints for large Message-IDs, as lookups on the SHA-1 message no longer worked. --- lib/PublicInbox/ExtMsg.pm | 2 +- lib/PublicInbox/Feed.pm | 5 ++--- lib/PublicInbox/Hval.pm | 4 ++-- lib/PublicInbox/Mbox.pm | 4 ++-- lib/PublicInbox/View.pm | 13 ++++++------- lib/PublicInbox/WWW.pm | 14 ++++++++++++-- 6 files changed, 25 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 3e0e6e4e..00903225 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -5,7 +5,7 @@ use strict; use warnings; use URI::Escape qw(uri_escape_utf8); use PublicInbox::Hval; -use PublicInbox::MID qw/mid_compress mid2path/; +use PublicInbox::MID qw/mid2path/; # TODO: user-configurable our @EXT_URL = ( diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 8fc65fb5..2284f239 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -9,7 +9,7 @@ use Date::Parse qw(strptime); use PublicInbox::Hval; use PublicInbox::GitCatFile; use PublicInbox::View; -use PublicInbox::MID qw/mid_clean mid_compress mid2path/; +use PublicInbox::MID qw/mid_clean mid2path/; use POSIX qw/strftime/; use constant { DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard @@ -393,8 +393,7 @@ sub add_topic { my $mime = do_cat_mail($git, $path) or return 0; $header_obj = $mime->header_obj; } - my $mid = $header_obj->header('Message-ID'); - $mid = mid_compress(mid_clean($mid)); + my $mid = mid_clean($header_obj->header('Message-ID')); $u = $enc_utf8->decode($u); push @$order, [ $mid, $ts, $u, $subj, $topic ]; return 1; diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 0445e575..d65ad3bb 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -8,7 +8,7 @@ use warnings; use fields qw(raw href); use Encode qw(find_encoding); use URI::Escape qw(uri_escape_utf8); -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean/; my $enc_ascii = find_encoding('us-ascii'); @@ -27,7 +27,7 @@ sub new { sub new_msgid { my ($class, $msgid, $no_compress) = @_; $msgid = mid_clean($msgid); - $class->new($msgid, $no_compress ? $msgid : mid_compress($msgid)); + $class->new($msgid, $msgid); } sub new_oneline { diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 19c18ac4..453f4cda 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -4,7 +4,7 @@ package PublicInbox::Mbox; use strict; use warnings; -use PublicInbox::MID qw/mid_compress mid2path/; +use PublicInbox::MID qw/mid2path/; require Email::Simple; sub thread_mbox { @@ -69,7 +69,7 @@ sub emit_mbox { $fh = PublicInbox::MboxGz->new($fh) if $sfx; require PublicInbox::GitCatFile; - my $mid = mid_compress($ctx->{mid}); + my $mid = $ctx->{mid}; my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); my %opts = (offset => 0); my $nr; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index c572198a..129aa895 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -161,7 +161,7 @@ sub thread_html { sub emit_thread_html { my ($cb, $ctx, $foot, $srch) = @_; - my $mid = mid_compress($ctx->{mid}); + my $mid = $ctx->{mid}; my $res = $srch->get_thread($mid); my $msgs = load_results($res); my $nr = scalar @$msgs; @@ -447,7 +447,7 @@ sub headers_to_html_header { sub thread_inline { my ($dst, $ctx, $cur, $full_pfx) = @_; my $srch = $ctx->{srch}; - my $mid = mid_compress(mid_clean($cur->header('Message-ID'))); + my $mid = mid_clean($cur->header('Message-ID')); my $res = $srch->get_thread($mid); my $nr = $res->{total}; @@ -465,7 +465,7 @@ sub thread_inline { seen => { $subj => 1 }, srch => $srch, cur => $mid, - parent_cmp => $parent ? mid_compress($parent) : '', + parent_cmp => defined $parent ? $parent : '', parent => $parent, }; for (thread_results(load_results($res))->rootset) { @@ -683,16 +683,15 @@ sub _inline_header { my $pfx = INDENT x $level; my $cur = $state->{cur}; - my $mid = $mime->header('Message-ID'); + my $mid = mid_clean($mime->header('Message-ID')); my $f = $mime->header('X-PI-From'); my $d = _msg_date($mime); $f = PublicInbox::Hval->new($f); $d = PublicInbox::Hval->new($d); $f = $f->as_html; $d = $d->as_html . ' UTC'; - my $midc = mid_compress(mid_clean($mid)); if ($cur) { - if ($cur eq $midc) { + if ($cur eq $mid) { delete $state->{cur}; $$dst .= "$pfx` ". "[this message] by $f @ $d\n"; @@ -700,7 +699,7 @@ sub _inline_header { return; } } else { - $state->{next_msg} ||= $midc; + $state->{next_msg} ||= $mid; } # Subject is never undef, this mail was loaded from diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 16fd16aa..11b54027 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -116,8 +116,18 @@ sub invalid_list { sub invalid_list_mid { my ($ctx, $listname, $mid) = @_; my $ret = invalid_list($ctx, $listname, $mid); - $ctx->{mid} = uri_unescape($mid) unless $ret; - $ret; + return $ret if $ret; + + $ctx->{mid} = $mid = uri_unescape($mid); + if ($mid =~ /\A[a-f0-9]{40}\z/) { + if ($mid = mid2blob($ctx)) { + require Email::Simple; + use PublicInbox::MID qw/mid_clean/; + $mid = Email::Simple->new($mid); + $ctx->{mid} = mid_clean($mid->header('Message-ID')); + } + } + undef; } # /$LISTNAME/new.atom -> Atom feed, includes replies -- cgit v1.2.3-24-ge0c7