From 22d8fcc3d37b9ce13a5088f1d0557078c9a84062 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 2 Oct 2015 21:15:07 +0000 Subject: rename mid_compress to id_compress We use it as a general compressor for identifiers such as subject paths, so using the "mid_" prefix probably is not appropriate. --- lib/PublicInbox/MID.pm | 19 +++++++------------ lib/PublicInbox/Search.pm | 6 +++--- lib/PublicInbox/SearchIdx.pm | 4 ++-- lib/PublicInbox/View.pm | 4 ++-- t/view.t | 11 +++++------ 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 677a9d59..3d404ef8 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -4,7 +4,7 @@ package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/mid_clean mid_compress mid2path/; +our @EXPORT_OK = qw/mid_clean id_compress mid2path/; use Digest::SHA qw/sha1_hex/; use constant MID_MAX => 40; # SHA-1 hex length @@ -19,18 +19,13 @@ sub mid_clean { } # this is idempotent -sub mid_compress { - my ($mid, $force) = @_; +sub id_compress { + my ($id, $force) = @_; - # XXX dirty hack! FIXME! - # Some HTTP servers (apache2 2.2.22-13+deb7u5 on my system) - # apparently do not handle "%25" in the URL path component correctly. - # I'm not yet sure if it's something weird with my rewrite rules - # or what; will need to debug... - return sha1_hex($mid) if (index($mid, '%') >= 0); - - return $mid if (!$force && length($mid) <= MID_MAX); - sha1_hex($mid); + if ($force || $id =~ /[^\w\-]/ || length($id) > MID_MAX) { + return sha1_hex($id); + } + $id; } sub mid2path { diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 1d13f4b8..fbc6882c 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -14,7 +14,7 @@ use constant LINES => 3; # :lines as defined in RFC 3977 use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; use Email::MIME; -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean id_compress/; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -25,7 +25,7 @@ use constant { # SCHEMA_VERSION history # 0 - initial # 1 - subject_path is lower-cased - # 2 - subject_path is mid_compress in the index, only + # 2 - subject_path is id_compress in the index, only # 3 - message-ID is compressed if it includes '%' (hack!) # 4 - change "Re: " normalization, avoid circular Reference ghosts # 5 - subject_path drops trailing '.' @@ -104,7 +104,7 @@ sub get_thread { return { total => 0, msgs => [] } unless $smsg; my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id); - my $path = mid_compress($smsg->path); + my $path = id_compress($smsg->path); my $qsub = Search::Xapian::Query->new(xpfx('path').$path); my $query = Search::Xapian::Query->new(OP_OR, $qtid, $qsub); $self->do_enquire($query, $opts); diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f98ba3e1..8184dc71 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -5,7 +5,7 @@ package PublicInbox::SearchIdx; use strict; use warnings; use base qw(PublicInbox::Search); -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean id_compress/; *xpfx = *PublicInbox::Search::xpfx; use constant { @@ -81,7 +81,7 @@ sub add_message { if ($subj ne '') { my $path = $self->subject_path($subj); - $doc->add_term(xpfx('path') . mid_compress($path)); + $doc->add_term(xpfx('path') . id_compress($path)); } add_val($doc, &PublicInbox::Search::TS, $smsg->ts); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index ccdcde2a..c9be7700 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -9,7 +9,7 @@ use Encode qw/find_encoding/; use Encode::MIME::Header; use Email::MIME::ContentType qw/parse_content_type/; use PublicInbox::Hval; -use PublicInbox::MID qw/mid_clean mid_compress mid2path/; +use PublicInbox::MID qw/mid_clean id_compress mid2path/; use Digest::SHA qw/sha1_hex/; my $SALT = rand; require POSIX; @@ -586,7 +586,7 @@ sub anchor_for { my ($msgid) = @_; my $id = $msgid; if ($id !~ /\A[a-f0-9]{40}\z/) { - $id = mid_compress(mid_clean($id), 1); + $id = id_compress(mid_clean($id), 1); } 'm' . $id; } diff --git a/t/view.t b/t/view.t index 325f509c..568ab303 100644 --- a/t/view.t +++ b/t/view.t @@ -145,13 +145,12 @@ EOF like($html, qr/\bhi = bye\b/, "HTML output decoded QP"); } - -{ # XXX dirty hack - use PublicInbox::MID qw/mid_compress/; - like(mid_compress('foo%bar@wtf'), qr/\A[a-f0-9]{40}\z/, +{ + use PublicInbox::MID qw/id_compress/; + like(id_compress('foo%bar@wtf'), qr/\A[a-f0-9]{40}\z/, "percent always converted to sha1 to workaround buggy httpds"); - is(mid_compress('foobar@wtf'), 'foobar@wtf', - 'regular MID not compressed'); + is(id_compress('foobar-wtf'), 'foobar-wtf', + 'regular ID not compressed'); } done_testing(); -- cgit v1.2.3-24-ge0c7