about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2014-09-15 02:34:33 +0000
committer Eric Wong <e@80x24.org> 2014-09-15 02:44:10 +0000
commit 9713697ffa1e811e378f4233baf4fe2fb133214a (patch)
tree 8034b0438f561e5e2c530ae138c643c80cd1e713 /lib
parent b16868d658478c3fa07c44f802d37b266e5ee822 (diff)
download public-inbox-9713697ffa1e811e378f4233baf4fe2fb133214a.tar.gz
Some Message-IDs are crazy long, so support SHA-1s for them
instead.  This allows shorter URLs to be generated, which are
less likely to cause problems.

However, we'll still favor short Message-IDs whenever possible.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Hval.pm 22
-rw-r--r-- lib/PublicInbox/WWW.pm 16
2 files changed, 26 insertions, 12 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index a93e8c24..9be163aa 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -5,28 +5,36 @@
 package PublicInbox::Hval;
 use strict;
 use warnings;
-use fields qw(raw);
+use fields qw(raw href);
 use Encode qw(find_encoding);
 use URI::Escape qw(uri_escape_utf8);
 
 my $enc_ascii = find_encoding('us-ascii');
 
 sub new {
-        my ($class, $raw) = @_;
+        my ($class, $raw, $href) = @_;
         my $self = fields::new($class);
 
         # we never care about leading/trailing whitespace
         $raw =~ s/\A\s*//;
         $raw =~ s/\s*\z//;
         $self->{raw} = $raw;
+        $self->{href} = defined $href ? $href : $raw;
         $self;
 }
 
 sub new_msgid {
-        my ($class, $raw) = @_;
-        $raw =~ s/\A<//;
-        $raw =~ s/>\z//;
-        $class->new($raw);
+        my ($class, $msgid) = @_;
+        $msgid =~ s/\A\s*<?//;
+        $msgid =~ s/>?\s*\z//;
+
+        if (length($msgid) <= 40) {
+                $class->new($msgid);
+        } else {
+                require Digest::SHA;
+                my $hex = Digest::SHA::sha1_hex($msgid);
+                $class->new($msgid, $hex);
+        }
 }
 
 sub new_oneline {
@@ -52,7 +60,7 @@ sub ascii_html {
 }
 
 sub as_html { ascii_html($_[0]->{raw}) }
-sub as_href { ascii_html(uri_escape_utf8($_[0]->{raw})) }
+sub as_href { ascii_html(uri_escape_utf8($_[0]->{href})) }
 
 sub raw {
         if (defined $_[1]) {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f67e72ce..1814286b 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -130,13 +130,19 @@ sub get_index {
 # just returns a string ref for the blob in the current ctx
 sub mid2blob {
         my ($ctx) = @_;
-        require Digest::SHA;
-        my $hex = Digest::SHA::sha1_hex($ctx->{mid});
-        $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or
-                        die "BUG: not a SHA-1 hex: $hex";
+        my $hex = $ctx->{mid};
+        my ($x2, $x38) = ($hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/);
+
+        unless (defined $x38) {
+                # compatibility with old links
+                require Digest::SHA;
+                $hex = Digest::SHA::sha1_hex($hex);
+                ($x2, $x38) = ($hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/);
+                defined $x38 or die "BUG: not a SHA-1 hex: $hex";
+        }
 
         my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
-                        qw(cat-file blob), "HEAD:$1/$2");
+                        qw(cat-file blob), "HEAD:$x2/$x38");
         my $cmd = join(' ', @cmd);
         my $pid = open my $fh, '-|';
         defined $pid or die "fork failed: $!\n";