diff options
author | Eric Wong <e@80x24.org> | 2014-04-24 00:21:21 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2014-04-24 00:21:21 +0000 |
commit | 483a67ca0613a75bb80ea4c1201cb2d5f2cf063d (patch) | |
tree | b46b775c6eb619708789734cebf7d349589428bb /lib/PublicInbox/Hval.pm | |
parent | dc4a8227f8949694e4fc3f40bb0278d4b4713381 (diff) | |
download | public-inbox-483a67ca0613a75bb80ea4c1201cb2d5f2cf063d.tar.gz |
This helps us keep track of escaping which needs to be done for various levels.
Diffstat (limited to 'lib/PublicInbox/Hval.pm')
-rw-r--r-- | lib/PublicInbox/Hval.pm | 53 |
1 files changed, 53 insertions, 0 deletions
# Reconstructed from the patch:
#   diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
#   new file mode 100644, index 00000000..26a2d0bb, hunk @@ -0,0 +1,53 @@
# The patch text had been collapsed onto a single line; the module is laid
# out one statement per line again below.
#
# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
#
# represents a header value in various forms
package PublicInbox::Hval;
use strict;
use warnings;
use fields qw(raw -as_utf8);
use Encode qw(find_encoding);
use CGI qw(escapeHTML);
use URI::Escape qw(uri_escape);

# Encoding objects are looked up once at load time and shared by all methods.
my $enc_utf8 = find_encoding('utf8');
my $enc_ascii = find_encoding('us-ascii');
my $enc_mime = find_encoding('MIME-Header'); # not referenced in this file

# new($class, $raw)
# Builds an object around $raw with leading and trailing whitespace removed.
# An undefined $raw is stored as the empty string.
sub new {
    my ($class, $raw) = @_;
    my $self = fields::new($class);

    # Guard against undef so the substitutions below cannot emit
    # "uninitialized value" warnings (new_oneline already did this).
    $raw = '' unless defined $raw;

    # we never care about leading/trailing whitespace
    $raw =~ s/\A\s+//;
    $raw =~ s/\s+\z//;
    $self->{raw} = $raw;
    return $self;
}

# new_msgid($class, $raw)
# Like new(), but also drops one leading "<" and one trailing ">".
# Whitespace around the brackets is tolerated, so " <id> " yields "id".
sub new_msgid {
    my ($class, $raw) = @_;
    $raw = '' unless defined $raw;
    $raw =~ s/\A\s*<//;
    $raw =~ s/>\s*\z//;
    return $class->new($raw);
}

# new_oneline($class, $raw)
# Like new(), but first flattens $raw onto a single line: CRs are removed
# and every run of tab/newline/space becomes one space.
sub new_oneline {
    my ($class, $raw) = @_;
    $raw = '' unless defined $raw;

    # Delete CR before squeezing: otherwise a CR sitting between two
    # whitespace characters splits the run and leaves a double space.
    $raw =~ tr/\r//d;        # kill CR
    $raw =~ tr/\t\n / /s;    # squeeze spaces
    return $class->new($raw);
}

# as_utf8($self)
# Returns the value encoded as UTF-8 octets. The result is cached in the
# object after the first call.
sub as_utf8 {
    my ($self) = @_;

    # Test definedness rather than truth so '' and '0' are cached as well.
    my $cached = $self->{-as_utf8};
    return $cached if defined $cached;
    return $self->{-as_utf8} = $enc_utf8->encode($self->{raw});
}

# ascii_html($str)
# Plain function (not a method). HTML-escapes $str, then encodes it to
# US-ASCII, replacing every character US-ASCII cannot represent with a
# numeric character reference (&#NNN;). $str is expected to be a character
# string: if octets are passed, each high byte gets its own reference.
sub ascii_html {
    my $escaped = escapeHTML($_[0]);
    return $enc_ascii->encode($escaped, Encode::HTMLCREF);
}

# as_html($self)
# Returns the value as ASCII-only HTML text. The character string is used
# here, not as_utf8(): feeding UTF-8 octets to ascii_html would emit one
# character reference per byte (U+00E9 would become "&#195;&#169;").
sub as_html {
    my ($self) = @_;
    return ascii_html($self->{raw});
}

# as_href($self)
# Returns the value percent-encoded (from its UTF-8 octets) and then
# HTML-escaped, for use inside an href attribute.
sub as_href {
    my ($self) = @_;
    return ascii_html(uri_escape($self->as_utf8));
}

1;