diff options
-rw-r--r-- | lib/PublicInbox/Config.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Feed.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Filter/RubyLang.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/GitHTTPBackend.pm | 8 | ||||
-rw-r--r-- | lib/PublicInbox/HTTP.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Hval.pm | 3 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/Linkify.pm | 5 | ||||
-rw-r--r-- | lib/PublicInbox/MID.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/MsgTime.pm | 7 | ||||
-rw-r--r-- | lib/PublicInbox/NNTP.pm | 16 | ||||
-rw-r--r-- | lib/PublicInbox/NewsWWW.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/SolverGit.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/ViewDiff.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/WWW.pm | 20 | ||||
-rw-r--r-- | lib/PublicInbox/WwwAttach.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/WwwListing.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/Xapcmd.pm | 6 | ||||
-rwxr-xr-x | script/public-inbox-purge | 2 | ||||
-rw-r--r-- | t/linkify.t | 12 |
24 files changed, 75 insertions, 52 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 09f9179b..6e85750a 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -307,7 +307,7 @@ sub parse_cgitrc { } } elsif (m!\Ainclude=(.+)\z!) { parse_cgitrc($self, $1, $nesting + 1); - } elsif (m!\A(scan-hidden-path|remove-suffix)=(\d+)\z!) { + } elsif (m!\A(scan-hidden-path|remove-suffix)=([0-9]+)\z!) { my ($k, $v) = ($1, $2); $k =~ tr/-/_/; $self->{"-cgit_$k"} = $v; diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index a04838a1..ae071895 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -102,7 +102,7 @@ sub recent_msgs { my $hex = '[a-f0-9]'; my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!; my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!; - my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~\d+)?/; + my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~[0-9]+)?/; # revision ranges may be specified my $range = 'HEAD'; diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm index a43d67a9..d40705b7 100644 --- a/lib/PublicInbox/Filter/RubyLang.pm +++ b/lib/PublicInbox/Filter/RubyLang.pm @@ -50,7 +50,7 @@ sub scrub { my @v = $hdr->header_raw('X-Mail-Count'); my $n; foreach (@v) { - /\A\s*(\d+)\s*\z/ or next; + /\A\s*([0-9]+)\s*\z/ or next; $n = $1; last; } diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 09411048..a2a81f8e 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -51,8 +51,8 @@ sub serve { # Documentation/technical/http-protocol.txt in git.git # requires one and exactly one query parameter: - if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ || - $path =~ /\Agit-\w+-pack\z/) { + if ($env->{QUERY_STRING} =~ /\Aservice=git-[A-Za-z0-9_]+-pack\z/ || + $path =~ /\Agit-[A-Za-z0-9_]+-pack\z/) { my $ok = serve_smart($env, $git, $path); return $ok if $ok; } @@ -90,7 +90,7 @@ sub static_result ($$$$) { my $len = 
$size; my $code = 200; push @$h, 'Content-Type', $type; - if (($env->{HTTP_RANGE} || '') =~ /\bbytes=(\d*)-(\d*)\z/) { + if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) { ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size); if ($code == 416) { push @$h, 'Content-Range', "bytes */$size"; @@ -260,7 +260,7 @@ sub parse_cgi_headers { foreach my $l (split(/\r?\n/, $h)) { my ($k, $v) = split(/:\s*/, $l, 2); if ($k =~ /\AStatus\z/i) { - ($code) = ($v =~ /\b(\d+)\b/); + ($code) = ($v =~ /\b([0-9]+)\b/); } else { push @h, $k, $v; } diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 10e6d6a4..977614b4 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -142,7 +142,7 @@ sub app_dispatch { $env->{REMOTE_ADDR} = $self->{remote_addr}; $env->{REMOTE_PORT} = $self->{remote_port}; if (my $host = $env->{HTTP_HOST}) { - $host =~ s/:(\d+)\z// and $env->{SERVER_PORT} = $1; + $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1; $env->{SERVER_NAME} = $host; } if (defined $input) { diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 95a0f709..2b443970 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -13,6 +13,9 @@ our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape to_attr from_attr/; my $enc_ascii = find_encoding('us-ascii'); +# safe-ish acceptable filename pattern for portability +our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor + sub new { my ($class, $raw, $href) = @_; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index b3178b98..c9330332 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -74,7 +74,7 @@ sub _set_uint ($$$) { my $val = $opts->{$field}; if (defined $val) { $val = $val->[-1] if ref($val) eq 'ARRAY'; - $val = undef if $val !~ /\A\d+\z/; + $val = undef if $val !~ /\A[0-9]+\z/; } $opts->{$field} = $val || $default; } @@ -87,7 +87,7 @@ sub _set_limiter ($$$) { my $mkey = $pfx.'max'; my $val = 
$self->{$mkey} or return; my $lim; - if ($val =~ /\A\d+\z/) { + if ($val =~ /\A[0-9]+\z/) { require PublicInbox::Qspawn; $lim = PublicInbox::Qspawn::Limiter->new($val); } elsif ($val =~ /\A[a-z][a-z0-9]*\z/) { @@ -161,7 +161,7 @@ sub max_git_part { if (opendir my $dh, $gits) { my $max = -1; while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A(\d+)\.git\z! or next; + $git_dir =~ m!\A([0-9]+)\.git\z! or next; $max = $1 if $1 > $max; } $part = $self->{-max_git_part} = $max if $max >= 0; diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index d4778e7d..84960a98 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -13,6 +13,7 @@ package PublicInbox::Linkify; use strict; use warnings; use Digest::SHA qw/sha1_hex/; +use PublicInbox::Hval qw(ascii_html); my $SALT = rand; my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher):// @@ -61,12 +62,12 @@ sub linkify_1 { $end = ')'; } + $url = ascii_html($url); # for IDN + # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - # only escape ampersands, others do not match LINK_RE - $url =~ s/&/&amp;/g; $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key .
$end; ^ge; diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 7f1ab15e..6904d61a 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -26,11 +26,11 @@ sub mid_clean { $mid; } -# this is idempotent +# this is idempotent, used for HTML anchor/ids and such sub id_compress { my ($id, $force) = @_; - if ($force || $id =~ /[^\w\-]/ || length($id) > MID_MAX) { + if ($force || $id =~ /[^a-zA-Z0-9_\-]/ || length($id) > MID_MAX) { utf8::encode($id); return sha1_hex($id); } diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm index 62160233..12412825 100644 --- a/lib/PublicInbox/MsgTime.pm +++ b/lib/PublicInbox/MsgTime.pm @@ -44,8 +44,9 @@ sub msg_received_at ($) { my @recvd = $hdr->header_raw('Received'); my ($ts); foreach my $r (@recvd) { - $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ - \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next; + $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+ + [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+) + \s+([\+\-][0-9]+))/sx or next; $ts = eval { str2date_zone($1) } and return $ts; my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; @@ -59,7 +60,7 @@ sub msg_date_only ($) { my ($ts); foreach my $d (@date) { # Y2K problems: 3-digit years - $d =~ s!([A-Za-z]{3}) (\d{3}) (\d\d:\d\d:\d\d)! + $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})! 
my $yyyy = $2 + 1900; "$1 $yyyy $3"!e; $ts = eval { str2date_zone($d) } and return $ts; if ($@) { diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 8cb6c56d..be80560f 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -121,7 +121,7 @@ sub args_ok ($$) { # returns 1 if we can continue, 0 if not due to buffered writes or disconnect sub process_line ($$) { my ($self, $l) = @_; - my ($req, @args) = split(/\s+/, $l); + my ($req, @args) = split(/[ \t]/, $l); return 1 unless defined($req); # skip blank line $req = lc($req); $req = eval { @@ -437,7 +437,7 @@ sub set_nntp_headers ($$$$$) { # clobber some my $xref = xref($self, $ng, $n, $mid); $hdr->header_set('Xref', $xref); - $xref =~ s/:\d+//g; + $xref =~ s/:[0-9]+//g; $hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]); header_append($hdr, 'List-Post', "<mailto:$ng->{-primary_address}>"); if (my $url = $ng->base_url) { @@ -453,7 +453,7 @@ sub art_lookup ($$$) { my ($n, $mid); my $err; if (defined $art) { - if ($art =~ /\A\d+\z/o) { + if ($art =~ /\A[0-9]+\z/) { $err = '423 no such article number in this group'; $n = int($art); goto find_mid; @@ -508,7 +508,7 @@ sub simple_body_write ($$) { sub set_art { my ($self, $art) = @_; - $self->{article} = $art if defined $art && $art =~ /\A\d+\z/; + $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/; } sub _header ($) { @@ -576,11 +576,11 @@ sub get_range ($$) { defined $range or return '420 No article(s) selected'; my ($beg, $end); my ($min, $max) = $ng->mm->minmax; - if ($range =~ /\A(\d+)\z/) { + if ($range =~ /\A([0-9]+)\z/) { $beg = $end = $1; - } elsif ($range =~ /\A(\d+)-\z/) { + } elsif ($range =~ /\A([0-9]+)-\z/) { ($beg, $end) = ($1, $max); - } elsif ($range =~ /\A(\d+)-(\d+)\z/) { + } elsif ($range =~ /\A([0-9]+)-([0-9]+)\z/) { ($beg, $end) = ($1, $2); } else { return r501; @@ -959,7 +959,7 @@ sub event_read { $self->{rbuf} .= $$buf; } my $r = 1; - while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]*)\r?\n//) { + 
while ($r > 0 && $self->{rbuf} =~ s/\A[ \t\r\n]*([^\r\n]*)\r?\n//) { my $line = $1; return $self->close if $line =~ /[[:cntrl:]]/s; my $t0 = now(); diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm index 8626cf96..80bb4886 100644 --- a/lib/PublicInbox/NewsWWW.pm +++ b/lib/PublicInbox/NewsWWW.pm @@ -47,7 +47,7 @@ sub call { if (my $ibx = $pi_config->lookup_newsgroup($ng)) { my $url = PublicInbox::Hval::prurl($env, $ibx->{url}); my $code = 301; - if (defined $article && $article =~ /\A\d+\z/) { + if (defined $article && $article =~ /\A[0-9]+\z/) { my $mid = eval { $ibx->mm->mid_for($article) }; if (defined $mid) { # article IDs are not stable across clones, diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index c054a874..9903f427 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -144,7 +144,7 @@ sub _xdb ($) { my $qpf = \($self->{qp_flags} ||= $QP_FLAGS); if ($self->{version} >= 2) { foreach my $part (<$dir/*>) { - -d $part && $part =~ m!/\d+\z! or next; + -d $part && $part =~ m!/[0-9]+\z! 
or next; my $sub = Search::Xapian::Database->new($part); if ($xdb) { $xdb->add_database($sub); diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 6592b3b2..b089de9c 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -308,12 +308,12 @@ sub new { my ($class, $qp) = @_; my $r = $qp->{r}; - my ($l) = (($qp->{l} || '') =~ /(\d+)/); + my ($l) = (($qp->{l} || '') =~ /([0-9]+)/); $l = $LIM if !$l || $l > $LIM; bless { q => $qp->{'q'}, x => $qp->{x} || '', - o => (($qp->{o} || '0') =~ /(\d+)/), + o => (($qp->{o} || '0') =~ /([0-9]+)/), l => $l, r => (defined $r && $r ne '0'), }, $class; diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index 3841c567..81f99025 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -206,7 +206,7 @@ sub find_extract_diff ($$$) { } my $msgs = $srch->query($q, { relevance => 1 }); - my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/; + my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: ([0-9]+))?/; my $di; foreach my $smsg (@$msgs) { diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 76844cd4..a8c33ef4 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -48,7 +48,7 @@ sub count_partitions ($) { # due to -compact if (-d $xpfx) { foreach my $part (<$xpfx/*>) { - -d $part && $part =~ m!/\d+\z! or next; + -d $part && $part =~ m!/[0-9]+\z! or next; eval { Search::Xapian::Database->new($part)->close; $nparts++; @@ -574,7 +574,7 @@ sub git_dir_latest { my $latest; opendir my $dh, $pfx or die "opendir $pfx: $!\n"; while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A(\d+)\.git\z! or next; + $git_dir =~ m!\A([0-9]+)\.git\z! 
or next; if ($1 > $$max) { $$max = $1; $latest = "$pfx/$git_dir"; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 09afdaf1..1b52bf86 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -528,7 +528,7 @@ sub attach_link ($$$$;$) { $desc = $fn unless defined $desc; $desc = '' unless defined $desc; my $sfn; - if (defined $fn && $fn =~ /\A[[:alnum:]][\w\.-]+[[:alnum:]]\z/) { + if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) { $sfn = $fn; } elsif ($ct eq 'text/plain') { $sfn = 'a.txt'; @@ -1160,8 +1160,8 @@ sub paginate_recent ($$) { # Xapian uses '..' but '-' is perhaps friendier to URL linkifiers # if only $after exists "YYYYMMDD.." because "." could be skipped # if interpreted as an end-of-sentence - $t =~ s/\A(\d{8,14})-// and $after = str2ts($1); - $t =~ /\A(\d{8,14})\z/ and $before = str2ts($1); + $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1); + $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1); my $ibx = $ctx->{-inbox}; my $msgs = $ibx->recent($opts, $after, $before); diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 411ed2bb..b7dab819 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -55,12 +55,12 @@ sub diff_hunk ($$$$) { (defined($spfx) && defined($oid_a) && defined($oid_b)) or return "@@ $ca $cb @@"; - my ($n) = ($ca =~ /^-(\d+)/); + my ($n) = ($ca =~ /^-([0-9]+)/); $n = defined($n) ? do { ++$n; "#n$n" } : ''; my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>); - ($n) = ($cb =~ /^\+(\d+)/); + ($n) = ($cb =~ /^\+([0-9]+)/); $n = defined($n) ? 
do { ++$n; "#n$n" } : ''; $rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@); diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index b6f18f8d..7ea98204 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -28,7 +28,7 @@ use PublicInbox::UserContent; our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; -our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; +our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!; our $OID_RE = qr![a-f0-9]{7,40}!; sub new { @@ -74,7 +74,8 @@ sub call { my $method = $env->{REQUEST_METHOD}; if ($method eq 'POST') { - if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) { + if ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)? + (git-upload-pack)\z!x) { my ($part, $path) = ($2, $3); return invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path); @@ -97,11 +98,11 @@ sub call { invalid_inbox($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { invalid_inbox($ctx, $1) || get_new($ctx); - } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)? + } elsif ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)? 
($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my ($part, $path) = ($2, $3); invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path); - } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { + } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-]+).mbox\.gz\z!o) { serve_mbox_range($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { msg_page($ctx, $1, $2, $3); @@ -123,11 +124,12 @@ sub call { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { get_text($ctx, $1, $2); - } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) { + } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) { get_css($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) { get_vcs_object($ctx, $1, $2); - } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) { + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/ + ($PublicInbox::Hval::FN)\z!ox) { get_vcs_object($ctx, $1, $2, $3); } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) { r301($ctx, $1, $2, 's/'); @@ -534,11 +536,15 @@ sub stylesheets_prepare ($$) { $inline_ok = 0; } else { my $fn = $_; + my ($key) = (m!([^/]+?)(?:\.css)?\z!i); + if ($key !~ /\A[a-zA-Z0-9_\-\.]+\z/) { + warn "ignoring $fn, non-ASCII word character\n"; + next; + } open(my $fh, '<', $fn) or do { warn "failed to open $fn: $!\n"; next; }; - my ($key) = (m!([^/]+?)(?:\.css)?\z!i); my $ctime = 0; my $local = do { local $/; <$fh> }; if ($local =~ /\S/) { diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm index d690ce41..96103cb0 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -27,7 +27,7 @@ sub get_attach ($$$) { if ($ct && (($ct->{discrete} || '') eq 'text')) { # display all text as text/plain: my $cset = $ct->{attributes}->{charset}; - if ($cset && ($cset =~ /\A[\w-]+\z/)) { + if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) { $res->[1]->[1] .= qq(; charset=$cset); } } else { # TODO: allow user to configure safe types diff --git 
a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index e8dad4b8..e1473b3d 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -24,8 +24,8 @@ sub list_match_domain ($$) { my ($self, $env) = @_; my @list; my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME}; - $host =~ s/:\d+\z//; - my $re = qr!\A(?:https?:)?//\Q$host\E(?::\d+)?/!i; + $host =~ s/:[0-9]+\z//; + my $re = qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i; $self->{pi_config}->each_inbox(sub { my ($ibx) = @_; push @list, $ibx if !$ibx->{-hide}->{www} && $ibx->{url} =~ $re; diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 90672310..dad080c8 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -150,7 +150,7 @@ sub run { } else { opendir my $dh, $old or die "Failed to opendir $old: $!\n"; while (defined(my $dn = readdir($dh))) { - if ($dn =~ /\A\d+\z/) { + if ($dn =~ /\A[0-9]+\z/) { my $tmpl = "$dn-XXXXXXXX"; my $dst = tempdir($tmpl, DIR => $old); same_fs_or_die($old, $dst); @@ -200,7 +200,7 @@ sub progress_pfx ($) { my @p = split('/', $_[0]); # return "xap15/0" for v2, or "xapian15" for v1: - ($p[-1] =~ /\A\d+\z/) ? "$p[-2]/$p[-1]" : $p[-1]; + ($p[-1] =~ /\A[0-9]+\z/) ? "$p[-2]/$p[-1]" : $p[-1]; } # xapian-compact wrapper @@ -276,7 +276,7 @@ sub cpdb ($$) { $dst->set_metadata('last_commit', $lc) if $lc; # only the first xapian partition (0) gets 'indexlevel' - if ($old =~ m!(?:xapian\d+|xap\d+/0)\z!) { + if ($old =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\z!) { my $l = $src->get_metadata('indexlevel'); if ($l eq 'medium') { $dst->set_metadata('indexlevel', $l); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 381826dc..25e6cc9b 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -91,7 +91,7 @@ foreach my $ibx (@inboxes) { my $xdir_ro = $ibx->{search}->xdir(1); my $npart = 0; foreach my $part (<$xdir_ro/*>) { - if (-d $part && $part =~ m!/\d+\z!) { + if (-d $part && $part =~ m!/[0-9]+\z!) 
{ my $bytes = 0; $bytes += -s $_ foreach glob("$part/*"); $npart++ if $bytes; diff --git a/t/linkify.t b/t/linkify.t index fe218b91..c4923582 100644 --- a/t/linkify.t +++ b/t/linkify.t @@ -132,4 +132,16 @@ use PublicInbox::Linkify; 'punctuation with unpaired ) OK') } +if ('IDN example: <ACDB98F4-178C-43C3-99C4-A1D03DD6A8F5@sb.org>') { + my $hc = '&#26376;'; + my $u = "http://www.\x{6708}.example.com/"; + my $s = $u; + my $l = PublicInbox::Linkify->new; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + my $expect = qq{<a +href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>}; + is($s, $expect, 'IDN message escaped properly'); +} + done_testing(); |