From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 50B151FAD3 for ; Tue, 4 Jun 2019 11:27:51 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 16/24] msgtime: require ASCII digits for parsing dates Date: Tue, 4 Jun 2019 11:27:40 +0000 Message-Id: <20190604112748.23598-17-e@80x24.org> In-Reply-To: <20190604112748.23598-1-e@80x24.org> References: <20190604112748.23598-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: User input contains the darndest things. Don't waste more time than necessary trying to parse dates out of non-ASCII digits. --- lib/PublicInbox/MsgTime.pm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm index 6216023..1241282 100644 --- a/lib/PublicInbox/MsgTime.pm +++ b/lib/PublicInbox/MsgTime.pm @@ -44,8 +44,9 @@ sub msg_received_at ($) { my @recvd = $hdr->header_raw('Received'); my ($ts); foreach my $r (@recvd) { - $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ - \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next; + $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+ + [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+) + \s+([\+\-][0-9]+))/sx or next; $ts = eval { str2date_zone($1) } and return $ts; my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; @@ -59,7 +60,7 @@ sub msg_date_only ($) { my ($ts); foreach my $d (@date) { # Y2K problems: 3-digit years - $d =~ s!([A-Za-z]{3}) (\d{3}) (\d\d:\d\d:\d\d)! + $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})! my $yyyy = $2 + 1900; "$1 $yyyy $3"!e; $ts = eval { str2date_zone($d) } and return $ts; if ($@) { -- EW