From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 7A92A1F463; Sun, 1 Dec 2019 22:04:25 +0000 (UTC) Date: Sun, 1 Dec 2019 22:04:25 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/4] msgtime: avoid obviously out-of-range dates (for now) Message-ID: <20191201220425.GA30161@dcvr> References: <20191129122508.7708-1-e@80x24.org> <20191129122508.7708-5-e@80x24.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20191129122508.7708-5-e@80x24.org> List-Id: Wacky dates show up in lore for valid messages. Let's ignore them and let future generations deal with Y10K and time-travel problems. --- lib/PublicInbox/MsgTime.pm | 6 +++++- t/msgtime.t | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm index 479aaa4ecf132..9f4326442dd11 100644 --- a/lib/PublicInbox/MsgTime.pm +++ b/lib/PublicInbox/MsgTime.pm @@ -38,7 +38,7 @@ sub str2date_zone ($) { if ($date =~ /(?:[A-Za-z]+,?\s+)? # day-of-week ([0-9]+),?\s+ # dd ([A-Za-z]+)\s+ # mon - ([0-9]{2,})\s+ # YYYY or YY (or YYY :P) + ([0-9]{2,4})\s+ # YYYY or YY (or YYY :P) ([0-9]+)[:\.] # HH: ((?:[0-9]{2})|(?:\s?[0-9])) # MM (?:[:\.]((?:[0-9]{2})|(?:\s?[0-9])))? 
# :SS @@ -67,6 +67,10 @@ sub str2date_zone ($) { $ts = timegm($ss // 0, $mm, $hh, $dd, $mon, $yyyy); + # 4-digit dates in non-spam from 1900s and 1910s exist in + # lore archives + return if $ts < 0; + # Compute the time offset from [+-]HHMM $tz //= 0; my ($tz_hh, $tz_mm); diff --git a/t/msgtime.t b/t/msgtime.t index 1452dc97d5b0b..cecad775769e1 100644 --- a/t/msgtime.t +++ b/t/msgtime.t @@ -5,7 +5,7 @@ use warnings; use Test::More; use PublicInbox::MIME; use PublicInbox::MsgTime; - +our $received_date = 'Mon, 22 Jan 2007 13:16:24 -0500'; sub datestamp ($) { my ($date) = @_; local $SIG{__WARN__} = sub {}; # Suppress warnings @@ -17,7 +17,11 @@ sub datestamp ($) { Subject => 'this is a subject', 'Message-ID' => '', Date => $date, - 'Received' => '(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S932173AbXAVSQY (ORCPT );\n\tMon, 22 Jan 2007 13:16:24 -0500', + 'Received' => <); +\t$received_date +EOF ], body => "hello world\n", ); @@ -104,4 +108,10 @@ for (qw(UT GMT Z)) { } is_datestamp('Fri, 02 Oct 1993 00:00:00 EDT', [ 749534400, '-0400']); +# fallback to Received: header if Date: is out-of-range: +is_datestamp('Fri, 1 Jan 1904 10:12:31 +0100', + PublicInbox::MsgTime::str2date_zone($received_date)); +is_datestamp('Fri, 9 Mar 71685 18:45:56 +0000', # Y10K is not my problem :P + PublicInbox::MsgTime::str2date_zone($received_date)); + done_testing();