about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-12-01 22:04:25 +0000
committer Eric Wong <e@80x24.org> 2019-12-12 03:51:45 +0000
commit 74fd81670fcc9eaec15a009995e52f9aeefa1494 (patch)
tree b26e473c8ec8b91181a6d8677183234edb1c2f72
parent 56643bfddba2f7bca60955e5c48435fe58cc8e1c (diff)
download public-inbox-74fd81670fcc9eaec15a009995e52f9aeefa1494.tar.gz
Wacky dates show up in lore for valid messages.  Let's ignore
them and let future generations deal with Y10K and time-travel
problems.
-rw-r--r-- lib/PublicInbox/MsgTime.pm 6
-rw-r--r-- t/msgtime.t 14
2 files changed, 17 insertions, 3 deletions
diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm
index 479aaa4e..9f432644 100644
--- a/lib/PublicInbox/MsgTime.pm
+++ b/lib/PublicInbox/MsgTime.pm
@@ -38,7 +38,7 @@ sub str2date_zone ($) {
         if ($date =~ /(?:[A-Za-z]+,?\s+)? # day-of-week
                         ([0-9]+),?\s+  # dd
                         ([A-Za-z]+)\s+ # mon
-                        ([0-9]{2,})\s+ # YYYY or YY (or YYY :P)
+                        ([0-9]{2,4})\s+ # YYYY or YY (or YYY :P)
                         ([0-9]+)[:\.] # HH:
                                 ((?:[0-9]{2})|(?:\s?[0-9])) # MM
                                 (?:[:\.]((?:[0-9]{2})|(?:\s?[0-9])))? # :SS
@@ -67,6 +67,10 @@ sub str2date_zone ($) {
 
                 $ts = timegm($ss // 0, $mm, $hh, $dd, $mon, $yyyy);
 
+                # 4-digit dates in non-spam from 1900s and 1910s exist in
+                # lore archives
+                return if $ts < 0;
+
                 # Compute the time offset from [+-]HHMM
                 $tz //= 0;
                 my ($tz_hh, $tz_mm);
diff --git a/t/msgtime.t b/t/msgtime.t
index 1452dc97..98cf66e6 100644
--- a/t/msgtime.t
+++ b/t/msgtime.t
@@ -5,7 +5,7 @@ use warnings;
 use Test::More;
 use PublicInbox::MIME;
 use PublicInbox::MsgTime;
-
+our $received_date = 'Mon, 22 Jan 2007 13:16:24 -0500';
 sub datestamp ($) {
         my ($date) = @_;
         local $SIG{__WARN__} = sub {};  # Suppress warnings
@@ -17,7 +17,11 @@ sub datestamp ($) {
                         Subject => 'this is a subject',
                         'Message-ID' => '<a@example.com>',
                         Date => $date,
-                        'Received' => '(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S932173AbXAVSQY (ORCPT <rfc822;w@1wt.eu>);\n\tMon, 22 Jan 2007 13:16:24 -0500',
+                        'Received' => <<EOF,
+(majordomo\@vger.kernel.org) by vger.kernel.org via listexpand
+\tid S932173AbXAVSQY (ORCPT <rfc822;w\@1wt.eu>);
+\t$received_date
+EOF
                 ],
                 body => "hello world\n",
             );
@@ -104,4 +108,10 @@ for (qw(UT GMT Z)) {
 }
 is_datestamp('Fri, 02 Oct 1993 00:00:00 EDT', [ 749534400, '-0400']);
 
+# fallback to Received: header if Date: is out-of-range:
+is_datestamp('Fri, 1 Jan 1904 10:12:31 +0100',
+        PublicInbox::MsgTime::str2date_zone($received_date));
+is_datestamp('Fri, 9 Mar 71685 18:45:56 +0000', # Y10K is not my problem :P
+        PublicInbox::MsgTime::str2date_zone($received_date));
+
 done_testing();