about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--MANIFEST1
-rw-r--r--lib/PublicInbox/MsgTime.pm41
-rw-r--r--t/msgtime.t87
3 files changed, 112 insertions, 17 deletions
diff --git a/MANIFEST b/MANIFEST
index 68c79c95..6d2aecee 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -174,6 +174,7 @@ t/mid.t
 t/mime.t
 t/msg_iter.t
 t/msgmap.t
+t/msgtime.t
 t/nntp.t
 t/nntpd.t
 t/over.t
diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm
index c67a41ff..f3ebb644 100644
--- a/lib/PublicInbox/MsgTime.pm
+++ b/lib/PublicInbox/MsgTime.pm
@@ -5,19 +5,31 @@ use strict;
 use warnings;
 use base qw(Exporter);
 our @EXPORT_OK = qw(msg_timestamp msg_datestamp);
-use Date::Parse qw(str2time);
-use Time::Zone qw(tz_offset);
+use Date::Parse qw(str2time strptime);
+
+sub str2date_zone ($) {
+        my ($date) = @_;
+
+        my $ts = str2time($date);
+        return undef unless(defined $ts);
+
+        # off is the time zone offset in seconds from GMT
+        my ($ss,$mm,$hh,$day,$month,$year,$off) = strptime($date);
+        return undef unless(defined $off);
+
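+        # Compute the time zone from offset. FIXME: for a negative $off that is not a multiple of 30 minutes (e.g. -0945), ($off / 60) % 60 yields 15 rather than 45; take the minutes from abs($off) instead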
+        my $sign = ($off < 0) ? '-' : '+';
+        my $hour = abs(int($off / 3600));
+        my $min  = ($off / 60) % 60;
+        my $zone = sprintf('%s%02d%02d', $sign, $hour, $min);
 
-sub zone_clamp ($) {
-        my ($zone) = @_;
-        $zone ||= '+0000';
         # "-1200" is the furthest westermost zone offset,
         # but git fast-import is liberal so we use "-1400"
         if ($zone >= 1400 || $zone <= -1400) {
                 warn "bogus TZ offset: $zone, ignoring and assuming +0000\n";
                 $zone = '+0000';
         }
-        $zone;
+        [$ts, $zone];
 }
 
 sub time_response ($) {
@@ -28,37 +40,32 @@ sub time_response ($) {
 sub msg_received_at ($) {
         my ($hdr) = @_; # Email::MIME::Header
         my @recvd = $hdr->header_raw('Received');
-        my ($ts, $zone);
+        my ($ts);
         foreach my $r (@recvd) {
-                $zone = undef;
                 $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+
                         \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next;
-                $zone = $2;
-                $ts = eval { str2time($1) } and last;
+                $ts = eval { str2date_zone($1) } and return $ts;
                 my $mid = $hdr->header_raw('Message-ID');
                 warn "no date in $mid Received: $r\n";
         }
-        defined $ts ? [ $ts, zone_clamp($zone) ] : undef;
+        undef;
 }
 
 sub msg_date_only ($) {
         my ($hdr) = @_; # Email::MIME::Header
         my @date = $hdr->header_raw('Date');
-        my ($ts, $zone);
+        my ($ts);
         foreach my $d (@date) {
-                $zone = undef;
                 # Y2K problems: 3-digit years
                 $d =~ s!([A-Za-z]{3}) (\d{3}) (\d\d:\d\d:\d\d)!
                         my $yyyy = $2 + 1900; "$1 $yyyy $3"!e;
-                $ts = eval { str2time($d) };
+                $ts = eval { str2date_zone($d) } and return $ts;
                 if ($@) {
                         my $mid = $hdr->header_raw('Message-ID');
                         warn "bad Date: $d in $mid: $@\n";
-                } elsif ($d =~ /\s+([\+\-]\d+)\s*\z/) {
-                        $zone = $1;
                 }
         }
-        defined $ts ? [ $ts, zone_clamp($zone) ] : undef;
+        undef;
 }
 
 # Favors Received header for sorting globally
diff --git a/t/msgtime.t b/t/msgtime.t
new file mode 100644
index 00000000..c390670a
--- /dev/null
+++ b/t/msgtime.t
@@ -0,0 +1,87 @@
+# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::MIME;
+use PublicInbox::MsgTime;
+
+sub datestamp ($) {
+        my ($date) = @_;
+        local $SIG{__WARN__} = sub {};  # Suppress warnings
+        my $mime = PublicInbox::MIME->create(
+                header => [
+                        From => 'a@example.com',
+                        To => 'b@example.com',
+                        'Content-Type' => 'text/plain',
+                        Subject => 'this is a subject',
+                        'Message-ID' => '<a@example.com>',
+                        Date => $date,
+                        'Received' => '(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S932173AbXAVSQY (ORCPT <rfc822;w@1wt.eu>);\n\tMon, 22 Jan 2007 13:16:24 -0500',
+                ],
+                body => "hello world\n",
+            );
+        my @ts = PublicInbox::MsgTime::msg_datestamp($mime->header_obj);
+        return \@ts;
+}
+
+sub timestamp ($) {
+        my ($received) = @_;
+        local $SIG{__WARN__} = sub {};  # Suppress warnings
+        my $mime = PublicInbox::MIME->create(
+                header => [
+                        From => 'a@example.com',
+                        To => 'b@example.com',
+                        'Content-Type' => 'text/plain',
+                        Subject => 'this is a subject',
+                        'Message-ID' => '<a@example.com>',
+                        Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+                        'Received' => '(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S932173AbXAVSQY (ORCPT <rfc822;w@1wt.eu>);\n\t' . $received,
+                ],
+                body => "hello world\n",
+            );
+        my @ts = PublicInbox::MsgTime::msg_timestamp($mime->header_obj);
+        return \@ts;
+}
+
+# Verify that the parser sucks up the timezone for dates
+for (my $min = -1440; $min <= 1440; $min += 30) {
+        my $sign = ($min < 0) ? '-': '+';
+        my $h = abs(int($min / 60));
+        my $m = $min % 60;
+
+        my $ts_expect = 749520000 - ($min * 60);
+        my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m);
+        if ($tz_expect >= 1400 || $tz_expect <= -1400) {
+                $tz_expect = '+0000';
+        }
+        my $date = sprintf("Fri, 02 Oct 1993 00:00:00 %s%02d%02d",
+                           $sign, $h, $m);
+        my $result = datestamp($date);
+        is_deeply($result, [ $ts_expect, $tz_expect ]);
+}
+
+# Verify that the parser sucks up the timezone for Received timestamps
+for (my $min = -1440; $min <= 1440; $min += 30) {
+        my $sign = ($min < 0) ? '-' : '+';
+        my $h = abs(int($min / 60));
+        my $m = $min %60;
+
+        my $ts_expect = 1169471784 - ($min * 60);
+        my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m);
+        if ($tz_expect >= 1400 || $tz_expect <= -1400) {
+                $tz_expect = '+0000';
+        }
+        my $received = sprintf('Mon, 22 Jan 2007 13:16:24 %s%02d%02d',
+                               $sign, $h, $m);
+        is_deeply(timestamp($received), [ $ts_expect, $tz_expect ]);
+}
+
+is_deeply(datestamp('Wed, 13 Dec 2006 10:26:38 +1'), [1166001998, '+0100']);
+is_deeply(datestamp('Fri, 3 Feb 2006 18:11:22 -00'), [1138990282, '+0000']);
+is_deeply(datestamp('Thursday, 20 Feb 2003 01:14:34 +000'), [1045703674, '+0000']);
+is_deeply(datestamp('Fri, 28 Jun 2002 12:54:40 -700'), [1025294080, '-0700']);
+is_deeply(datestamp('Sat, 12 Jan 2002 12:52:57 -200'), [1010847177, '-0200']);
+is_deeply(datestamp('Mon, 05 Nov 2001 10:36:16 -800'), [1004985376, '-0800']);
+
+done_testing();