diff options
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | lib/PublicInbox/MsgTime.pm | 41 | ||||
-rw-r--r-- | t/msgtime.t | 87 |
3 files changed, 112 insertions, 17 deletions
@@ -174,6 +174,7 @@ t/mid.t t/mime.t t/msg_iter.t t/msgmap.t +t/msgtime.t t/nntp.t t/nntpd.t t/over.t diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm index c67a41ff..f3ebb644 100644 --- a/lib/PublicInbox/MsgTime.pm +++ b/lib/PublicInbox/MsgTime.pm @@ -5,19 +5,31 @@ use strict; use warnings; use base qw(Exporter); our @EXPORT_OK = qw(msg_timestamp msg_datestamp); -use Date::Parse qw(str2time); -use Time::Zone qw(tz_offset); +use Date::Parse qw(str2time strptime); + +sub str2date_zone ($) { + my ($date) = @_; + + my $ts = str2time($date); + return undef unless(defined $ts); + + # off is the time zone offset in seconds from GMT + my ($ss,$mm,$hh,$day,$month,$year,$off) = strptime($date); + return undef unless(defined $off); + + # Compute the time zone from offset + my $sign = ($off < 0) ? '-' : '+'; + my $hour = abs(int($off / 3600)); + my $min = ($off / 60) % 60; + my $zone = sprintf('%s%02d%02d', $sign, $hour, $min); -sub zone_clamp ($) { - my ($zone) = @_; - $zone ||= '+0000'; # "-1200" is the furthest westermost zone offset, # but git fast-import is liberal so we use "-1400" if ($zone >= 1400 || $zone <= -1400) { warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; $zone = '+0000'; } - $zone; + [$ts, $zone]; } sub time_response ($) { @@ -28,37 +40,32 @@ sub time_response ($) { sub msg_received_at ($) { my ($hdr) = @_; # Email::MIME::Header my @recvd = $hdr->header_raw('Received'); - my ($ts, $zone); + my ($ts); foreach my $r (@recvd) { - $zone = undef; $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next; - $zone = $2; - $ts = eval { str2time($1) } and last; + $ts = eval { str2date_zone($1) } and return $ts; my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; } - defined $ts ? [ $ts, zone_clamp($zone) ] : undef; + undef; } sub msg_date_only ($) { my ($hdr) = @_; # Email::MIME::Header my @date = $hdr->header_raw('Date'); - my ($ts, $zone); + my ($ts); foreach my $d (@date) { - $zone = undef; # Y2K problems: 3-digit years $d =~ s!([A-Za-z]{3}) (\d{3}) (\d\d:\d\d:\d\d)! my $yyyy = $2 + 1900; "$1 $yyyy $3"!e; - $ts = eval { str2time($d) }; + $ts = eval { str2date_zone($d) } and return $ts; if ($@) { my $mid = $hdr->header_raw('Message-ID'); warn "bad Date: $d in $mid: $@\n"; - } elsif ($d =~ /\s+([\+\-]\d+)\s*\z/) { - $zone = $1; } } - defined $ts ? [ $ts, zone_clamp($zone) ] : undef; + undef; } # Favors Received header for sorting globally diff --git a/t/msgtime.t b/t/msgtime.t new file mode 100644 index 00000000..c390670a --- /dev/null +++ b/t/msgtime.t @@ -0,0 +1,87 @@ +# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use warnings; +use Test::More; +use PublicInbox::MIME; +use PublicInbox::MsgTime; + +sub datestamp ($) { + my ($date) = @_; + local $SIG{__WARN__} = sub {}; # Suppress warnings + my $mime = PublicInbox::MIME->create( + header => [ + From => 'a@example.com', + To => 'b@example.com', + 'Content-Type' => 'text/plain', + Subject => 'this is a subject', + 'Message-ID' => '<a@example.com>', + Date => $date, + 'Received' => '(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S932173AbXAVSQY (ORCPT <rfc822;w@1wt.eu>);\n\tMon, 22 Jan 2007 13:16:24 -0500', + ], + body => "hello world\n", + ); + my @ts = PublicInbox::MsgTime::msg_datestamp($mime->header_obj); + return \@ts; +} + +sub timestamp ($) { + my ($received) = @_; + local $SIG{__WARN__} = sub {}; # Suppress warnings + my $mime = PublicInbox::MIME->create( + header => [ + From => 'a@example.com', + To => 'b@example.com', + 'Content-Type' => 'text/plain', + Subject => 'this is a subject', + 'Message-ID' => '<a@example.com>', + Date => 'Fri, 02 Oct 1993 00:00:00 +0000', + 'Received' => '(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S932173AbXAVSQY (ORCPT <rfc822;w@1wt.eu>);\n\t' . $received, + ], + body => "hello world\n", + ); + my @ts = PublicInbox::MsgTime::msg_timestamp($mime->header_obj); + return \@ts; +} + +# Verify that the parser sucks up the timezone for dates +for (my $min = -1440; $min <= 1440; $min += 30) { + my $sign = ($min < 0) ? '-': '+'; + my $h = abs(int($min / 60)); + my $m = $min % 60; + + my $ts_expect = 749520000 - ($min * 60); + my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m); + if ($tz_expect >= 1400 || $tz_expect <= -1400) { + $tz_expect = '+0000'; + } + my $date = sprintf("Fri, 02 Oct 1993 00:00:00 %s%02d%02d", + $sign, $h, $m); + my $result = datestamp($date); + is_deeply($result, [ $ts_expect, $tz_expect ]); +} + +# Verify that the parser sucks up the timezone and for received timestamps +for (my $min = -1440; $min <= 1440; $min += 30) { + my $sign = ($min < 0) ? '-' : '+'; + my $h = abs(int($min / 60)); + my $m = $min %60; + + my $ts_expect = 1169471784 - ($min * 60); + my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m); + if ($tz_expect >= 1400 || $tz_expect <= -1400) { + $tz_expect = '+0000'; + } + my $received = sprintf('Mon, 22 Jan 2007 13:16:24 %s%02d%02d', + $sign, $h, $m); + is_deeply(timestamp($received), [ $ts_expect, $tz_expect ]); +} + +is_deeply(datestamp('Wed, 13 Dec 2006 10:26:38 +1'), [1166001998, '+0100']); +is_deeply(datestamp('Fri, 3 Feb 2006 18:11:22 -00'), [1138990282, '+0000']); +is_deeply(datestamp('Thursday, 20 Feb 2003 01:14:34 +000'), [1045703674, '+0000']); +is_deeply(datestamp('Fri, 28 Jun 2002 12:54:40 -700'), [1025294080, '-0700']); +is_deeply(datestamp('Sat, 12 Jan 2002 12:52:57 -200'), [1010847177, '-0200']); +is_deeply(datestamp('Mon, 05 Nov 2001 10:36:16 -800'), [1004985376, '-0800']); + +done_testing(); |