diff options
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Import.pm | 17 | ||||
-rw-r--r-- | lib/PublicInbox/MsgTime.pm | 80 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 5 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdxSkeleton.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/SearchMsg.pm | 26 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 30 |
8 files changed, 103 insertions, 71 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index e50f1156..d69934b0 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -11,7 +11,7 @@ use base qw(PublicInbox::Lock); use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mids mid_mime mid2path); use PublicInbox::Address; -use PublicInbox::MsgTime qw(msg_timestamp); +use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); use PublicInbox::ContentId qw(content_digest); use PublicInbox::MDA; @@ -244,9 +244,8 @@ sub remove { (($self->{tip} = ":$commit"), $cur); } -sub parse_date ($) { - my ($mime) = @_; - my ($ts, $zone) = msg_timestamp($mime->header_obj); +sub git_timestamp { + my ($ts, $zone) = @_; $ts = 0 if $ts < 0; # git uses unsigned times "$ts $zone"; } @@ -295,7 +294,11 @@ sub add { my ($self, $mime, $check_cb) = @_; # mime = Email::MIME my ($name, $email) = extract_author_info($mime); - my $date_raw = parse_date($mime); + my $hdr = $mime->header_obj; + my @at = msg_datestamp($hdr); + my @ct = msg_timestamp($hdr); + my $author_time_raw = git_timestamp(@at); + my $commit_time_raw = git_timestamp(@ct); my $subject = $mime->header('Subject'); $subject = '(no subject)' unless defined $subject; my $path_type = $self->{path_type}; @@ -349,8 +352,8 @@ sub add { utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", - "author $name <$email> $date_raw\n", - "committer $self->{ident} ", now_raw(), "\n" or wfail; + "author $name <$email> $author_time_raw\n", + "committer $self->{ident} $commit_time_raw\n" or wfail; print $w "data ", (length($subject) + 1), "\n", $subject, "\n\n" or wfail; if ($tip ne '') { diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm index 87664f4b..4295e873 100644 --- a/lib/PublicInbox/MsgTime.pm +++ b/lib/PublicInbox/MsgTime.pm @@ -4,48 +4,76 @@ package PublicInbox::MsgTime; use strict; use warnings; use base qw(Exporter); -our @EXPORT_OK = qw(msg_timestamp); +our @EXPORT_OK = qw(msg_timestamp msg_datestamp); use Date::Parse qw(str2time); use Time::Zone qw(tz_offset); -sub msg_timestamp ($) { +sub zone_clamp ($) { + my ($zone) = @_; + $zone ||= '+0000'; + # "-1200" is the furthest westermost zone offset, + # but git fast-import is liberal so we use "-1400" + if ($zone >= 1400 || $zone <= -1400) { + warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; + $zone = '+0000'; + } + $zone; +} + +sub time_response ($) { + my ($ret) = @_; + wantarray ? @$ret : $ret->[0]; +} + +sub msg_received_at ($) { my ($hdr) = @_; # Email::MIME::Header - my ($ts, $zone); - my $mid; my @recvd = $hdr->header_raw('Received'); + my ($ts, $zone); foreach my $r (@recvd) { $zone = undef; $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next; $zone = $2; $ts = eval { str2time($1) } and last; - $mid ||= $hdr->header_raw('Message-ID'); + my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; } - unless (defined $ts) { - my @date = $hdr->header_raw('Date'); - foreach my $d (@date) { - $zone = undef; - $ts = eval { str2time($d) }; - if ($@) { - $mid ||= $hdr->header_raw('Message-ID'); - warn "bad Date: $d in $mid: $@\n"; - } elsif ($d =~ /\s+([\+\-]\d+)\s*\z/) { - $zone = $1; - } + defined $ts ? [ $ts, zone_clamp($zone) ] : undef; +} + +sub msg_date_only ($) { + my ($hdr) = @_; # Email::MIME::Header + my @date = $hdr->header_raw('Date'); + my ($ts, $zone); + foreach my $d (@date) { + $zone = undef; + $ts = eval { str2time($d) }; + if ($@) { + my $mid = $hdr->header_raw('Message-ID'); + warn "bad Date: $d in $mid: $@\n"; + } elsif ($d =~ /\s+([\+\-]\d+)\s*\z/) { + $zone = $1; } } - $ts = time unless defined $ts; - return $ts unless wantarray; + defined $ts ? [ $ts, zone_clamp($zone) ] : undef; +} - $zone ||= '+0000'; - # "-1200" is the furthest westermost zone offset, - # but git fast-import is liberal so we use "-1400" - if ($zone >= 1400 || $zone <= -1400) { - warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; - $zone = '+0000'; - } - ($ts, $zone); +# Favors Received header for sorting globally +sub msg_timestamp ($) { + my ($hdr) = @_; # Email::MIME::Header + my $ret; + $ret = msg_received_at($hdr) and return time_response($ret); + $ret = msg_date_only($hdr) and return time_response($ret); + wantarray ? (time, '+0000') : time; +} + +# Favors the Date: header for display and sorting within a thread +sub msg_datestamp ($) { + my ($hdr) = @_; # Email::MIME::Header + my $ret; + $ret = msg_date_only($hdr) and return time_response($ret); + $ret = msg_received_at($hdr) and return time_response($ret); + wantarray ? (time, '+0000') : time; } 1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 7e7c989d..f08b9870 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -8,11 +8,12 @@ use strict; use warnings; # values for searching -use constant TS => 0; # timestamp +use constant DS => 0; # Date: header in Unix time use constant NUM => 1; # NNTP article number use constant BYTES => 2; # :bytes as defined in RFC 3977 use constant LINES => 3; # :lines as defined in RFC 3977 -use constant YYYYMMDD => 4; # for searching in the WWW UI +use constant TS => 4; # Received: header in Unix time +use constant YYYYMMDD => 5; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 3d80b002..ef723a4b 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -134,7 +134,9 @@ sub add_values ($$) { my $lines = $values->[PublicInbox::Search::LINES]; add_val($doc, PublicInbox::Search::LINES, $lines); - my $yyyymmdd = strftime('%Y%m%d', gmtime($ts)); + my $ds = $values->[PublicInbox::Search::DS]; + add_val($doc, PublicInbox::Search::DS, $ds); + my $yyyymmdd = strftime('%Y%m%d', gmtime($ds)); add_val($doc, PublicInbox::Search::YYYYMMDD, $yyyymmdd); } @@ -298,7 +300,7 @@ sub add_message { } my $lines = $mime->body_raw =~ tr!\n!\n!; - my @values = ($smsg->ts, $num, $bytes, $lines); + my @values = ($smsg->ds, $num, $bytes, $lines, $smsg->ts); add_values($doc, \@values); my $tg = $self->term_generator; diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm index ba439696..78a17303 100644 --- a/lib/PublicInbox/SearchIdxSkeleton.pm +++ b/lib/PublicInbox/SearchIdxSkeleton.pm @@ -121,18 +121,16 @@ sub remote_remove { die $err if $err; } -# values: [ TS, NUM, BYTES, LINES, MID, XPATH, doc_data ] +# values: [ DS, NUM, BYTES, LINES, TS, MIDS, XPATH, doc_data ] sub index_skeleton_real ($$) { my ($self, $values) = @_; my $doc_data = pop @$values; my $xpath = pop @$values; my $mids = pop @$values; - my $ts = $values->[PublicInbox::Search::TS]; my $smsg = PublicInbox::SearchMsg->new(undef); my $doc = $smsg->{doc}; PublicInbox::SearchIdx::add_values($doc, $values); $doc->set_data($doc_data); - $smsg->{ts} = $ts; $smsg->load_from_data($doc_data); my $num = $values->[PublicInbox::Search::NUM]; my @refs = ($smsg->references =~ /<([^>]+)>/g); diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a1cd0c28..de1fd131 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -9,7 +9,7 @@ use warnings; use Search::Xapian; use PublicInbox::MID qw/mid_clean mid_mime/; use PublicInbox::Address; -use PublicInbox::MsgTime qw(msg_timestamp); +use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); sub new { my ($class, $mime) = @_; @@ -46,6 +46,7 @@ sub load_expand { my $doc = $self->{doc}; my $data = $doc->get_data or return; $self->{ts} = get_val($doc, &PublicInbox::Search::TS); + $self->{ds} = get_val($doc, &PublicInbox::Search::DS); utf8::decode($data); load_from_data($self, $data); $self; @@ -53,12 +54,8 @@ sub load_expand { sub load_doc { my ($class, $doc) = @_; - my $data = $doc->get_data or return; - my $ts = get_val($doc, &PublicInbox::Search::TS); - utf8::decode($data); - my $self = bless { doc => $doc, ts => $ts }, $class; - load_from_data($self, $data); - $self + my $self = bless { doc => $doc }, $class; + $self->load_expand; } # :bytes and :lines metadata in RFC 3977 @@ -91,9 +88,9 @@ my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); sub date ($) { my ($self) = @_; - my $ts = $self->{ts}; - return unless defined $ts; - my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ts); + my $ds = $self->{ds}; + return unless defined $ds; + my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ds); "$DoW[$wday], " . sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000", $mday, $year+1900, $hour, $min, $sec); @@ -119,9 +116,12 @@ sub from_name { sub ts { my ($self) = @_; - $self->{ts} ||= eval { - msg_timestamp($self->{mime}->header_obj); - } || 0; + $self->{ts} ||= eval { msg_timestamp($self->{mime}->header_obj) } || 0; +} + +sub ds { + my ($self) = @_; + $self->{ds} ||= eval { msg_datestamp($self->{mime}->header_obj); } || 0; } sub to_doc_data { diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 53b88c34..55c588cb 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -117,11 +117,11 @@ sub mset_summary { obfuscate_addrs($obfs_ibx, $s); obfuscate_addrs($obfs_ibx, $f); } - my $ts = PublicInbox::View::fmt_ts($smsg->ts); + my $date = PublicInbox::View::fmt_ts($smsg->ds); my $mid = PublicInbox::Hval->new_msgid($smsg->mid)->{href}; $$res .= qq{$rank. <b><a\nhref="$mid/">}. $s . "</a></b>\n"; - $$res .= "$pfx - by $f @ $ts UTC [$pct%]\n\n"; + $$res .= "$pfx - by $f @ $date UTC [$pct%]\n\n"; } $$res .= search_nav_bot($mset, $q); *noop; @@ -227,7 +227,7 @@ sub mset_thread { } ($mset->items) ]}); my $r = $q->{r}; my $rootset = PublicInbox::SearchThread::thread($msgs, - $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts, + $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds, $srch); my $skel = search_nav_bot($mset, $q). "<pre>"; my $inbox = $ctx->{-inbox}; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index f811f4f0..18882afd 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -6,7 +6,7 @@ package PublicInbox::View; use strict; use warnings; -use PublicInbox::MsgTime qw(msg_timestamp); +use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::Linkify; use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape/; @@ -735,7 +735,7 @@ sub load_results { sub thread_results { my ($msgs, $srch) = @_; require PublicInbox::SearchThread; - PublicInbox::SearchThread::thread($msgs, *sort_ts, $srch); + PublicInbox::SearchThread::thread($msgs, *sort_ds, $srch); } sub missing_thread { @@ -746,7 +746,7 @@ sub missing_thread { sub _msg_date { my ($hdr) = @_; - fmt_ts(msg_timestamp($hdr)); + fmt_ts(msg_datestamp($hdr)); } sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) } @@ -782,7 +782,7 @@ sub skel_dump { my $obfs_ibx = $ctx->{-obfs_ibx}; obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx; - my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level); + my $d = fmt_ts($smsg->{ds}) . ' ' . indent_for($level) . th_pfx($level); my $attr = $f; $ctx->{first_level} ||= $level; @@ -863,10 +863,10 @@ sub _skel_ghost { $$dst .= $d; } -sub sort_ts { +sub sort_ds { [ sort { - (eval { $a->topmost->{smsg}->ts } || 0) <=> - (eval { $b->topmost->{smsg}->ts } || 0) + (eval { $a->topmost->{smsg}->ds } || 0) <=> + (eval { $b->topmost->{smsg}->ds } || 0) } @{$_[0]} ]; } @@ -877,21 +877,21 @@ sub acc_topic { my $srch = $ctx->{srch}; my $mid = $node->{id}; my $x = $node->{smsg} || $srch->lookup_mail($mid); - my ($subj, $ts); + my ($subj, $ds); my $topic; if ($x) { $subj = $x->subject; $subj = $srch->subject_normalized($subj); - $ts = $x->ts; + $ds = $x->ds; if ($level == 0) { - $topic = [ $ts, 1, { $subj => $mid }, $subj ]; + $topic = [ $ds, 1, { $subj => $mid }, $subj ]; $ctx->{-cur_topic} = $topic; push @{$ctx->{order}}, $topic; return; } $topic = $ctx->{-cur_topic}; # should never be undef - $topic->[0] = $ts if $ts > $topic->[0]; + $topic->[0] = $ds if $ds > $topic->[0]; $topic->[1]++; my $seen = $topic->[2]; if (scalar(@$topic) == 3) { # parent was a ghost @@ -910,7 +910,7 @@ sub acc_topic { sub dump_topics { my ($ctx) = @_; - my $order = delete $ctx->{order}; # [ ts, subj1, subj2, subj3, ... ] + my $order = delete $ctx->{order}; # [ ds, subj1, subj2, subj3, ... ] if (!@$order) { $ctx->{-html_tip} = '<pre>[No topics in range]</pre>'; return 404; @@ -923,14 +923,14 @@ sub dump_topics { # sort by recency, this allows new posts to "bump" old topics... foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) { - my ($ts, $n, $seen, $top, @ex) = @$topic; + my ($ds, $n, $seen, $top, @ex) = @$topic; @$topic = (); next unless defined $top; # ghost topic my $mid = delete $seen->{$top}; my $href = mid_escape($mid); my $prev_subj = [ split(/ /, $top) ]; $top = PublicInbox::Hval->new($top)->as_html; - $ts = fmt_ts($ts); + $ds = fmt_ts($ds); # $n isn't the total number of posts on the topic, # just the number of posts in the current results window @@ -946,7 +946,7 @@ sub dump_topics { my $mbox = qq(<a\nhref="$href/t.mbox.gz">mbox.gz</a>); my $atom = qq(<a\nhref="$href/t.atom">Atom</a>); my $s = "<a\nhref=\"$href/T/$anchor\"><b>$top</b></a>\n" . - " $ts UTC $n - $mbox / $atom\n"; + " $ds UTC $n - $mbox / $atom\n"; for (my $i = 0; $i < scalar(@ex); $i += 2) { my $level = $ex[$i]; my $subj = $ex[$i + 1]; |