From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B3C271F61C for ; Fri, 20 Mar 2020 08:18:21 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/9] v2writable: preserve timestamps from import Date: Fri, 20 Mar 2020 08:18:14 +0000 Message-Id: <20200320081821.21715-3-e@yhbt.net> In-Reply-To: <20200320081821.21715-1-e@yhbt.net> References: <20200320081821.21715-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: While v2 indexing is triggered immediately after writing the commit to the git repository, there may be a gap between when PublicInbox::Import generates a timestamp and when PublicInbox::SearchIdx sees the message. So follow the mirror indexing behavior and take the to-be-indexed (time|date)stamps directly from the git commit. --- lib/PublicInbox/Import.pm | 12 ++++++++---- lib/PublicInbox/V2Writable.pm | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 68dc0c7e..3853ff2b 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -274,8 +274,8 @@ sub git_timestamp { "$ts $zone"; } -sub extract_cmt_info ($) { - my ($mime) = @_; +sub extract_cmt_info ($;$) { + my ($mime, $v2w) = @_; my $sender = ''; my $from = $mime->header('From'); @@ -325,6 +325,10 @@ sub extract_cmt_info ($) { utf8::encode($subject); my $at = git_timestamp(my @at = msg_datestamp($hdr)); my $ct = git_timestamp(my @ct = msg_timestamp($hdr)); + if ($v2w) { # set fallbacks in case message had no date + $v2w->{autime} = $at[0]; + $v2w->{cotime} = $ct[0]; + } ($name, $email, $at, $ct, $subject); } @@ -370,9 +374,9 @@ sub clean_tree_v2 ($$$) { # returns undef on duplicate # returns the :MARK of the most recent commit sub add { - my ($self, $mime, $check_cb) = @_; # mime = Email::MIME + my ($self, $mime, $check_cb, $v2w) = @_; # mime = Email::MIME - my ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime); + my ($name, $email, $at, $ct, $subject) = extract_cmt_info($mime, $v2w); my $path_type = $self->{path_type}; my $path; if ($path_type eq '2/38') { diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index f1842843..d39a6f89 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -176,7 +176,7 @@ sub _add { defined $num or return; # duplicate defined $mid0 or die "BUG: $mid0 undefined\n"; my $im = $self->importer; - my $cmt = $im->add($mime); + my $cmt = $im->add($mime, undef, $self); # sets $self->{(au|co)time} $cmt = $im->get_mark($cmt); $self->{last_commit}->[$self->{epoch_max}] = $cmt;