From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 63B8D1FB03; Tue, 6 Mar 2018 08:42:44 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Cc: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= Subject: [PATCH 32/34] import: fall back to Sender for extracting name and email Date: Tue, 6 Mar 2018 08:42:40 +0000 Message-Id: <20180306084242.19988-33-e@80x24.org> In-Reply-To: <20180306084242.19988-1-e@80x24.org> References: <20180306084242.19988-1-e@80x24.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit List-Id: This seems like a reasonable course of action for old messages. Cc: Nicolás Ojeda Bär --- lib/PublicInbox/Import.pm | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 7ba1668..664bec6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -208,15 +208,50 @@ sub parse_date ($) { "$ts $zone"; } -# returns undef on duplicate -# returns the :MARK of the most recent commit -sub add { - my ($self, $mime, $check_cb) = @_; # mime = Email::MIME +sub extract_author_info ($) { + my ($mime) = @_; + my $sender = ''; my $from = $mime->header('From'); my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); + if (!defined($name) || !defined($email)) { + $sender = $mime->header('Sender'); + if (!defined($name)) { + ($name) = PublicInbox::Address::names($sender); + } + if (!defined($email)) { + ($email) = PublicInbox::Address::emails($sender); + } + } + if (defined $email) { + # quiet down wide 
character warnings with utf8::encode + utf8::encode($email); + } else { + $email = ''; + warn "no email in From: $from or Sender: $sender\n"; + } + + # git gets confused with: + # "'A U Thor <u@example.com>' via foo" <foo@example.com> + # ref: + # + if (defined $name) { + $name =~ tr/<>//d; + utf8::encode($name); + } else { + $name = ''; + warn "no name in From: $from or Sender: $sender\n"; + } + ($name, $email); +} + +# returns undef on duplicate +# returns the :MARK of the most recent commit +sub add { + my ($self, $mime, $check_cb) = @_; # mime = Email::MIME + my ($name, $email) = extract_author_info($mime); my $date_raw = parse_date($mime); my $subject = $mime->header('Subject'); $subject = '(no subject)' unless defined $subject; @@ -263,25 +298,6 @@ sub add { print $w "reset $ref\n" or wfail; } - # quiet down wide character warnings with utf8::encode - if (defined $email) { - utf8::encode($email); - } else { - $email = ''; - warn "no email in From: $from\n"; - } - - # git gets confused with: - # "'A U Thor <u@example.com>' via foo" <foo@example.com> - # ref: - # - if (defined $name) { - $name =~ tr/<>//d; - utf8::encode($name); - } else { - $name = ''; - warn "no name in From: $from\n"; - } utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", "author $name <$email> $date_raw\n", -- EW