From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4AEEC1F8DF for ; Sat, 1 Aug 2020 08:12:28 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/4] remove unnecessary ->header_obj calls Date: Sat, 1 Aug 2020 08:12:27 +0000 Message-Id: <20200801081227.21412-5-e@yhbt.net> In-Reply-To: <20200801081227.21412-1-e@yhbt.net> References: <20200801081227.21412-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We used ->header_obj in the past as an optimization with Email::MIME. That optimization is no longer necessary with PublicInbox::Eml. This doesn't make any functional difference even if we were to go back to Email::MIME. However, it reduces the amount of code we have and slightly reduces allocations with PublicInbox::Eml. --- lib/PublicInbox/ContentHash.pm | 15 +++++++------- lib/PublicInbox/Import.pm | 17 +++++++--------- lib/PublicInbox/OverIdx.pm | 9 ++++---- lib/PublicInbox/SearchIdx.pm | 14 ++++++------- lib/PublicInbox/V2Writable.pm | 35 +++++++++++++++----------------- lib/PublicInbox/View.pm | 35 +++++++++++++++----------------- lib/PublicInbox/WatchMaildir.pm | 3 +-- lib/PublicInbox/WwwAtomStream.pm | 9 ++++---- script/public-inbox-edit | 8 ++++---- script/public-inbox-mda | 2 +- 10 files changed, 66 insertions(+), 81 deletions(-) diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm index 420dc5e7c..1fe229559 100644 --- a/lib/PublicInbox/ContentHash.pm +++ b/lib/PublicInbox/ContentHash.pm @@ -53,29 +53,28 @@ sub content_dig_i { } sub content_digest ($) { - my ($mime) = @_; + my ($eml) = @_; my $dig = Digest::SHA->new(256); - my $hdr = $mime->header_obj; # References: and In-Reply-To: get used interchangeably # in some "duplicates" in LKML. We treat them the same # in SearchIdx, so treat them the same for this: # do NOT consider the Message-ID as part of the content_hash # if we got here, we've already got Message-ID reuse - my %seen = map { $_ => 1 } @{mids($hdr)}; - foreach my $mid (@{references($hdr)}) { + my %seen = map { $_ => 1 } @{mids($eml)}; + foreach my $mid (@{references($eml)}) { $dig->add("ref\0$mid\0") unless $seen{$mid}++; } # Only use Sender: if From is not present foreach my $h (qw(From Sender)) { - my @v = $hdr->header($h); + my @v = $eml->header($h); if (@v) { digest_addr($dig, $h, $_) foreach @v; } } foreach my $h (qw(Subject Date)) { - my @v = $hdr->header($h); + my @v = $eml->header($h); foreach my $v (@v) { utf8::encode($v); $dig->add("$h\0$v\0"); @@ -85,10 +84,10 @@ sub content_digest ($) { # not in the original message. For the purposes of deduplication, # do not take it into account: foreach my $h (qw(To Cc)) { - my @v = $hdr->header($h); + my @v = $eml->header($h); digest_addr($dig, $h, $_) foreach @v; } - msg_iter($mime, \&content_dig_i, $dig); + msg_iter($eml, \&content_dig_i, $dig); $dig; } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 07a495187..700b40262 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -285,15 +285,14 @@ sub extract_cmt_info ($;$) { # $mime is PublicInbox::Eml, but remains Email::MIME-compatible $smsg //= bless {}, 'PublicInbox::Smsg'; - my $hdr = $mime->header_obj; - $smsg->populate($hdr); + $smsg->populate($mime); my $sender = ''; my $from = delete($smsg->{From}) // ''; my ($email) = PublicInbox::Address::emails($from); my ($name) = PublicInbox::Address::names($from); if (!defined($name) || !defined($email)) { - $sender = $hdr->header('Sender') // ''; + $sender = $mime->header('Sender') // ''; $name //= (PublicInbox::Address::names($sender))[0]; $email //= (PublicInbox::Address::emails($sender))[0]; } @@ -346,13 +345,12 @@ sub append_mid ($$) { } sub v1_mid0 ($) { - my ($mime) = @_; - my $hdr = $mime->header_obj; - my $mids = mids($hdr); + my ($eml) = @_; + my $mids = mids($eml); if (!scalar(@$mids)) { # spam often has no Message-ID - my $mid0 = digest2mid(content_digest($mime), $hdr); - append_mid($hdr, $mid0); + my $mid0 = digest2mid(content_digest($eml), $eml); + append_mid($eml, $mid0); return $mid0; } $mids->[0]; @@ -671,8 +669,7 @@ version 1.0 my $parsed = PublicInbox::Eml->new($message); my $ret = $im->add($parsed); if (!defined $ret) { - warn "duplicate: ", - $parsed->header_obj->header_raw('Message-ID'), "\n"; + warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n"; } else { print "imported at mark $ret\n"; } diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 5821c562b..c8f61e012 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -271,11 +271,10 @@ sub subject_path ($) { } sub add_overview { - my ($self, $mime, $smsg) = @_; - $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!; - my $hdr = $mime->header_obj; - my $mids = mids_for_index($hdr); - my $refs = parse_references($smsg, $hdr, $mids); + my ($self, $eml, $smsg) = @_; + $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; + my $mids = mids_for_index($eml); + my $refs = parse_references($smsg, $eml, $mids); my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index feb00de22..a1baa65bd 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -350,8 +350,7 @@ sub index_ids ($$$$) { } sub add_xapian ($$$$) { - my ($self, $mime, $smsg, $mids) = @_; - my $hdr = $mime->header_obj; + my ($self, $eml, $smsg, $mids) = @_; my $doc = $X->{Document}->new; add_val($doc, PublicInbox::Search::TS(), $smsg->{ts}); my @ds = gmtime($smsg->{ds}); @@ -366,10 +365,10 @@ sub add_xapian ($$$$) { $tg->set_document($doc); index_headers($self, $smsg); - msg_iter($mime, \&index_xapian, [ $self, $doc ]); - index_ids($self, $doc, $hdr, $mids); + msg_iter($eml, \&index_xapian, [ $self, $doc ]); + index_ids($self, $doc, $eml, $mids); $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP - PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids); + PublicInbox::OverIdx::parse_references($smsg, $eml, $mids); my $data = $smsg->to_doc_data; $doc->set_data($data); if (my $altid = $self->{-altid}) { @@ -398,8 +397,7 @@ sub _msgmap_init ($) { sub add_message { # mime = PublicInbox::Eml or Email::MIME object my ($self, $mime, $smsg, $sync) = @_; - my $hdr = $mime->header_obj; - my $mids = mids_for_index($hdr); + my $mids = mids_for_index($mime); $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat $smsg->{mid} //= $mids->[0]; # v1 compatibility $smsg->{num} //= do { # v1 @@ -408,7 +406,7 @@ sub add_message { }; # v1 and tests only: - $smsg->populate($hdr, $sync); + $smsg->populate($mime, $sync); $smsg->{bytes} //= length($mime->as_string); eval { diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index e1c9a393a..344edbbad 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -197,7 +197,7 @@ sub _add { sub v2_num_for { my ($self, $mime) = @_; - my $mids = mids($mime->header_obj); + my $mids = mids($mime); if (@$mids) { my $mid = $mids->[0]; my $num = $self->{mm}->mid_insert($mid); @@ -244,28 +244,27 @@ sub v2_num_for { } sub v2_num_for_harder { - my ($self, $mime) = @_; + my ($self, $eml) = @_; - my $hdr = $mime->header_obj; - my $dig = content_digest($mime); - my $mid0 = PublicInbox::Import::digest2mid($dig, $hdr); + my $dig = content_digest($eml); + my $mid0 = PublicInbox::Import::digest2mid($dig, $eml); my $num = $self->{mm}->mid_insert($mid0); unless (defined $num) { # it's hard to spoof the last Received: header - my @recvd = $hdr->header_raw('Received'); + my @recvd = $eml->header_raw('Received'); $dig->add("Received: $_") foreach (@recvd); - $mid0 = PublicInbox::Import::digest2mid($dig, $hdr); + $mid0 = PublicInbox::Import::digest2mid($dig, $eml); $num = $self->{mm}->mid_insert($mid0); # fall back to a random Message-ID and give up determinism: until (defined($num)) { $dig->add(rand); - $mid0 = PublicInbox::Import::digest2mid($dig, $hdr); + $mid0 = PublicInbox::Import::digest2mid($dig, $eml); warn "using random Message-ID <$mid0> as fallback\n"; $num = $self->{mm}->mid_insert($mid0); } } - PublicInbox::Import::append_mid($hdr, $mid0); + PublicInbox::Import::append_mid($eml, $mid0); ($num, $mid0); } @@ -384,7 +383,7 @@ sub rewrite_internal ($$;$$$) { my $over = $self->{over}; my $chashes = content_hashes($old_eml); my $removed = []; - my $mids = mids($old_eml->header_obj); + my $mids = mids($old_eml); # We avoid introducing new blobs into git since the raw content # can be slightly different, so we do not need the user-supplied @@ -514,9 +513,7 @@ sub _check_mids_match ($$$) { # Message-IDs are pretty complex and rethreading hasn't been fully # implemented, yet. sub check_mids_match ($$) { - my ($old_mime, $new_mime) = @_; - my $old = $old_mime->header_obj; - my $new = $new_mime->header_obj; + my ($old, $new) = @_; _check_mids_match(mids($old), mids($new), 'Message-ID(s)'); _check_mids_match(references($old), references($new), 'References/In-Reply-To'); @@ -894,9 +891,9 @@ sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; return if $size == 0; # purged my ($num, $mid0); - my $mime = PublicInbox::Eml->new($$bref); - my $mids = mids($mime->header_obj); - my $chash = content_hash($mime); + my $eml = PublicInbox::Eml->new($$bref); + my $mids = mids($eml); + my $chash = content_hash($eml); my $self = $arg->{v2w}; if (scalar(@$mids) == 0) { @@ -960,8 +957,8 @@ sub index_oid { # cat_async callback blob => $oid, mid => $mid0, }, 'PublicInbox::Smsg'; - $smsg->populate($mime, $arg); - if (do_idx($self, $bref, $mime, $smsg)) { + $smsg->populate($eml, $arg); + if (do_idx($self, $bref, $eml, $smsg)) { ${$arg->{need_checkpoint}} = 1; } } @@ -1113,7 +1110,7 @@ sub unindex_oid ($$;$) { # git->cat_async callback my $self = $sync->{v2w}; my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef; my $mm = $self->{mm}; - my $mids = mids(PublicInbox::Eml->new($bref)->header_obj); + my $mids = mids(PublicInbox::Eml->new($bref)); undef $$bref; my $over = $self->{over}; foreach my $mid (@$mids) { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index d7ec4eb0a..4cb72bea8 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -33,8 +33,7 @@ sub msg_page_i { $ctx->{smsg} = $ctx->{over}->next_by_mid(@{$ctx->{next_arg}}); $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ? "../${\mid_href($smsg->{mid})}/" : ''; - my $hdr = $eml->header_obj; - my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx); + my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); multipart_text_as_html($eml, $ctx); delete $ctx->{obuf}; $$obuf .= '
'; @@ -50,14 +49,13 @@ sub no_over_html ($) { my ($ctx) = @_; my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return; # 404 my $eml = PublicInbox::Eml->new($bref); - my $hdr = $eml->header_obj; $ctx->{mhref} = ''; PublicInbox::WwwStream::init($ctx); - my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx); + my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); multipart_text_as_html($eml, $ctx); delete $ctx->{obuf}; $$obuf .= '
'; - eval { $$obuf .= html_footer($ctx, $hdr) }; + eval { $$obuf .= html_footer($ctx, $eml) }; html_oneshot($ctx, 200, $obuf); } @@ -199,16 +197,15 @@ sub eml_entry { # Deleting these fields saves about 400K as we iterate across 1K msgs delete @$smsg{qw(ts blob)}; - my $hdr = $eml->header_obj; - my $from = _hdr_names_html($hdr, 'From'); + my $from = _hdr_names_html($eml, 'From'); obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx; $rv .= "From: $from @ ".fmt_ts($ds)." UTC"; my $upfx = $ctx->{-upfx}; my $mhref = $upfx . mid_href($mid_raw) . '/'; $rv .= qq{ (permalink / }; $rv .= qq{raw)\n}; - my $to = fold_addresses(_hdr_names_html($hdr, 'To')); - my $cc = fold_addresses(_hdr_names_html($hdr, 'Cc')); + my $to = fold_addresses(_hdr_names_html($eml, 'To')); + my $cc = fold_addresses(_hdr_names_html($eml, 'Cc')); my ($tlen, $clen) = (length($to), length($cc)); my $to_cc = ''; if (($tlen + $clen) > COLS) { @@ -227,7 +224,7 @@ sub eml_entry { $rv .= $to_cc; my $mapping = $ctx->{mapping}; - if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) { + if (!$mapping && (defined($irt) || defined($irt = in_reply_to($eml)))) { my $href = $upfx . mid_href($irt) . '/'; my $html = ascii_html($irt); $rv .= qq(In-Reply-To: <$html>\n) @@ -626,16 +623,16 @@ sub add_text_body { # callback for each_part } sub _msg_page_prepare_obuf { - my ($hdr, $ctx) = @_; + my ($eml, $ctx) = @_; my $over = $ctx->{-inbox}->over; my $obfs_ibx = $ctx->{-obfs_ibx}; my $rv = ''; - my $mids = mids_for_index($hdr); + my $mids = mids_for_index($eml); my $nr = $ctx->{nr}++; if ($nr) { # unlikely $rv .= '
';
 	} else {
-		$ctx->{first_hdr} = $hdr;
+		$ctx->{first_hdr} = $eml->header_obj;
 		if ($ctx->{smsg}) {
 			$rv .=
 "
WARNING: multiple messages have this Message-ID\n
"; @@ -644,7 +641,7 @@ sub _msg_page_prepare_obuf { } $ctx->{-upfx} = '../' if $over; my @title; # (Subject[0], From[0]) - for my $v ($hdr->header('From')) { + for my $v ($eml->header('From')) { my @n = PublicInbox::Address::names($v); $v = ascii_html($v); $title[1] //= ascii_html(join(', ', @n)); @@ -655,14 +652,14 @@ sub _msg_page_prepare_obuf { $rv .= "From: $v\n" if $v ne ''; } foreach my $h (qw(To Cc)) { - for my $v ($hdr->header($h)) { + for my $v ($eml->header($h)) { fold_addresses($v); $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; $rv .= "$h: $v\n" if $v ne ''; } } - my @subj = $hdr->header('Subject'); + my @subj = $eml->header('Subject'); if (@subj) { my $v = ascii_html(shift @subj); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; @@ -678,7 +675,7 @@ sub _msg_page_prepare_obuf { $rv .= qq() if $over; $title[0] = '(no subject)'; } - for my $v ($hdr->header('Date')) { + for my $v ($eml->header('Date')) { $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P $rv .= "Date: $v\n"; @@ -697,12 +694,12 @@ sub _msg_page_prepare_obuf { my $lnk = PublicInbox::Linkify->new; my $s = ''; for my $h (qw(Message-ID X-Alt-Message-ID)) { - $s .= "$h: $_\n" for ($hdr->header_raw($h)); + $s .= "$h: $_\n" for ($eml->header_raw($h)); } $lnk->linkify_mids('..', \$s, 1); $rv .= $s; } - $rv .= _parent_headers($hdr, $over); + $rv .= _parent_headers($eml, $over); $rv .= "\n"; \$rv; } diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 814b455b2..7d4139a5d 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -163,9 +163,8 @@ sub import_eml ($$$) { # any header match means it's eligible for the inbox: if (my $watch_hdrs = $ibx->{-watchheaders}) { my $ok; - my $hdr = $eml->header_obj; for my $wh (@$watch_hdrs) { - my @v = $hdr->header_raw($wh->[0]); + my @v = $eml->header_raw($wh->[0]); $ok = grep(/$wh->[1]/, @v) and last; } return unless $ok; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 1ed806fd4..388def123 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -116,9 +116,8 @@ sub atom_header { # returns undef or string sub feed_entry { my ($ctx, $smsg, $eml) = @_; - my $hdr = $eml->header_obj; my $mid = $smsg->{mid}; - my $irt = PublicInbox::View::in_reply_to($hdr); + my $irt = PublicInbox::View::in_reply_to($eml); my $uuid = to_uuid($mid); my $base = $ctx->{feed_base_url}; if (defined $irt) { @@ -130,13 +129,13 @@ sub feed_entry { $irt = ''; } my $href = $base . mid_href($mid) . '/'; - my $updated = feed_updated(msg_timestamp($hdr)); + my $updated = feed_updated(msg_timestamp($eml)); - my $title = $hdr->header('Subject'); + my $title = $eml->header('Subject'); $title = '(no subject)' unless defined $title && $title ne ''; $title = title_tag($title); - my $from = $hdr->header('From') // $hdr->header('Sender') // + my $from = $eml->header('From') // $eml->header('Sender') // $ctx->{-inbox}->{-primary_address}; my ($email) = PublicInbox::Address::emails($from); my $name = ascii_html(join(', ', PublicInbox::Address::names($from))); diff --git a/script/public-inbox-edit b/script/public-inbox-edit index 2d3c4af4d..240beb3a1 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -93,7 +93,7 @@ Multiple messages with different content found matching } } else { my $eml = eml_from_path($file) or die "open($file) failed: $!"; - my $mids = mids($eml->header_obj); + my $mids = mids($eml); find_mid($found, $_, \@ibxs) for (@$mids); # populates $found my $chash = content_hash($eml); my $to_edit = $found->{$chash}; @@ -214,9 +214,9 @@ W: possible message boundary splitting error # allow changing Received: and maybe other headers which can # contain sensitive info. - my $nhdr = $new_mime->header_obj; - my $ohdr = $old_mime->header_obj; - if (($nhdr->as_string eq $ohdr->as_string) && + my $nhdr = $new_mime->header_obj->as_string; + my $ohdr = $old_mime->header_obj->as_string; + if (($nhdr eq $ohdr) && (content_hash($new_mime) eq content_hash($old_mime))) { warn "No change detected to:\n", show_cmd($ibx, $smsg); diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 42d0e00cb..02ca34316 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -119,7 +119,7 @@ for my $ibx (@$dests) { # destination succeeds $emm->abort; } else { # v1-only - my $mid = $mime->header_obj->header_raw('Message-ID'); + my $mid = $mime->header_raw('Message-ID'); # this message is similar to what ssoma-mda shows: print STDERR "CONFLICT: Message-ID: $mid exists\n"; }