From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E70341FAEE for ; Thu, 22 Mar 2018 09:40:17 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 11/13] v2writable: clarify header cleanups Date: Thu, 22 Mar 2018 09:40:13 +0000 Message-Id: <20180322094015.14422-12-e@80x24.org> In-Reply-To: <20180322094015.14422-1-e@80x24.org> References: <20180322094015.14422-1-e@80x24.org> List-Id: We want to make it clear to the code and DEBUG_DIFF users that we do not introduce messages with unsuitable headers into public archives. --- lib/PublicInbox/Import.pm | 12 +++++++++--- lib/PublicInbox/V2Writable.pm | 7 +++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index d69934b..5d116a1 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -288,6 +288,14 @@ sub extract_author_info ($) { ($name, $email); } +# kill potentially confusing/misleading headers +sub drop_unwanted_headers ($) { + my ($mime) = @_; + + $mime->header_set($_) for qw(bytes lines content-length status); + $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS; +} + # returns undef on duplicate # returns the :MARK of the most recent commit sub add { @@ -321,9 +329,7 @@ sub add { _check_path($r, $w, $tip, $path) and return; } - # kill potentially confusing/misleading headers - $mime->header_set($_) for qw(bytes lines content-length status); - $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS; + drop_unwanted_headers($mime); # spam check: if ($check_cb) { diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 605f688..44b5528 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -223,6 +223,12 @@ sub remove { my $mm = $skel->{mm}; my $removed; my $mids = mids($mime->header_obj); + + # We avoid introducing new blobs into git since the raw content + # can be slightly different, so we do not need the user-supplied + # message now that we have the mids and content_id + $mime = undef; + foreach my $mid (@$mids) { $srch->reopen->each_smsg_by_mid($mid, sub { my ($smsg) = @_; @@ -430,6 +436,7 @@ sub diff ($$$) { print $ah $cur->as_string or die "print: $!"; close $ah or die "close: $!"; my ($bh, $bn) = tempfile('email-new-XXXXXXXX'); + PublicInbox::Import::drop_unwanted_headers($new); print $bh $new->as_string or die "print: $!"; close $bh or die "close: $!"; my $cmd = [ qw(diff -u), $an, $bn ]; -- EW