From 770d8b3f465db89d80922c32d766b022cf4b31ed Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 11 Oct 2023 07:20:53 +0000 Subject: treewide: consolidate "From " line removal Aside from our prior import bugs (fixed in a0c07cba0e5d8b6a (mda: drop leading "From " lines again, 2016-06-26)), we'll always have to be dealing with mutt piping messages to us and `git format-patch' output. So just share the regexp so we can use it everywhere. It may be desirable to allow importing messages with a leading "From " line for FUSE, even. Additionally, some instances of this regexp needlessly added optional `\r?' (CR) checks ahead of the `\n' (LF) element; but they're pointless anyways since [^\n]* already matches all non-LF bytes, CR included. --- lib/PublicInbox/Eml.pm | 6 ++++++ lib/PublicInbox/IMAP.pm | 2 +- lib/PublicInbox/Import.pm | 8 +++----- lib/PublicInbox/LeiInput.pm | 5 +---- lib/PublicInbox/LeiInspect.pm | 2 +- lib/PublicInbox/LeiToMail.pm | 3 +-- lib/PublicInbox/Mbox.pm | 16 +++++++--------- lib/PublicInbox/MboxReader.pm | 2 +- lib/PublicInbox/NNTP.pm | 3 +-- 9 files changed, 22 insertions(+), 25 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 8b999e1a..24060ec8 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -528,4 +528,10 @@ sub willneed { re_memo($_) for @_ } willneed(qw(From To Cc Date Subject Content-Type In-Reply-To References Message-ID X-Alt-Message-ID)); +# This fixes an old bug from import (pre-a0c07cba0e5d8b6a) +# mutt also pipes single RFC822 messages with a "From " line, +# but no Content-Length or "From " escaping. +# "git format-patch" also generates such files by default. 
+sub strip_from { $_[0] =~ s/\A[\r\n]*From [^\n]*\n//s } + 1; diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index 3c64cefa..e4a9e304 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -664,7 +664,7 @@ sub op_eml_new { $_[4] = PublicInbox::Eml->new($_[3]) } # s/From / fixes old bug from import (pre-a0c07cba0e5d8b6a) sub to_crlf_full { ${$_[0]} =~ s/(?" if $lf ne "\n"; - - # fixup some bugginess in old versions: - $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; \$buf; } @@ -136,8 +133,9 @@ sub check_remove_v1 { my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef); $info =~ m!\A100644 blob ([a-f0-9]{40,})\t!s or die "not blob: $info"; my $oid = $1; - my $msg = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed"; - my $cur = PublicInbox::Eml->new($msg); + my $bref = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed"; + PublicInbox::Eml::strip_from($$bref); + my $cur = PublicInbox::Eml->new($bref); my $cur_s = $cur->header('Subject') // ''; my $cur_m = $mime->header('Subject') // ''; if ($cur_s ne $cur_m || norm_body($cur) ne norm_body($mime)) { diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 93f8b6b8..28b73ca9 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -84,10 +84,7 @@ sub input_fh { return $self->{lei}->child_error(0, <<""); error reading $name: $! - # mutt pipes single RFC822 messages with a "From " line, - # but no Content-Length or "From " escaping. - # "git format-patch" also generates such files by default. 
- $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($buf); # a user may feed just a body: git diff | lei rediff -U9 if ($self->{-force_eml}) { diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm index f801610f..65c64cf2 100644 --- a/lib/PublicInbox/LeiInspect.pm +++ b/lib/PublicInbox/LeiInspect.pm @@ -254,7 +254,7 @@ sub inspect_start ($$) { sub do_inspect { # lei->do_env cb my ($lei) = @_; my $str = delete $lei->{istr}; - $str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($str); my $eml = PublicInbox::Eml->new(\$str); inspect_start($lei, [ 'blob:'.$lei->git_oid($eml)->hexdigest, map { "mid:$_" } @{mids($eml)} ]); diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 8771592d..ead60b38 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -53,8 +53,7 @@ sub _mbox_hdr_buf ($$$) { } my $buf = delete $eml->{hdr}; - # fixup old bug from import (pre-a0c07cba0e5d8b6a) - $$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($$buf); my $ident = $smsg->{blob} // 'lei'; if (defined(my $pct = $smsg->{pct})) { $ident .= "=$pct" } diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index bf61bb0e..52f88ae3 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -89,17 +89,15 @@ sub emit_raw { sub msg_hdr ($$) { my ($ctx, $eml) = @_; - my $header_obj = $eml->header_obj; - # drop potentially confusing headers, ssoma already should've dropped - # Lines and Content-Length - foreach my $d (qw(Lines Bytes Content-Length Status)) { - $header_obj->header_set($d); + # drop potentially confusing headers, various importers should've + # already dropped these, but we can't trust stuff we've cloned + for my $d (qw(Lines Bytes Content-Length Status)) { + $eml->header_set($d); } - my $crlf = $header_obj->crlf; - my $buf = $header_obj->as_string; - # fixup old bug from import (pre-a0c07cba0e5d8b6a) - $buf =~ s/\A[\r\n]*From 
[^\r\n]*\r?\n//s; + my $crlf = $eml->crlf; + my $buf = $eml->header_obj->as_string; + PublicInbox::Eml::strip_from($buf); "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf . $crlf; } diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm index e4209022..d67fb4eb 100644 --- a/lib/PublicInbox/MboxReader.pm +++ b/lib/PublicInbox/MboxReader.pm @@ -93,7 +93,7 @@ sub _mbox_cl ($$$;@) { undef $mbfh; } while (my $hdr = _extract_hdr(\$buf)) { - $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or + PublicInbox::Eml::strip_from($$hdr) or die "E: no 'From ' line in:\n", Dumper($hdr); my $eml = PublicInbox::Eml->new($hdr); next unless $eml->raw_size; diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 316b7775..603cf094 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -523,8 +523,7 @@ sub msg_hdr_write ($$) { set_nntp_headers($eml, $smsg); my $hdr = $eml->{hdr} // \(my $x = ''); - # fixup old bug from import (pre-a0c07cba0e5d8b6a) - $$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($$hdr); $$hdr =~ s/(?