From 546ff31b4c11d74f63aa4501659224dbf864c21c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 11 Apr 2014 22:17:07 +0000 Subject: filter: use IPC::Run and improve lynx error handling We may occasionally encounter horrid HTML which lynx cannot handle, so improve error reporting. --- INSTALL | 1 - Makefile.PL | 1 - lib/PublicInbox/Filter.pm | 29 +++++++++++++++-------------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/INSTALL b/INSTALL index a69782ad..8e512a63 100644 --- a/INSTALL +++ b/INSTALL @@ -25,7 +25,6 @@ Requirements (server MDA) - Email::MIME::ContentType libemail-mime-contenttype-perl - Encode::MIME::Header perl - File::Path::Expand libfile-path-expand-perl - - IPC::Open2 perl - IPC::Run libipc-run-perl - Mail::Thread (2.5+) libmail-thread-perl - URI::Escape liburi-perl diff --git a/Makefile.PL b/Makefile.PL index f74d146a..7f2d586d 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -21,7 +21,6 @@ WriteMakefile( 'Email::MIME::ContentType' => 0, 'Encode::MIME::Header' => 0, 'File::Path::Expand' => 0, - 'IPC::Open2' => 0, 'IPC::Run' => 0, 'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat 'URI::Escape' => 0, diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm index 4a348217..e5a8fafe 100644 --- a/lib/PublicInbox/Filter.pm +++ b/lib/PublicInbox/Filter.pm @@ -10,7 +10,7 @@ use warnings; use Email::MIME; use Email::MIME::ContentType qw/parse_content_type/; use Email::Filter; -use IPC::Open2; +use IPC::Run; our $VERSION = '0.0.1'; # start with the same defaults as mailman @@ -41,7 +41,7 @@ sub run { # HTML-only, non-multipart my $body = $simple->body; my $ct_parsed = parse_content_type($content_type); - dump_html($body, $ct_parsed->{attributes}->{charset}); + dump_html(\$body, $ct_parsed->{attributes}->{charset}); replace_body($simple, $body); return 1; } elsif ($content_type =~ m!\bmultipart/!i) { @@ -80,28 +80,29 @@ sub html_part_to_text { my ($simple, $part) = @_; my $body = $part->body; my $ct_parsed = parse_content_type($part->content_type); - dump_html($body, $ct_parsed->{attributes}->{charset}); + dump_html(\$body, $ct_parsed->{attributes}->{charset}); replace_part($simple, $part, $body, 'text/plain'); } # modifies $_[0] in place sub dump_html { - my $charset = $_[1] || 'US-ASCII'; - my $cmd = "lynx -stdin -dump"; + my ($body, $charset) = @_; + $charset ||= 'US-ASCII'; + my @cmd = qw(lynx -stdin -stderr -dump); + my $out = ""; + my $err = ""; # be careful about remote command injection! if ($charset =~ /\A[A-Za-z0-9\-]+\z/) { - $cmd .= " -assume_charset=$charset"; + push @cmd, "-assume_charset=$charset"; } - - my $pid = open2(my $out, my $in, $cmd); - print $in $_[0]; - close $in; - { - local $/; - $_[0] = <$out>; + if (IPC::Run::run(\@cmd, $body, \$out, \$err)) { + $$body = $out; + } else { + # give them an ugly version: + $$body = "public-inbox HTML conversion failed: $err\n" . + $$body . "\n"; } - waitpid($pid, 0); } # this is to correct user errors and not expected to cover all corner cases -- cgit v1.2.3-24-ge0c7