about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-03-01 11:47:36 +0600
committer Eric Wong <e@80x24.org> 2021-03-01 05:52:16 +0000
commit 5b0ff78b53a796a54f8a8d7402bd04bcd2235b14 (patch)
tree 180c46f241f057cfd5b7677ca8c7e2272076ab2f
parent ac6c31a84fc9ef363bca6838c40a2bf30f49e43e (diff)
download public-inbox-5b0ff78b53a796a54f8a8d7402bd04bcd2235b14.tar.gz
/dev/null mis-handling was reported by Kyle Meyer.

Phrase-quoting rules are also refined to avoid leaving spaces
unquoted when "phrase generator" characters exist.  Also,
context-free hunk headers no longer clobber the in_diff
state of the parser, since git can still generate those.

Link: https://public-inbox.org/meta/87k0qrrhve.fsf@kyleam.com/
-rw-r--r-- lib/PublicInbox/LeiP2q.pm 10
-rw-r--r-- t/lei-p2q.t 3
2 files changed, 10 insertions, 3 deletions
diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm
index d1dd125e..e7ddc852 100644
--- a/lib/PublicInbox/LeiP2q.pm
+++ b/lib/PublicInbox/LeiP2q.pm
@@ -12,6 +12,7 @@ use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::Spawn qw(popen_rd);
 use URI::Escape qw(uri_escape_utf8);
+my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!;
 
 sub xphrase ($) {
         my ($s) = @_;
@@ -23,7 +24,7 @@ sub xphrase ($) {
         map {
                 s/\A\s*//;
                 s/\s+\z//;
-                /[\|=><,\sA-Z]/ && !m![\./:\\\@]! ? qq("$_") : $_;
+                m![^\./:\\\@\-\w]! ? qq("$_") : $_ ;
         } ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g);
 }
 
@@ -40,7 +41,7 @@ sub extract_terms { # eml->each_part callback
                         push @{$lei->{qterms}->{dfctx}}, xphrase($_);
                 } elsif (/^-- $/) { # email signature begins
                         $in_diff = undef;
-                } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) {
+                } elsif (m!^diff --git $FN $FN!) {
                         # wait until "---" and "+++" to capture filenames
                         $in_diff = 1;
                 } elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) {
@@ -48,13 +49,16 @@ sub extract_terms { # eml->each_part callback
                         push @{$lei->{qterms}->{dfpre}}, $oa;
                         push @{$lei->{qterms}->{dfpost}}, $ob;
                         # who uses dfblob?
-                } elsif (m!^(?:---|\+{3}) ("?[^/]+/.+)!) {
+                } elsif (m!^(?:---|\+{3}) ($FN)!) {
+                        next if $1 eq '/dev/null';
                         my $fn = (split(m!/!, git_unquote($1.''), 2))[1];
                         push @{$lei->{qterms}->{dfn}}, xphrase($fn);
                 } elsif ($in_diff && s/^\+//) { # diff added
                         push @{$lei->{qterms}->{dfb}}, xphrase($_);
                 } elsif ($in_diff && s/^-//) { # diff removed
                         push @{$lei->{qterms}->{dfa}}, xphrase($_);
+                } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) {
+                        # traditional diff w/o -p
                 } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) {
                         push @{$lei->{qterms}->{dfhh}}, xphrase($1);
                 } elsif (/^(?:dis)similarity index/ ||
diff --git a/t/lei-p2q.t b/t/lei-p2q.t
index 1a2c2e4f..87cf9fa7 100644
--- a/t/lei-p2q.t
+++ b/t/lei-p2q.t
@@ -25,5 +25,8 @@ test_lei(sub {
                         "dfpost:6e006fd73b OR " .
                         "dfpost:6e006fd73\n",
                 '3-byte chop');
+
+        lei_ok(qw(p2q t/data/message_embed.eml --want=dfb));
+        like($lei_out, qr/\bdfb:\S+/, 'got dfb off /dev/null file');
 });
 done_testing;