about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-05-06 10:40:53 +0000
committer Eric Wong <e@yhbt.net> 2020-05-07 19:09:41 +0000
commit 65b37aeb8392a62e9aa8ceac08227c77c6fa6b8b (patch)
tree bc46876af8e9b881a286d4825f89ca261de8f6b8
parent 2e168e869df3f1ca88f2eb22a8d1a1dda869b6ef (diff)
download public-inbox-65b37aeb8392a62e9aa8ceac08227c77c6fa6b8b.tar.gz
For non-malicious messages, we can assume the diffstat and actual
diff appear in the same order.  Thus we can store {-long_paths} as
an arrayref and only compare the first element when we encounter
a truncated path.

This should make HTML rendering stable when there are basename
conflicts in a message, such as
https://lore.kernel.org/backports/1393202754-12919-13-git-send-email-hauke@hauke-m.de/

This diffstat anchor linkification can still be defeated by
users who create actual path names beginning with "...", but we
won't waste CPU cycles guarding against that case.
-rw-r--r-- lib/PublicInbox/ViewDiff.pm 23
1 file changed, 9 insertions, 14 deletions
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 3d6058a9..34df8ad4 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -82,10 +82,8 @@ sub anchor0 ($$$$) {
         $fn =~ s/{(?:.+) => (.+)}/$1/ or $fn =~ s/.* => (.+)/$1/;
         $fn = git_unquote($fn);
 
-        # long filenames will require us to walk backwards in anchor1
-        if ($fn =~ s!\A\.\.\./?!!) {
-                $ctx->{-long_path}->{$fn} = qr/\Q$fn\E\z/s;
-        }
+        # long filenames will require us to check in anchor1()
+        push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!;
 
         if (my $attr = to_attr($ctx->{-apfx}.$fn)) {
                 $ctx->{-anchors}->{$attr} = 1;
@@ -105,17 +103,14 @@ sub anchor1 ($$) {
 
         my $ok = delete $ctx->{-anchors}->{$attr};
 
-        # unlikely, check the end of all long path names we captured:
+        # unlikely, check the end of long path names we captured,
+        # assume diffstat and diff output follow the same order,
+        # and ignore different ordering (could be malicious input)
         unless ($ok) {
-                my $lp = $ctx->{-long_path} or return;
-                foreach my $fn (keys %$lp) {
-                        $pb =~ $lp->{$fn} or next;
-
-                        delete $lp->{$fn};
-                        $attr = to_attr($ctx->{-apfx}.$fn) or return;
-                        $ok = delete $ctx->{-anchors}->{$attr} or return;
-                        last;
-                }
+                my $fn = shift(@{$ctx->{-long_path}}) or return;
+                $pb =~ /\Q$fn\E\z/s or return;
+                $attr = to_attr($ctx->{-apfx}.$fn) or return;
+                $ok = delete $ctx->{-anchors}->{$attr} or return;
         }
         $ok ? "<a\nhref=#i$attr\nid=$attr>diff</a> --git" : undef
 }