From d4adef2ef649b738e83d065170dad9d84027dd77 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Jan 2019 11:40:22 +0000 Subject: t/check-www-inbox: use xmlstarlet to validate Atom if available I almost forgot about this script, but remembering to test it against real-world data can be useful for hunting bugs. --- t/check-www-inbox.perl | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 08e62471..7dd1eebe 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ # Parallel WWW checker my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n"; @@ -23,6 +23,16 @@ my %opts = ( GetOptions(%opts) or die "bad command-line args\n$usage"; my $root_url = shift or die $usage; +chomp(my $xmlstarlet = `which xmlstarlet 2>/dev/null`); +my $atom_check = eval { + require IPC::Run; + my $cmd = [ qw(xmlstarlet val -e -) ]; + sub { + my ($in, $out, $err) = @_; + IPC::Run::run($cmd, $in, $out, $err); + } +} if $xmlstarlet; + my %workers; $SIG{TERM} = sub { exit 0 }; $SIG{CHLD} = sub { @@ -146,7 +156,15 @@ sub worker_loop { # make sure the HTML source doesn't screw up terminals # when people curl the source (not remotely an expert # on languages or encodings, here). - next if $r->header('Content-Type') !~ m!\btext/html\b!; + my $ct = $r->header('Content-Type'); + if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { + my $raw = $r->decoded_content; + my ($out, $err) = ('', ''); + $atom_check->(\$raw, \$out, \$err) and + warn "Atom ($?) - $u - <1:$out> <2:$err>\n"; + } + + next if $ct !~ m!\btext/html\b!; my $dc = $r->decoded_content; if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) { my $o = $1; -- cgit v1.2.3-24-ge0c7