user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [RFC] www: set "<!DOCTYPE html>" everywhere
@ 2019-04-27 21:23 Eric Wong
  2019-04-28 22:32 ` Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2019-04-27 21:23 UTC (permalink / raw)
  To: meta

I'm no fan of the "Living Standard" quicksand that is HTML 5
(or of wasting 15 bytes on every response).  However, making
everything easy to validate with tidy(1) seems alright...

t/check-www-inbox.perl now runs tidy(1) for every text/html
response.
---
 Documentation/txt2pre          |  2 +-
 lib/PublicInbox/Mbox.pm        |  2 +-
 lib/PublicInbox/Unsubscribe.pm |  2 +-
 lib/PublicInbox/WWW.pm         |  2 +-
 lib/PublicInbox/WwwStream.pm   |  2 +-
 t/check-www-inbox.perl         | 76 ++++++++++++++++++++--------------
 6 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/Documentation/txt2pre b/Documentation/txt2pre
index 4c4b2ca..4ad2372 100755
--- a/Documentation/txt2pre
+++ b/Documentation/txt2pre
@@ -19,7 +19,7 @@ $str = $l->linkify_1($str);
 $str = ascii_html($str);
 $str = $l->linkify_2($str);
 
-print '<html><head>',
+print '<!DOCTYPE html><html><head>',
   qq(<meta\nhttp-equiv="Content-Type"\ncontent="text/html; charset=utf-8"\n/>),
   "<title>$title</title>",
   "</head><body><pre>",  $str , '</pre></body></html>';
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 78dbe27..1e85573 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -222,7 +222,7 @@ sub need_gzip {
 	my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]);
 	my $title = 'gzipped mbox not available';
 	$fh->write(<<EOF);
-<html><head><title>$title</title><body><pre>$title
+<!DOCTYPE html><html><head><title>$title</title><body><pre>$title
 The administrator needs to install the IO::Compress::Gzip Perl module
 to support gzipped mboxes.
 <a href="../">Return to index</a></pre></body></html>
diff --git a/lib/PublicInbox/Unsubscribe.pm b/lib/PublicInbox/Unsubscribe.pm
index 11a347e..1e66011 100644
--- a/lib/PublicInbox/Unsubscribe.pm
+++ b/lib/PublicInbox/Unsubscribe.pm
@@ -134,7 +134,7 @@ sub finalize_unsub { # on POST
 
 sub r {
 	my ($self, $code, $title, @body) = @_;
-	[ $code, [ @CT_HTML ], [
+	[ $code, [ @CT_HTML ], [ '<!DOCTYPE html>' .
 		"<html><head><title>$title</title></head><body><pre>".
 		join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
 		"<pre>This page is available under AGPL-3.0+\n" .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6e69001..6e46caa 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -314,7 +314,7 @@ sub searcher {
 sub need_search {
 	my ($ctx) = @_;
 	my $msg = <<EOF;
-<html><head><title>Search not available for this
+<!DOCTYPE html><html><head><title>Search not available for this
 public-inbox</title><body><pre>Search is not available for this public-inbox
 <a href="../">Return to index</a></pre></body></html>
 EOF
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 8b79923..811f6bc 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -64,7 +64,7 @@ sub _html_top ($) {
 	} else {
 		$top = '<pre>' . $top . "\n" . $links . '</pre>';
 	}
-	"<html><head><title>$title</title>" .
+	"<!DOCTYPE html><html><head><title>$title</title>" .
 		"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
 		"href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
 	        $ctx->{www}->style($upfx) .
diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl
index db292c5..0cab68e 100644
--- a/t/check-www-inbox.perl
+++ b/t/check-www-inbox.perl
@@ -30,37 +30,14 @@ my %opts = (
 GetOptions(%opts) or die "bad command-line args\n$usage";
 my $root_url = shift or die $usage;
 
-chomp(my $xmlstarlet = which('xmlstarlet'));
-my $atom_check = eval {
-	my $cmd = [ qw(xmlstarlet val -e -) ];
-	sub {
-		my ($in, $out, $err) = @_;
-		use autodie;
-		open my $in_fh, '+>', undef;
-		open my $out_fh, '+>', undef;
-		open my $err_fh, '+>', undef;
-		print $in_fh $$in;
-		$in_fh->flush;
-		sysseek($in_fh, 0, 0);
-		my $rdr = {
-			0 => fileno($in_fh),
-			1 => fileno($out_fh),
-			2 => fileno($err_fh),
-		};
-		my $pid = spawn($cmd, undef, $rdr);
-		defined $pid or die "spawn failure: $!";
-		while (waitpid($pid, 0) != $pid) {
-			next if $!{EINTR};
-			warn "waitpid(xmlstarlet, $pid) $!";
-			return $!;
-		}
-		sysseek($out_fh, 0, 0);
-		sysread($out_fh, $$out, -s $out_fh);
-		sysseek($err_fh, 0, 0);
-		sysread($err_fh, $$err, -s $err_fh);
-		$?
-	}
-} if $xmlstarlet;
+my $xmlstarlet = which('xmlstarlet');
+my $atom_check = cmd_check([ $xmlstarlet, qw(val -e -) ]) if $xmlstarlet;
+
+# FIXME: highlight creates empty spans:
+my @TIDY_OPT = qw(--drop-empty-elements 0);
+
+my $tidy = which('tidy');
+my $tidy_check = cmd_check([ $tidy, qw(-e -q), @TIDY_OPT ]) if $tidy;
 
 my %workers;
 $SIG{INT} = sub { exit 130 };
@@ -205,5 +182,42 @@ sub worker_loop {
 			my $c = Dumper($o);
 			warn "bad: $u $c\n";
 		}
+		if ($tidy_check) {
+			my $raw = $r->decoded_content;
+			my ($out, $err) = ('', '');
+			my $fail = $tidy_check->(\$raw, \$out, \$err);
+			warn "Tidy ($fail) - $u - <1:$out> <2:$err>\n" if $fail;
+		}
+	}
+}
+
+sub cmd_check {
+	my ($cmd) = @_;
+	sub {
+		my ($in, $out, $err) = @_;
+		use autodie;
+		open my $in_fh, '+>', undef;
+		open my $out_fh, '+>', undef;
+		open my $err_fh, '+>', undef;
+		print $in_fh $$in;
+		$in_fh->flush;
+		sysseek($in_fh, 0, 0);
+		my $rdr = {
+			0 => fileno($in_fh),
+			1 => fileno($out_fh),
+			2 => fileno($err_fh),
+		};
+		my $pid = spawn($cmd, undef, $rdr);
+		defined $pid or die "spawn failure: $!";
+		while (waitpid($pid, 0) != $pid) {
+			next if $!{EINTR};
+			warn "waitpid($cmd->[0], $pid) $!";
+			return $!;
+		}
+		sysseek($out_fh, 0, 0);
+		sysread($out_fh, $$out, -s $out_fh);
+		sysseek($err_fh, 0, 0);
+		sysread($err_fh, $$err, -s $err_fh);
+		$?
 	}
 }
-- 
EW

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-04-28 22:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-27 21:23 [RFC] www: set "<!DOCTYPE html>" everywhere Eric Wong
2019-04-28 22:32 ` Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).