From 0a04fa7bd38c8f491b429dc7d8578735ca7ca3f4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Jan 2019 03:19:20 +0000 Subject: highlight: initial wrapper and PSGI service I'll probably expose the PSGI service for cgit; but it could be useful to others as well. --- lib/PublicInbox/HlMod.pm | 126 ++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/WwwHighlight.pm | 73 +++++++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 lib/PublicInbox/HlMod.pm create mode 100644 lib/PublicInbox/WwwHighlight.pm (limited to 'lib') diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm new file mode 100644 index 00000000..5cbfb298 --- /dev/null +++ b/lib/PublicInbox/HlMod.pm @@ -0,0 +1,126 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ + +# I have no idea how stable or safe this is for handling untrusted +# input, but it seems to have been around for a while, and the +# highlight(1) executable is supported by gitweb and cgit. +# +# I'm also unsure about API stability, but highlight 3.x seems to +# have been around a few years and ikiwiki (apparently the only +# user of the SWIG/Perl bindings, at least in Debian) hasn't needed +# major changes to support it in recent years. +# +# Some code stolen from ikiwiki (GPL-2.0+) +# wrapper for SWIG-generated highlight.pm bindings +package PublicInbox::HlMod; +use strict; +use warnings; +use highlight; # SWIG-generated stuff + +sub _parse_filetypes ($) { + my $ft_conf = $_[0]->searchFile('filetypes.conf') or + die 'filetypes.conf not found by highlight'; + open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!"; + local $/; + my $cfg = <$fh>; + my %ext2lang; + my @shebang; # order matters + + # Hrm... why isn't this exposed by the highlight API? + # highlight >= 3.2 format (bind-style) (from ikiwiki) + while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+ + Extensions\s*=\s*{([^}]+)}/sgx) { + my $lang = $1; + foreach my $bit (split(/,/, $2)) { + $bit =~ s/.*"(.*)".*/$1/s; + $ext2lang{$bit} = $lang; + } + } + # AFAIK, all the regexps used by in filetypes.conf distributed + # by highlight work as Perl REs + while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+ + Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) { + my ($lang, $re) = ($1, $2); + eval { + my $perl_re = qr/$re/; + push @shebang, [ $lang, $perl_re ]; + }; + if ($@) { + warn "$lang shebang=[[$re]] did not work in Perl: $@"; + } + } + (\%ext2lang, \@shebang); +} + +sub new { + my ($class) = @_; + my $dir = highlight::DataDir->new; + $dir->initSearchDirectories(''); + my ($ext2lang, $shebang) = _parse_filetypes($dir); + bless { + -dir => $dir, + -ext2lang => $ext2lang, + -shebang => $shebang, + }, $class; +} + +sub _shebang2lang ($$) { + my ($self, $str) = @_; + my $shebang = $self->{-shebang}; + foreach my $s (@$shebang) { + return $s->[0] if $$str =~ $s->[1]; + } + undef; +} + +sub _path2lang ($$) { + my ($self, $path) = @_; + my ($ext) = ($path =~ m!([^\\/\.]+)\z!); + $ext = lc($ext); + $self->{-ext2lang}->{$ext} || $ext; +} + +sub do_hl { + my ($self, $str, $path) = @_; + my $lang = _path2lang($self, $path) if defined $path; + my $dir = $self->{-dir}; + my $langpath; + if (defined $lang) { + $langpath = $dir->getLangPath("$lang.lang") or return; + $langpath = undef unless -f $langpath; + } + unless (defined $langpath) { + $lang = _shebang2lang($self, $str) or return; + $langpath = $dir->getLangPath("$lang.lang") or return; + $langpath = undef unless -f $langpath; + } + return unless defined $langpath; + + my $gen = $self->{$langpath} ||= do { + my $g = highlight::CodeGenerator::getInstance($highlight::HTML); + $g->setFragmentCode(1); # generate html fragment + $g->setHTMLEnclosePreTag(1); # include
+
+		# whatever theme works
+		my $themepath = $dir->getThemePath('print.theme');
+		$g->initTheme($themepath);
+		$g->loadLanguage($langpath);
+		$g->setEncoding('utf-8');
+		$g;
+	};
+	\($gen->generateString($$str))
+}
+
+# SWIG instances aren't reference-counted, but $self is;
+# so we need to delete all the CodeGenerator instances manually
+# at our own destruction
+sub DESTROY {
+	my ($self) = @_;
+	foreach my $gen (values %$self) {
+		if (ref($gen) eq 'highlight::CodeGenerator') {
+			highlight::CodeGenerator::deleteInstance($gen);
+		}
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
new file mode 100644
index 00000000..3d6ca03b
--- /dev/null
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -0,0 +1,73 @@
+# Copyright (C) 2019 all contributors 
+# License: AGPL-3.0+ 
+
+# Standalone PSGI app to provide syntax highlighting as-a-service
+# via "highlight" Perl module ("libhighlight-perl" in Debian).
+#
+# This allows exposing highlight as a persistent HTTP service for
+# other scripts via HTTP PUT requests.  PATH_INFO will be used
+# as a hint for detecting the language for highlight.
+#
+# The following example using curl(1) will do the right thing
+# regarding the file extension:
+#
+#   curl -HExpect: -T /path/to/file http://example.com/
+#
+# You can also force a file extension by giving a path
+# (in this case, "c") via:
+#
+#   curl -HExpect: -T /path/to/file http://example.com/x.c
+
+package PublicInbox::WwwHighlight;
+use strict;
+use warnings;
+use HTTP::Status qw(status_message);
+use parent qw(PublicInbox::HlMod);
+
+# TODO: support highlight(1) for distros which don't package the
+# SWIG extension.  Also, there may be admins who don't want to
+# have ugly SWIG-generated code in a long-lived Perl process.
+
+sub r ($) {
+	my ($code) = @_;
+	my $msg = status_message($code);
+	my $len = length($msg);
+	[ $code, [qw(Content-Type text/plain Content-Length), $len], [$msg] ]
+}
+
+# another slurp API hogging up all my memory :<
+# This is capped by whatever the PSGI server allows,
+# $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} for PublicInbox::HTTP (10 MB)
+sub read_in_full ($) {
+	my ($env) = @_;
+
+	my $in = $env->{'psgi.input'};
+	my $off = 0;
+	my $buf = '';
+	my $len = $env->{CONTENT_LENGTH} || 8192;
+	while (1) {
+		my $r = $in->read($buf, $len, $off);
+		last unless defined $r;
+		return \$buf if $r == 0;
+		$off += $r;
+	}
+	$env->{'psgi.errors'}->print("input read error: $!\n");
+}
+
+# entry point for PSGI
+sub call {
+	my ($self, $env) = @_;
+	my $req_method = $env->{REQUEST_METHOD};
+
+	return r(405) if $req_method ne 'PUT';
+
+	my $bref = read_in_full($env) or return r(500);
+	$bref = $self->do_hl($bref, $env->{PATH_INFO});
+
+	my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+	push @$h, 'Content-Length', bytes::length($$bref);
+
+	[ 200, $h, [ $$bref ] ]
+}
+
+1;
-- 
cgit v1.2.3-24-ge0c7