user/dev discussion of public-inbox itself
 help / color / Atom feed
decfd714d7ba5e0551b125de94cf16d056642ca6 blob 3646 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
 
# Copyright (C) 2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# I have no idea how stable or safe this is for handling untrusted
# input, but it seems to have been around for a while, and the
# highlight(1) executable is supported by gitweb and cgit.
#
# I'm also unsure about API stability, but highlight 3.x seems to
# have been around a few years and ikiwiki (apparently the only
# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
# major changes to support it in recent years.
#
# Some code stolen from ikiwiki (GPL-2.0+)
# wrapper for SWIG-generated highlight.pm bindings
package PublicInbox::HlMod;
use strict;
use warnings;
use highlight; # SWIG-generated stuff

sub _parse_filetypes ($) {
	my $ft_conf = $_[0]->searchFile('filetypes.conf') or
				die 'filetypes.conf not found by highlight';
	open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
	local $/;
	my $cfg = <$fh>;
	my %ext2lang;
	my @shebang; # order matters

	# Hrm... why isn't this exposed by the highlight API?
	# highlight >= 3.2 format (bind-style) (from ikiwiki)
	while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
			 Extensions\s*=\s*{([^}]+)}/sgx) {
		my $lang = $1;
		foreach my $bit (split(/,/, $2)) {
			$bit =~ s/.*"(.*)".*/$1/s;
			$ext2lang{$bit} = $lang;
		}
	}
	# AFAIK, all the regexps used by in filetypes.conf distributed
	# by highlight work as Perl REs
	while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
			Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
		my ($lang, $re) = ($1, $2);
		eval {
			my $perl_re = qr/$re/;
			push @shebang, [ $lang, $perl_re ];
		};
		if ($@) {
			warn "$lang shebang=[[$re]] did not work in Perl: $@";
		}
	}
	(\%ext2lang, \@shebang);
}

sub new {
	my ($class) = @_;
	my $dir = highlight::DataDir->new;
	$dir->initSearchDirectories('');
	my ($ext2lang, $shebang) = _parse_filetypes($dir);
	bless {
		-dir => $dir,
		-ext2lang => $ext2lang,
		-shebang => $shebang,
	}, $class;
}

sub _shebang2lang ($$) {
	my ($self, $str) = @_;
	my $shebang = $self->{-shebang};
	foreach my $s (@$shebang) {
		return $s->[0] if $$str =~ $s->[1];
	}
	undef;
}

sub _path2lang ($$) {
	my ($self, $path) = @_;
	my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
	$ext = lc($ext);
	$self->{-ext2lang}->{$ext} || $ext;
}

sub do_hl {
	my ($self, $str, $path) = @_;
	my $lang = _path2lang($self, $path) if defined $path;
	my $dir = $self->{-dir};
	my $langpath;
	if (defined $lang) {
		$langpath = $dir->getLangPath("$lang.lang") or return;
		$langpath = undef unless -f $langpath;
	}
	unless (defined $langpath) {
		$lang = _shebang2lang($self, $str) or return;
		$langpath = $dir->getLangPath("$lang.lang") or return;
		$langpath = undef unless -f $langpath;
	}
	return unless defined $langpath;

	my $gen = $self->{$langpath} ||= do {
		my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
		$g->setFragmentCode(1); # generate html fragment

		# whatever theme works
		my $themepath = $dir->getThemePath('print.theme');
		$g->initTheme($themepath);
		$g->loadLanguage($langpath);
		$g->setEncoding('utf-8');
		$g;
	};

	# we assume $$str is valid UTF-8, but the SWIG binding doesn't
	# know that, so ensure it's marked as UTF-8 even if it isnt...
	my $out = $gen->generateString($$str);
	utf8::decode($out);
	\$out;
}

# SWIG instances aren't reference-counted, but $self is;
# so we need to delete all the CodeGenerator instances manually
# at our own destruction
sub DESTROY {
	my ($self) = @_;
	foreach my $gen (values %$self) {
		if (ref($gen) eq 'highlight::CodeGenerator') {
			highlight::CodeGenerator::deleteInstance($gen);
		}
	}
}

1;
debug log:

solving decfd71 ...
found decfd71 in https://80x24.org/public-inbox.git

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox