user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/2] selective obfuscation
@ 2017-06-23 22:34  7% Eric Wong
  2017-06-23 22:34  4% ` [PATCH 2/2] allow admins to configure non-obfuscated addresses/domains Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2017-06-23 22:34 UTC (permalink / raw)
  To: meta

Obfuscating well-known, publicly known addresses such as the
list address is pointless and poses accessibility problems.
Mitigate that by allowing admins to avoid obfuscating certain
addresses.

For example, I currently have this in my config:

==> ~/.public-inbox/config <==
	[publicinbox]
		noObfuscate = @80x24.org @vger.kernel.org
		noObfuscate = @public-inbox.org @nongnu.org @googlegroups.com
		noObfuscate = @ruby-lang.org @debian.org @lists.debian.org

Eric Wong (2):
      config: assume lists have multiple addresses
      allow admins to configure non-obfuscated addresses/domains

 MANIFEST                      |  1 +
 lib/PublicInbox/Config.pm     | 42 +++++++++++++++++++++++++++-------
 lib/PublicInbox/Hval.pm       | 15 +++++++++++-
 lib/PublicInbox/SearchView.pm |  9 ++++----
 lib/PublicInbox/View.pm       | 53 +++++++++++++++++++++++--------------------
 t/config.t                    | 30 +++++++++++++++++++++++-
 t/hval.t                      | 33 +++++++++++++++++++++++++++
 7 files changed, 144 insertions(+), 39 deletions(-)

^ permalink raw reply	[relevance 7%]

* [PATCH 2/2] allow admins to configure non-obfuscated addresses/domains
  2017-06-23 22:34  7% [PATCH 0/2] selective obfuscation Eric Wong
@ 2017-06-23 22:34  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2017-06-23 22:34 UTC (permalink / raw)
  To: meta

We will also treat all known list addresses as non-obfuscated.

Setting publicinbox.noObfuscate in ~/.public-inbox/config
allows users to disable address obfuscation on a
per-domain or per-address basis.
---
 MANIFEST                      |  1 +
 lib/PublicInbox/Config.pm     | 35 ++++++++++++++++++++++++++--
 lib/PublicInbox/Hval.pm       | 15 +++++++++++-
 lib/PublicInbox/SearchView.pm |  9 ++++----
 lib/PublicInbox/View.pm       | 53 +++++++++++++++++++++++--------------------
 t/config.t                    | 28 +++++++++++++++++++++++
 t/hval.t                      | 33 +++++++++++++++++++++++++++
 7 files changed, 142 insertions(+), 32 deletions(-)
 create mode 100644 t/hval.t

diff --git a/MANIFEST b/MANIFEST
index 43ac991..983bd1e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -142,6 +142,7 @@ t/httpd-corner.psgi
 t/httpd-corner.t
 t/httpd-unix.t
 t/httpd.t
+t/hval.t
 t/import.t
 t/inbox.t
 t/init.t
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 2be485e..369d9bd 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -20,7 +20,28 @@ sub new {
 	$self->{-by_addr} ||= {};
 	$self->{-by_name} ||= {};
 	$self->{-by_newsgroup} ||= {};
+	$self->{-no_obfuscate} ||= {};
 	$self->{-limiters} ||= {};
+
+	if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
+		$no = [ $no ] if ref($no) ne 'ARRAY';
+		my @domains;
+		foreach my $n (@$no) {
+			my @n = split(/\s+/, $n);
+			foreach (@n) {
+				if (/\S+@\S+/) { # full address
+					$self->{-no_obfuscate}->{lc $_} = 1;
+				} else {
+					# allow "example.com" or "@example.com"
+					s/\A@//;
+					push @domains, quotemeta($_);
+				}
+			}
+		}
+		my $nod = join('|', @domains);
+		$self->{-no_obfuscate_re} = qr/(?:$nod)\z/i;
+	}
+
 	$self;
 }
 
@@ -127,6 +148,7 @@ sub git_config_dump {
 		}
 	}
 	close $fh or die "failed to close ($cmd) pipe: $?";
+
 	\%rv;
 }
 
@@ -151,7 +173,6 @@ sub _fill {
 			warn "Ignoring $pfx.$k=$v in config, not boolean\n";
 		}
 	}
-
 	# TODO: more arrays, we should support multi-value for
 	# more things to encourage decentralization
 	foreach my $k (qw(address altid nntpmirror)) {
@@ -166,11 +187,21 @@ sub _fill {
 	$rv->{name} = $name;
 	$rv->{-pi_config} = $self;
 	$rv = PublicInbox::Inbox->new($rv);
-	$self->{-by_addr}->{lc($_)} = $rv foreach @{$rv->{address}};
+	foreach (@{$rv->{address}}) {
+		my $lc_addr = lc($_);
+		$self->{-by_addr}->{$lc_addr} = $rv;
+		$self->{-no_obfuscate}->{$lc_addr} = 1;
+	}
 	if (my $ng = $rv->{newsgroup}) {
 		$self->{-by_newsgroup}->{$ng} = $rv;
 	}
 	$self->{-by_name}->{$name} = $rv;
+	if ($rv->{obfuscate}) {
+		$rv->{-no_obfuscate} = $self->{-no_obfuscate};
+		$rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
+		each_inbox($self, sub {}); # noop to populate -no_obfuscate
+	}
+	$rv
 }
 
 1;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 2379b91..8005088 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -91,6 +91,19 @@ sub prurl {
 # &#8228; &#183; and &#890; were also candidates:
 #   https://public-inbox.org/meta/20170615015250.GA6484@starla/
 # However, &#8226; was chosen to make copy+paste errors more obvious
-sub obfuscate_addrs ($) { $_[0] =~ s/(\S+@[^\.]+)\./$1&#8226;/g }
+sub obfuscate_addrs ($$) {
+	my $ibx = $_[0];
+	my $re = $ibx->{-no_obfuscate_re}; # regex of domains
+	my $addrs = $ibx->{-no_obfuscate}; # { adddress => 1 }
+	$_[1] =~ s/([\w\.\+=\-]+\@([\w\-]+\.[\w\.\-]+))/
+		my ($addr, $domain) = ($1, $2);
+		if ($addrs->{$addr} || ((defined $re && $domain =~ $re))) {
+			$addr;
+		} else {
+			$addr =~ s!([^\.]+)\.!$1&#8226;!g;
+			$addr
+		}
+		/sge;
+}
 
 1;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 777710e..a597403 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -89,7 +89,8 @@ sub mset_summary {
 	my $pfx = ' ' x $pad;
 	my $res = \($ctx->{-html_tip});
 	my $srch = $ctx->{srch};
-	my $obfs = $ctx->{-inbox}->{obfuscate};
+	my $ibx = $ctx->{-inbox};
+	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
 	foreach my $m ($mset->items) {
 		my $rank = sprintf("%${pad}d", $m->get_rank + 1);
 		my $pct = $m->get_percent;
@@ -103,9 +104,9 @@ sub mset_summary {
 		}
 		my $s = ascii_html($smsg->subject);
 		my $f = ascii_html($smsg->from_name);
-		if ($obfs) {
-			obfuscate_addrs($s);
-			obfuscate_addrs($f);
+		if ($obfs_ibx) {
+			obfuscate_addrs($obfs_ibx, $s);
+			obfuscate_addrs($obfs_ibx, $f);
 		}
 		my $ts = PublicInbox::View::fmt_ts($smsg->ts);
 		my $mid = PublicInbox::Hval->new_msgid($smsg->mid)->{href};
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 388207c..e96f773 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -24,12 +24,13 @@ sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD };
 sub msg_html {
 	my ($ctx, $mime) = @_;
 	my $hdr = $mime->header_obj;
-	my $obfs = $ctx->{-inbox}->{obfuscate};
-	my $tip = _msg_html_prepare($hdr, $ctx, $obfs);
+	my $ibx = $ctx->{-inbox};
+	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
+	my $tip = _msg_html_prepare($hdr, $ctx, $obfs_ibx);
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		my ($nr, undef) = @_;
 		if ($nr == 1) {
-			$tip . multipart_text_as_html($mime, '', $obfs) .
+			$tip . multipart_text_as_html($mime, '', $obfs_ibx) .
 				'</pre><hr>'
 		} elsif ($nr == 2) {
 			# fake an EOF if generating the footer fails;
@@ -138,11 +139,11 @@ sub index_entry {
 
 	my $root_anchor = $ctx->{root_anchor} || '';
 	my $irt;
-	my $obfs = $ctx->{-obfuscate};
+	my $obfs_ibx = $ctx->{-obfs_ibx};
 
 	my $rv = "<a\nhref=#e$id\nid=m$id>*</a> ";
 	$subj = '<b>'.ascii_html($subj).'</b>';
-	obfuscate_addrs($subj) if $obfs;
+	obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
 	$subj = "<u\nid=u>$subj</u>" if $root_anchor eq $id_m;
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
@@ -150,12 +151,12 @@ sub index_entry {
 	foreach my $f (qw(To Cc)) {
 		my $dst = _hdr_names_html($hdr, $f);
 		if ($dst ne '') {
-			obfuscate_addrs($dst) if $obfs;
+			obfuscate_addrs($obfs_ibx, $dst) if $obfs_ibx;
 			push @tocc, "$f: $dst";
 		}
 	}
 	my $from = _hdr_names_html($hdr, 'From');
-	obfuscate_addrs($from) if $obfs;
+	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
 	$rv .= "From: $from @ "._msg_date($hdr)." UTC";
 	my $upfx = $ctx->{-upfx};
 	my $mhref = $upfx . mid_escape($mid_raw) . '/';
@@ -173,7 +174,7 @@ sub index_entry {
 	$rv .= "\n";
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs, $_[0]) });
+	msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
 
 	# add the footer
 	$rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -319,7 +320,7 @@ sub stream_thread ($$) {
 	}
 	return missing_thread($ctx) unless $mime;
 
-	$ctx->{-obfuscate} = $ctx->{-inbox}->{obfuscate};
+	$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
 	$mime = PublicInbox::MIME->new($mime);
 	$ctx->{-title_html} = ascii_html($mime->header('Subject'));
 	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
@@ -374,14 +375,14 @@ sub thread_html {
 	my $rootset = thread_results($msgs);
 
 	# reduce hash lookups in pre_thread->skel_dump
-	$ctx->{-obfuscate} = $ctx->{-inbox}->{obfuscate};
+	my $inbox = $ctx->{-inbox};
+	$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
 	walk_thread($rootset, $ctx, *pre_thread);
 
 	$skel .= '</pre>';
 	return stream_thread($rootset, $ctx) unless $ctx->{flat};
 
 	# flat display: lazy load the full message from smsg
-	my $inbox = $ctx->{-inbox};
 	my $mime;
 	while ($mime = shift @$msgs) {
 		$mime = $inbox->msg_by_smsg($mime) and last;
@@ -406,11 +407,11 @@ sub thread_html {
 }
 
 sub multipart_text_as_html {
-	my ($mime, $upfx, $obfs) = @_;
+	my ($mime, $upfx, $obfs_ibx) = @_;
 	my $rv = "";
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs, $_[0]) });
+	msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) });
 	$rv;
 }
 
@@ -463,7 +464,7 @@ sub attach_link ($$$$;$) {
 }
 
 sub add_text_body {
-	my ($upfx, $obfs, $p) = @_;
+	my ($upfx, $obfs_ibx, $p) = @_;
 	# $p - from msg_iter: [ Email::MIME, depth, @idx ]
 	my ($part, $depth) = @$p; # attachment @idx is unused
 	my $ct = $part->content_type || 'text/plain';
@@ -515,10 +516,10 @@ sub add_text_body {
 
 	if (@quot) { # ugh, top posted
 		flush_quote(\$s, $l, \@quot);
-		obfuscate_addrs($s) if $obfs;
+		obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
 		$s;
 	} else {
-		obfuscate_addrs($s) if $obfs;
+		obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
 		if ($s =~ /\n\z/s) { # common, last line ends with a newline
 			$s;
 		} else { # some editors don't do newlines...
@@ -528,7 +529,7 @@ sub add_text_body {
 }
 
 sub _msg_html_prepare {
-	my ($hdr, $ctx, $obfs) = @_;
+	my ($hdr, $ctx, $obfs_ibx) = @_;
 	my $srch = $ctx->{srch} if $ctx;
 	my $atom = '';
 	my $rv = "<pre\nid=b>"; # anchor for body start
@@ -547,7 +548,7 @@ sub _msg_html_prepare {
 		if ($h eq 'From') {
 			my @n = PublicInbox::Address::names($v->raw);
 			$title[1] = ascii_html(join(', ', @n));
-			obfuscate_addrs($title[1]) if $obfs;
+			obfuscate_addrs($obfs_ibx, $title[1]) if $obfs_ibx;
 		} elsif ($h eq 'Subject') {
 			$title[0] = $v->as_html;
 			if ($srch) {
@@ -557,7 +558,7 @@ sub _msg_html_prepare {
 			}
 		}
 		$v = $v->as_html;
-		obfuscate_addrs($v) if $obfs;
+		obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
 		$rv .= "$h: $v\n";
 
 	}
@@ -605,7 +606,8 @@ sub thread_skel {
 	$sres = load_results($srch, $sres);
 
 	# reduce hash lookups in skel_dump
-	$ctx->{-obfuscate} = $ctx->{-inbox}->{obfuscate};
+	my $ibx = $ctx->{-inbox};
+	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	walk_thread(thread_results($sres), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
@@ -763,8 +765,8 @@ sub skel_dump {
 	my $mid = $smsg->{mid};
 
 	my $f = ascii_html($smsg->from_name);
-	my $obfs = $ctx->{-obfuscate};
-	obfuscate_addrs($f) if $obfs;
+	my $obfs_ibx = $ctx->{-obfs_ibx};
+	obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx;
 
 	my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
 	my $attr = $f;
@@ -799,7 +801,7 @@ sub skel_dump {
 		$ctx->{seen}->{$h} = 1;
 		$subj = PublicInbox::Hval->new($subj);
 		$subj = $subj->as_html;
-		obfuscate_addrs($subj) if $obfs;
+		obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
 	}
 	my $m;
 	my $id = '';
@@ -896,7 +898,8 @@ sub dump_topics {
 	}
 
 	my @out;
-	my $obfs = $ctx->{-inbox}->{obfuscate};
+	my $ibx = $ctx->{-inbox};
+	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
 
 	# sort by recency, this allows new posts to "bump" old topics...
 	foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) {
@@ -928,7 +931,7 @@ sub dump_topics {
 			my $subj = $ex[$i + 1];
 			$mid = delete $seen->{$subj};
 			$subj = ascii_html($subj);
-			obfuscate_addrs($subj) if $obfs;
+			obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
 			$href = mid_escape($mid);
 			$s .= indent_for($level) . TCHILD;
 			$s .= "<a\nhref=\"$href/T/#u\">$subj</a>\n";
diff --git a/t/config.t b/t/config.t
index 437f1d1..353dac6 100644
--- a/t/config.t
+++ b/t/config.t
@@ -86,4 +86,32 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1);
 	is($ibx->{nntpserver}, 'news.alt.example.com','per-inbox NNTP server');
 }
 
+# no obfuscate domains
+{
+	my $pfx = "publicinbox.test";
+	my $pfx2 = "publicinbox.foo";
+	my %h = (
+		"$pfx.address" => 'test@example.com',
+		"$pfx.mainrepo" => '/path/to/non/existent',
+		"$pfx2.address" => 'foo@example.com',
+		"$pfx2.mainrepo" => '/path/to/foo',
+		lc("publicinbox.noObfuscate") =>
+			'public-inbox.org @example.com z@EXAMPLE.com',
+		"$pfx.obfuscate" => 'true', # :<
+	);
+	my %tmp = %h;
+	my $cfg = PublicInbox::Config->new(\%tmp);
+	my $ibx = $cfg->lookup_name('test');
+	my $re = $ibx->{-no_obfuscate_re};
+	like('meta@public-inbox.org', $re,
+		'public-inbox.org address not to be obfuscated');
+	like('t@example.com', $re, 'example.com address not to be obfuscated');
+	unlike('t@example.comM', $re, 'example.comM address does not match');
+	is_deeply($ibx->{-no_obfuscate}, {
+			'test@example.com' => 1,
+			'foo@example.com' => 1,
+			'z@example.com' => 1,
+		}, 'known addresses populated');
+}
+
 done_testing();
diff --git a/t/hval.t b/t/hval.t
new file mode 100644
index 0000000..dcbd838
--- /dev/null
+++ b/t/hval.t
@@ -0,0 +1,33 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok 'PublicInbox::Hval';
+
+my $ibx = {
+	-no_obfuscate_re => qr/(?:example\.com)\z/i,
+	-no_obfuscate => {
+		'meta@public-inbox.org' => 1,
+	}
+};
+
+my $html = <<'EOF';
+hello@example.comm
+hello@example.com
+meta@public-inbox.org
+test@public-inbox.org
+EOF
+
+PublicInbox::Hval::obfuscate_addrs($ibx, $html);
+
+my $exp = <<'EOF';
+hello@example&#8226;comm
+hello@example.com
+meta@public-inbox.org
+test@public-inbox&#8226;org
+EOF
+
+is($html, $exp, 'only obfuscated relevant addresses');
+
+done_testing();
-- 
EW


^ permalink raw reply related	[relevance 4%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2017-06-23 22:34  7% [PATCH 0/2] selective obfuscation Eric Wong
2017-06-23 22:34  4% ` [PATCH 2/2] allow admins to configure non-obfuscated addresses/domains Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).