user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH] view: do not redundantly obfuscate addresses
@ 2020-04-06  8:32  7% Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2020-04-06  8:32 UTC (permalink / raw)
  To: meta

We shouldn't rerun the address obfuscator on data it has
already processed.  Instead, run it on the unescaped text
part and substitute the UTF-8 "\x{2022}" character before
the text hits HTML escaping.

Fixes: 9bdd81dc16ba6511 ("view: msg_iter calls add_body_text directly")
---
 lib/PublicInbox/Hval.pm | 2 +-
 lib/PublicInbox/View.pm | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 79005d21..23233f0e 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -78,7 +78,7 @@ sub prurl ($$) {
 # However, • was chosen to make copy+paste errors more obvious
 sub obfuscate_addrs ($$;$) {
 	my $ibx = $_[0];
-	my $repl = $_[2] || '•';
+	my $repl = $_[2] // '•';
 	my $re = $ibx->{-no_obfuscate_re}; # regex of domains
 	my $addrs = $ibx->{-no_obfuscate}; # { adddress => 1 }
 	$_[1] =~ s/(([\w\.\+=\-]+)\@([\w\-]+\.[\w\.\-]+))/
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index ddd94e48..33b323dc 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -532,6 +532,9 @@ sub add_text_body { # callback for msg_iter
 	# link generation in diffs with the extra '%0D'
 	$s =~ s/\r\n/\n/sg;
 
+	# will be escaped to `•' in HTML
+	obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};
+
 	# always support diff-highlighting, but we can't linkify hunk
 	# headers for solver unless some coderepo are configured:
 	my $diff;
@@ -589,8 +592,6 @@ sub add_text_body { # callback for msg_iter
 		}
 		undef $cur; # free memory
 	}
-
-	obfuscate_addrs($ibx, $$rv) if $ibx->{obfuscate};
 }
 
 sub _msg_page_prepare_obuf {

^ permalink raw reply related	[relevance 7%]

* [PATCH 26/30] view: msg_iter calls add_body_text directly
  2019-12-25  7:50  6% [PATCH 00/30] www: eliminate most per-request closures Eric Wong
@ 2019-12-25  7:51  6% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2019-12-25  7:51 UTC (permalink / raw)
  To: meta

No need to waste several kilobytes creating an anonymous sub for
every invocation of msg_iter.
---
 lib/PublicInbox/View.pm | 53 ++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 6f827754..c38a1289 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -269,8 +269,10 @@ sub index_entry {
 	$rv .= "\n";
 
 	# scan through all parts, looking for displayable text
-	my $ibx = $ctx->{-inbox};
-	msg_iter($mime, sub { $rv .= add_text_body($mhref, $ctx, $_[0]) });
+	$ctx->{mhref} = $mhref;
+	$ctx->{rv} = \$rv;
+	msg_iter($mime, \&add_text_body, $ctx);
+	delete $ctx->{rv};
 
 	# add the footer
 	$rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -500,12 +502,13 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 }
 
 sub multipart_text_as_html {
-	my ($mime, $upfx, $ctx) = @_;
-	my $rv = "";
+	my ($mime, $mhref, $ctx) = @_;
+	$ctx->{mhref} = $mhref;
+	$ctx->{rv} = \(my $rv = '');
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, sub { $rv .= add_text_body($upfx, $ctx, $_[0]) });
-	$rv;
+	msg_iter($mime, \&add_text_body, $ctx);
+	${delete $ctx->{rv}};
 }
 
 sub flush_quote {
@@ -523,7 +526,7 @@ sub flush_quote {
 }
 
 sub attach_link ($$$$;$) {
-	my ($upfx, $ct, $p, $fn, $err) = @_;
+	my ($ctx, $ct, $p, $fn, $err) = @_;
 	my ($part, $depth, @idx) = @$p;
 	my $nl = $idx[-1] > 1 ? "\n" : '';
 	my $idx = join('.', @idx);
@@ -544,29 +547,29 @@ sub attach_link ($$$$;$) {
 	} else {
 		$sfn = 'a.bin';
 	}
-	my $ret = qq($nl<a\nhref="$upfx$idx-$sfn">);
+	my $rv = $ctx->{rv};
+	$$rv .= qq($nl<a\nhref="$ctx->{mhref}$idx-$sfn">);
 	if ($err) {
-		$ret .=
-"[-- Warning: decoded text below may be mangled --]\n";
+		$$rv .= "[-- Warning: decoded text below may be mangled --]\n";
 	}
-	$ret .= "[-- Attachment #$idx: ";
+	$$rv .= "[-- Attachment #$idx: ";
 	my $ts = "Type: $ct, Size: $size bytes";
 	$desc = ascii_html($desc);
-	$ret .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
-	$ret .= "</a>\n";
+	$$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
+	$$rv .= "</a>\n";
+	undef;
 }
 
-sub add_text_body {
-	my ($upfx, $ctx, $p) = @_;
+sub add_text_body { # callback for msg_iter
+	my ($p, $ctx) = @_;
+	my $upfx = $ctx->{mhref};
 	my $ibx = $ctx->{-inbox};
-	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
 	# $p - from msg_iter: [ Email::MIME, depth, @idx ]
 	my ($part, $depth, @idx) = @$p;
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
 	my ($s, $err) = msg_part_text($part, $ct);
-
-	return attach_link($upfx, $ct, $p, $fn) unless defined $s;
+	return attach_link($ctx, $ct, $p, $fn) unless defined $s;
 
 	# makes no difference to browsers, and don't screw up filename
 	# link generation in diffs with the extra '%0D'
@@ -607,29 +610,29 @@ sub add_text_body {
 	# split off quoted and unquoted blocks:
 	my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
 	$s = '';
+	my $rv = $ctx->{rv};
 	if (defined($fn) || $depth > 0 || $err) {
 		# badly-encoded message with $err? tell the world about it!
-		$s .= attach_link($upfx, $ct, $p, $fn, $err);
-		$s .= "\n";
+		attach_link($ctx, $ct, $p, $fn, $err);
+		$$rv .= "\n";
 	}
 	my $l = PublicInbox::Linkify->new;
 	foreach my $cur (@sections) {
 		if ($cur =~ /\A>/) {
-			flush_quote(\$s, $l, \$cur);
+			flush_quote($rv, $l, \$cur);
 		} elsif ($diff) {
 			@$diff = split(/^/m, $cur);
 			$cur = undef;
-			flush_diff(\$s, $ctx, $l);
+			flush_diff($rv, $ctx, $l);
 		} else {
 			# regular lines, OK
 			$l->linkify_1($cur);
-			$s .= $l->linkify_2(ascii_html($cur));
+			$$rv .= $l->linkify_2(ascii_html($cur));
 			$cur = undef;
 		}
 	}
 
-	obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
-	$s;
+	obfuscate_addrs($ibx, $$rv) if $ibx->{obfuscate};
 }
 
 sub _msg_html_prepare {

^ permalink raw reply related	[relevance 6%]

* [PATCH 00/30] www: eliminate most per-request closures
@ 2019-12-25  7:50  6% Eric Wong
  2019-12-25  7:51  6% ` [PATCH 26/30] view: msg_iter calls add_body_text directly Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2019-12-25  7:50 UTC (permalink / raw)
  To: meta

Closures (aka "anonymous subs") tack several KB of memory onto
every WWW request/response, decreasing scalability and
performance of our WWW endpoints.  They also increase human
review time to check for reference cycles.

Similar changes to -nntpd and the generic parts of -httpd were
also done recently:
https://public-inbox.org/meta/20191221235319.27082-1-e@80x24.org/
https://public-inbox.org/meta/20191221080007.27810-1-e@80x24.org/

These could still use some naming improvements, and it's been
pretty tiring writing the same-ish commit message over and over.

All these changes around eliminating closures also make it
easier to port our codebase to languages which lack closures.

Fwiw, I've been brainstorming ideas to create a new, refcounted
language where cyclic references are impossible by design.  Such
a design would not be possible if closures were implemented, but
it is doable otherwise by taking a hint from *nix FSes.

Eric Wong (30):
  git: allow async_cat to pass arg to callback
  httpd/async: support passing arg to callbacks
  qspawn: remove some anonymous subs for psgi_qx
  qspawn: disambiguate command vs PSGI env
  qspawn: replace anonymous $end callbacks w/ event_step
  msg_iter: provide means to stop using anonymous subs
  qspawn: reduce local vars, de-anonymize rd_hdr
  httpd/async: get rid of ephemeral main_cb
  qspawn: psgi_return: initial cb can be named
  qspawn: psgi_return_start: hoist out from psgi_return
  qspawn: psgi_qx: eliminate anonymous subs
  qspawn: drop "qspawn.filter" support, for now
  qspawn: psgi_return: allow non-anon parse_hdr callback
  githttpbackend: split out wwwstatic
  www: lazy load Plack::Util
  mboxgz: pass $ctx to callback to avoid anon subs
  feed: avoid anonymous subs
  config: each_inbox: pass user arg to callback
  view: avoid anon sub in stream_thread
  view: msg_html: stop using an anonymous sub
  contentid: no anonymous sub
  wwwtext: avoid anonymous sub in response
  searchview: pass named subs to Www*Stream
  view: thread_html: pass named sub to WwwStream
  searchview: remove anonymous sub when sorting threads by relevance
  view: msg_iter calls add_body_text directly
  wwwattach: avoid anonymous sub for msg_iter
  viewvcs: avoid anonymous sub for HTML response
  solvergit: allow passing arg to user-supplied callback
  search: retry_reopen passes user arg to callback

 MANIFEST                          |   1 +
 lib/PublicInbox/Cgit.pm           |  19 +-
 lib/PublicInbox/Config.pm         |  11 +-
 lib/PublicInbox/ContentId.pm      |  53 +++---
 lib/PublicInbox/ExtMsg.pm         |  58 +++---
 lib/PublicInbox/Feed.pm           |  51 +++--
 lib/PublicInbox/GetlineBody.pm    |  12 +-
 lib/PublicInbox/Git.pm            |  14 +-
 lib/PublicInbox/GitHTTPBackend.pm |  99 +---------
 lib/PublicInbox/HTTPD/Async.pm    |  56 +++---
 lib/PublicInbox/Mbox.pm           | 131 +++++++------
 lib/PublicInbox/MboxGz.pm         |   2 +-
 lib/PublicInbox/MsgIter.pm        |   8 +-
 lib/PublicInbox/NewsWWW.pm        |  16 +-
 lib/PublicInbox/Qspawn.pm         | 296 +++++++++++++++---------------
 lib/PublicInbox/Search.pm         |  16 +-
 lib/PublicInbox/SearchMsg.pm      |   9 +-
 lib/PublicInbox/SearchView.pm     | 100 +++++-----
 lib/PublicInbox/SolverGit.pm      | 149 ++++++++-------
 lib/PublicInbox/View.pm           | 187 ++++++++++---------
 lib/PublicInbox/ViewVCS.pm        | 111 ++++++-----
 lib/PublicInbox/WWW.pm            |   2 +-
 lib/PublicInbox/WwwAtomStream.pm  |   2 +-
 lib/PublicInbox/WwwAttach.pm      |  49 ++---
 lib/PublicInbox/WwwListing.pm     |  37 ++--
 lib/PublicInbox/WwwStatic.pm      | 105 +++++++++++
 lib/PublicInbox/WwwText.pm        |  20 +-
 t/git.t                           |  21 +++
 t/qspawn.t                        |  19 +-
 29 files changed, 882 insertions(+), 772 deletions(-)
 create mode 100644 lib/PublicInbox/WwwStatic.pm


^ permalink raw reply	[relevance 6%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2019-12-25  7:50  6% [PATCH 00/30] www: eliminate most per-request closures Eric Wong
2019-12-25  7:51  6% ` [PATCH 26/30] view: msg_iter calls add_body_text directly Eric Wong
2020-04-06  8:32  7% [PATCH] view: do not redundantly obfuscate addresses Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).