From b400772bf3801cb29949cf2ae5021e8e3a8e2d94 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 22 Feb 2018 23:05:46 +0000 Subject: Don't use LIMIT in UPDATE statements ...not all distributions build SQLite with that enabled. [ew: LIMIT shouldn't be necessary because `key' is primary] --- lib/PublicInbox/Msgmap.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index a147b9f3..6b6d1c6e 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -57,7 +57,7 @@ sub meta_accessor { $prev = $dbh->selectrow_array($sql, undef, $key); if (defined $prev) { - $sql = 'UPDATE meta SET val = ? WHERE key = ? LIMIT 1'; + $sql = 'UPDATE meta SET val = ? WHERE key = ?'; $dbh->do($sql, undef, $value, $key); } else { $sql = 'INSERT INTO meta (key,val) VALUES (?,?)'; -- cgit v1.2.3-24-ge0c7 From ebb59815035b42c276a89a585e16e69f51dbdb98 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Tue, 27 Feb 2018 20:25:23 +0000 Subject: searchidx: do not modify Xapian DB while iterating Iterating through a list of documents while modifying them does not seem to be supported in Xapian and it can trigger DatabaseCorruptError exceptions. This only worked with past datasets out of dumb luck. With the work-in-progress "v2" public-inbox layout, this problem might become more visible as the "thread skeleton" is partitioned out to a separate, smaller Xapian database. I've reproduced the problem on both Debian 8.x and 9.x with Xapian 1.2.19 (chert backend) and 1.4.3 (glass backend) respectively. 
--- lib/PublicInbox/SearchIdx.pm | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 66faed31..5559b39d 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -696,15 +696,22 @@ sub create_ghost { sub merge_threads { my ($self, $winner_tid, $loser_tid) = @_; return if $winner_tid == $loser_tid; - my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid); my $db = $self->{xdb}; - for (; $head != $tail; $head->inc) { - my $docid = $head->get_docid; - my $doc = $db->get_document($docid); - $doc->remove_term('G' . $loser_tid); - $doc->add_term('G' . $winner_tid); - $db->replace_document($docid, $doc); + my $batch_size = 1000; # don't let @ids grow too large to avoid OOM + while (1) { + my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid); + return if $head == $tail; + my @ids; + for (; $head != $tail && @ids < $batch_size; $head->inc) { + push @ids, $head->get_docid; + } + foreach my $docid (@ids) { + my $doc = $db->get_document($docid); + $doc->remove_term('G' . $loser_tid); + $doc->add_term('G' . $winner_tid); + $db->replace_document($docid, $doc); + } } } -- cgit v1.2.3-24-ge0c7 From 747dadaf56334765b29e63e6559e735b914edff9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Mar 2018 09:46:46 +0000 Subject: nntp: improve fairness during XOVER and similar commands For other commands generating long responses, we generally want to yield to another client after emitting 100 lines. However, XOVER-based responses already query 200 lines worth of responses at a time, so we were sending 20000 lines before yielding to other clients. This may help avoid timeouts for some clients.
--- lib/PublicInbox/NNTP.pm | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 56d8e010..1e564634 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -590,9 +590,10 @@ sub long_response ($$$$) { my $err; do { - eval { $cb->(\$beg) }; + eval { $cb->(\$beg, \$lim) }; } until (($err = $@) || $self->{closed} || - ++$beg > $end || !--$lim || $self->{write_buf_size}); + ++$beg > $end || --$lim < 0 || + $self->{write_buf_size}); if ($err || $self->{closed}) { $self->{long_res} = undef; @@ -609,7 +610,7 @@ sub long_response ($$$$) { update_idle_time($self); $self->watch_read(1); } - } elsif (!$lim || $self->{write_buf_size}) { + } elsif ($lim < 0 || $self->{write_buf_size}) { # no recursion, schedule another call ASAP # but only after all pending writes are done update_idle_time($self); @@ -715,11 +716,12 @@ sub hdr_searchmsg ($$$$) { more($self, $xhdr ? r221 : r225); my $off = 0; long_response($self, $beg, $end, sub { - my ($i) = @_; + my ($i, $lim) = @_; my $res = $srch->query_xover($beg, $end, $off); my $msgs = $res->{msgs}; my $nr = scalar @$msgs or return; $off += $nr; + $$lim -= $nr; my $tmp = ''; foreach my $s (@$msgs) { $tmp .= $s->num . ' ' . $s->$field . 
"\r\n"; @@ -853,11 +855,12 @@ sub cmd_xover ($;$) { my $srch = $self->{ng}->search; my $off = 0; long_response($self, $beg, $end, sub { - my ($i) = @_; + my ($i, $lim) = @_; my $res = $srch->query_xover($beg, $end, $off); my $msgs = $res->{msgs}; my $nr = scalar @$msgs or return; $off += $nr; + $$lim -= $nr; # OVERVIEW.FMT more($self, join("\r\n", map { -- cgit v1.2.3-24-ge0c7 From b912a4ea144004aeedde2e28dee33c6c83dd2273 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Mar 2018 19:05:20 +0000 Subject: nntp: do not drain rbuf if there is a command pending Some clients pipeline requests aggressively (enough to match LINE_MAX) and we should not read from the client socket until we know there's no pending command in our read buffer. Reported-and-tested-by: Sergey Organov --- lib/PublicInbox/NNTP.pm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 1e564634..267fe4b9 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -949,10 +949,12 @@ sub event_write { sub event_read { my ($self) = @_; use constant LINE_MAX => 512; # RFC 977 section 2.3 - my $r = 1; - my $buf = $self->read(LINE_MAX) or return $self->close; - $self->{rbuf} .= $$buf; + if (index($self->{rbuf}, "\n") < 0) { + my $buf = $self->read(LINE_MAX) or return $self->close; + $self->{rbuf} .= $$buf; + } + my $r = 1; while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]+)\r?\n//) { my $line = $1; return $self->close if $line =~ /[[:cntrl:]]/s; -- cgit v1.2.3-24-ge0c7 From 79c034bf6e2a64c09d77e2ac4bf734347d686559 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 19 Mar 2018 07:51:09 +0000 Subject: extmsg: rework partial MID matching to favor current inbox The current inbox is more important for partial Message-ID matching, so we try harder on that to fix common errors before moving onto other inboxes. 
Then, prevent expensive scanning of other inboxes by requiring a Message-ID length of at least 16 bytes. Finally, we limit the overall partial responses to 200 when scanning other inboxes to avoid excessive memory usage. --- lib/PublicInbox/ExtMsg.pm | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index ab9591f9..e0aabfb1 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -84,17 +84,33 @@ sub ext_msg { my $n_partial = 0; my @partial; - eval { require PublicInbox::Msgmap }; - my $have_mm = $@ ? 0 : 1; - if ($have_mm) { + if (my $mm = $cur->mm) { + my $tmp_mid = $mid; + my $res = $mm->mid_prefixes($tmp_mid, 100); + if ($res && scalar(@$res)) { + $n_partial += scalar(@$res); + push @partial, [ $cur, $res ]; + # fixup common errors: + } elsif ($tmp_mid =~ s,/[tTf],,) { + $res = $mm->mid_prefixes($tmp_mid, 100); + if ($res && scalar(@$res)) { + $n_partial += scalar(@$res); + push @partial, [ $cur, $res ]; + } + } + } + + # can't find a partial match in current inbox, try the others: + if (!$n_partial && length($mid) >= 16) { my $tmp_mid = $mid; again: - unshift @ibx, $cur; foreach my $ibx (@ibx) { my $mm = $ibx->mm or next; - if (my $res = $mm->mid_prefixes($tmp_mid)) { + my $res = $mm->mid_prefixes($tmp_mid, 100); + if ($res && scalar(@$res)) { $n_partial += scalar(@$res); push @partial, [ $ibx, $res ]; + last if $n_partial >= 100; } } # fixup common errors: -- cgit v1.2.3-24-ge0c7 From 3e8a4842d3f0ec51bab024322a934b91ace6f4ed Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 27 Mar 2018 10:46:10 +0000 Subject: extmsg: use news.gmane.org for Message-ID lookups http://mid.gmane.org/ has not worked for a while, but their NNTP server continues to work. Use that and perhaps give NNTP more exposure. 
Reported-by: Jonathan Corbet --- lib/PublicInbox/ExtMsg.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index e0aabfb1..760614df 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -17,7 +17,7 @@ our @EXT_URL = ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', - 'http://mid.gmane.org/%s', + 'nntp://news.gmane.org/%s', 'https://lists.debian.org/msgid-search/%s', '//docs.FreeBSD.org/cgi/mid.cgi?db=mid&id=%s', 'https://www.w3.org/mid/%s', -- cgit v1.2.3-24-ge0c7 From 703490825ebf9e22e30ab79063a81f8476ad2a0c Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Tue, 27 Mar 2018 21:26:59 +0000 Subject: http: fix modification of read-only value This fails in the rare case we get a partial send() on "\r\n" when writing chunked HTTP responses out. --- lib/PublicInbox/HTTP.pm | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 3dd49be3..bc10814e 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -316,9 +316,12 @@ sub more ($$) { if (MSG_MORE && !$self->{write_buf_size}) { my $n = send($self->{sock}, $_[1], MSG_MORE); if (defined $n) { - my $dlen = length($_[1]); - return 1 if $n == $dlen; # all done! - $_[1] = substr($_[1], $n, $dlen - $n); + my $nlen = length($_[1]) - $n; + return 1 if $nlen == 0; # all done! 
+ eval { $_[1] = substr($_[1], $n, $nlen) }; + if ($@) { # modification of read-only value: + return $self->write(substr($_[1], $n, $nlen)); + } # fall through to normal write: } } -- cgit v1.2.3-24-ge0c7 From 61ffb7233be0ac177dcf4feadba871fea8e025dd Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Tue, 27 Mar 2018 21:27:00 +0000 Subject: githttpbackend: avoid infinite loop on generic PSGI servers We must detect EOF when reading a POST body with standard PSGI servers. This does not affect deployments using the standard public-inbox-httpd; but most smaller inboxes should be able to get away using a generic PSGI server. --- lib/PublicInbox/GitHTTPBackend.pm | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 4d2816a0..6efe5b31 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -280,6 +280,7 @@ sub input_to_file { err($env, "error reading input: $!"); return; } + last if $r == 0; my $off = 0; while ($r > 0) { my $w = syswrite($in, $buf, $r, $off); -- cgit v1.2.3-24-ge0c7 From 051a182852a9eef8b0dc8714c81293daded1d4dc Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 30 Mar 2018 15:25:57 -0600 Subject: Allow specification of the number of search results to return Add an "l=" parameter to the search query syntax to specify how many results should be returned. 
--- lib/PublicInbox/SearchView.pm | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 53b88c34..bf4415f0 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -35,7 +35,7 @@ sub sres_top_html { my $code = 200; # double the limit for expanded views: my $opts = { - limit => $LIM, + limit => $q->{l}, offset => $q->{o}, mset => 1, relevance => $q->{r}, @@ -182,6 +182,7 @@ sub search_nav_bot { my $total = $mset->get_matches_estimated; my $nr = scalar $mset->items; my $o = $q->{o}; + my $l = $q->{l}; my $end = $o + $nr; my $beg = $o + 1; my $rv = '
';
@@ -191,15 +192,15 @@ sub search_nav_bot {
 	} else {
 		$rv .= "No more results, only $total";
 	}
-	my $n = $o + $LIM;
+	my $n = $o + $l;
 
 	if ($n < $total) {
-		my $qs = $q->qs_html(o => $n);
+		my $qs = $q->qs_html(o => $n, l => $l);
 		$rv .= qq{  next}
 	}
 	if ($o > 0) {
 		$rv .= $n < $total ? '/' : '       ';
-		my $p = $o - $LIM;
+		my $p = $o - $l;
 		my $qs = $q->qs_html(o => ($p > 0 ? $p : 0));
 		$rv .= qq{prev};
 	}
@@ -308,10 +309,15 @@ sub new {
 	my ($class, $qp) = @_;
 
 	my $r = $qp->{r};
+	my $l = $qp->{l} || '200';
+	if (! ($l =~ /(\d+)/ && $l <= $LIM)) {
+		$l = $LIM;
+	}
 	bless {
 		q => $qp->{'q'},
 		x => $qp->{x} || '',
 		o => (($qp->{o} || '0') =~ /(\d+)/),
+		l => $l,
 		r => (defined $r && $r ne '0'),
 	}, $class;
 }
@@ -334,6 +340,9 @@ sub qs_html {
 	if (my $o = $self->{o}) { # ignore o == 0
 		$qs .= "&o=$o";
 	}
+	if (my $l = $self->{l}) {
+		$qs .= "&l=$l";
+	}
 	if (my $r = $self->{r}) {
 		$qs .= "&r";
 	}
-- 
cgit v1.2.3-24-ge0c7


From 3f3d9cf7d88a851721f1f8468e1311a4f0c02ff6 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Sun, 1 Apr 2018 22:58:19 +0000
Subject: searchview: fix non-numeric comparison

We don't want non-fully-numeric limits being compared and
tripping warnings.  While we're at it, avoid hard-coding
'200' and reuse $LIM as the default.
---
 lib/PublicInbox/SearchView.pm | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index bf4415f0..219006a0 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -309,10 +309,8 @@ sub new {
 	my ($class, $qp) = @_;
 
 	my $r = $qp->{r};
-	my $l = $qp->{l} || '200';
-	if (! ($l =~ /(\d+)/ && $l <= $LIM)) {
-		$l = $LIM;
-	}
+	my ($l) = (($qp->{l} || '') =~ /(\d+)/);
+	$l = $LIM if !$l || $l > $LIM;
 	bless {
 		q => $qp->{'q'},
 		x => $qp->{x} || '',
-- 
cgit v1.2.3-24-ge0c7


From 4f0b09919ae9c8823bf6c1fa1452bc27945952a3 Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)" 
Date: Sat, 3 Mar 2018 20:18:34 +0000
Subject: nntp: fix NEWNEWS command

I guess nobody uses this command (slrnpull does not), and
the breakage was not noticed until I started writing new
tests for multi-MID handling.

Fixes: 3fc411c772a21d8f ("search: drop pointless range processors for Unix timestamp")
---
 lib/PublicInbox/NNTP.pm   |  2 +-
 lib/PublicInbox/Search.pm | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 267fe4b9..23be7754 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -336,7 +336,7 @@ sub cmd_newnews ($$$$;$$) {
 	long_response($self, 0, long_response_limit, sub {
 		my ($i) = @_;
 		my $srch = $srch[0];
-		my $res = $srch->query($ts, $opts);
+		my $res = $srch->query_ts($ts, $opts);
 		my $msgs = $res->{msgs};
 		if (my $nr = scalar @$msgs) {
 			more($self, '<' .
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 9ab5afe6..df02e0b5 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -281,6 +281,19 @@ sub query_xover {
 	_do_enquire($self, $query, {num => 1, limit => 200, offset => $offset});
 }
 
+sub query_ts {
+	my ($self, $ts, $opts) = @_;
+	my $qp = $self->{qp_ts} ||= eval {
+		my $q = Search::Xapian::QueryParser->new;
+		$q->set_database($self->{xdb});
+		$q->add_valuerangeprocessor(
+			Search::Xapian::NumberValueRangeProcessor->new(TS));
+		$q
+	};
+	my $query = $qp->parse_query($ts, QP_FLAGS);
+	_do_enquire($self, $query, $opts);
+}
+
 sub lookup_message {
 	my ($self, $mid) = @_;
 	$mid = mid_clean($mid);
-- 
cgit v1.2.3-24-ge0c7


From 15eb65ffd59c7cf5cce67c6b7621f63194fd1083 Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Tue, 3 Apr 2018 10:34:54 +0000
Subject: mbox: do not barf on queries which return no results

Having zero search results means we never get a chance
to populate the Content-Disposition header for mbox
downloads.
---
 lib/PublicInbox/Mbox.pm       | 7 ++++++-
 lib/PublicInbox/SearchView.pm | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 84cc3845..04c86cc1 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -157,7 +157,12 @@ sub response {
 	# http://www.iana.org/assignments/media-types/application/gzip
 	$body->{hdr} = [ 'Content-Type', 'application/gzip' ];
 	$body->{fn} = $fn;
-	my $hdr = $body->getline; # fill in Content-Disposition filename
+	# fill in Content-Disposition filename
+	my $hdr = $body->getline;
+	if ($body->{hdr}) {
+		return [ 404, ['Content-Type','text/plain'],
+			[ "No results found\n" ] ];
+	}
 	[ 200, $hdr, $body ];
 }
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 219006a0..1c4442e4 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -22,6 +22,7 @@ sub mbox_results {
 	my ($ctx) = @_;
 	my $q = PublicInbox::SearchQuery->new($ctx->{qp});
 	my $x = $q->{x};
+	require PublicInbox::Mbox;
 	return PublicInbox::Mbox::mbox_all($ctx, $q->{'q'}) if $x eq 'm';
 	sres_top_html($ctx);
 }
-- 
cgit v1.2.3-24-ge0c7


From 119463b3b8517e5ec149198bb83588999118ee1d Mon Sep 17 00:00:00 2001
From: Eric Wong 
Date: Wed, 18 Apr 2018 20:30:22 +0000
Subject: nntp: allow and ignore empty commands

Somebody entering a bare "\n" in telnet shouldn't hold a client up
indefinitely and prevent shutdown.
---
 lib/PublicInbox/NNTP.pm | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 23be7754..c574c9e6 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -116,6 +116,7 @@ sub args_ok ($$) {
 sub process_line ($$) {
 	my ($self, $l) = @_;
 	my ($req, @args) = split(/\s+/, $l);
+	return unless defined($req);
 	$req = lc($req);
 	$req = eval {
 		no strict 'refs';
@@ -955,7 +956,7 @@ sub event_read {
 		$self->{rbuf} .= $$buf;
 	}
 	my $r = 1;
-	while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]+)\r?\n//) {
+	while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]*)\r?\n//) {
 		my $line = $1;
 		return $self->close if $line =~ /[[:cntrl:]]/s;
 		my $t0 = now();
@@ -975,7 +976,7 @@ sub event_read {
 sub watch_read {
 	my ($self, $bool) = @_;
 	my $rv = $self->SUPER::watch_read($bool);
-	if ($bool && $self->{rbuf} ne '') {
+	if ($bool && index($self->{rbuf}, "\n") >= 0) {
 		# Force another read if there is a pipelined request.
 		# We don't know if the socket has anything for us to read,
 		# and we must double-check again by the time the timer fires
-- 
cgit v1.2.3-24-ge0c7