user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/6] more NNTP server updates
@ 2015-09-21 11:11 Eric Wong
  2015-09-21 11:11 ` [PATCH 1/6] msgmap: minor cleanup to move constant declaration Eric Wong
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

Always finding new bugs.  The XHDR Xref performance improvement
is pretty huge, and the XOVER speedup is noticeable as well.

We will begin to implement RFC 3977, AFAIK the latest NNTP RFC.

Eric Wong (6):
      msgmap: minor cleanup to move constant declaration
      nntp: speed up XHDR Xref for rtin
      nntp: speed up xover slightly
      t/nntpd.t: improve test runnability
      remove bytes:: for length checks
      nntp: proper UTF-8 support (hopefully?)

 lib/PublicInbox/Filter.pm     |  2 +-
 lib/PublicInbox/GitCatFile.pm |  5 +++--
 lib/PublicInbox/Msgmap.pm     |  2 +-
 lib/PublicInbox/NNTP.pm       | 47 ++++++++++++++++++++++++++++++++++++-------
 public-inbox-nntpd            |  4 ++--
 t/nntpd.t                     |  3 ++-
 6 files changed, 49 insertions(+), 14 deletions(-)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] msgmap: minor cleanup to move constant declaration
  2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
@ 2015-09-21 11:11 ` Eric Wong
  2015-09-21 11:11 ` [PATCH 2/6] nntp: speed up XHDR Xref for rtin Eric Wong
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

This doesn't actually change anything as the constant is still
usable in other subroutines, but helps with consistency and
readability IMHO.
---
 lib/PublicInbox/Msgmap.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 2f64d90..f285790 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -84,10 +84,10 @@ sub mid_insert {
 	$dbh->last_insert_id(undef, undef, 'msgmap', 'num');
 }
 
-use constant MID_FOR => 'SELECT mid FROM msgmap WHERE num = ? LIMIT 1';
 sub mid_for {
 	my ($self, $num) = @_;
 	my $dbh = $self->{dbh};
+	use constant MID_FOR => 'SELECT mid FROM msgmap WHERE num = ? LIMIT 1';
 	my $sth = $self->{mid_for} ||= $dbh->prepare(MID_FOR);
 	$sth->bind_param(1, $num);
 	$sth->execute;
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] nntp: speed up XHDR Xref for rtin
  2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
  2015-09-21 11:11 ` [PATCH 1/6] msgmap: minor cleanup to move constant declaration Eric Wong
@ 2015-09-21 11:11 ` Eric Wong
  2015-09-21 11:11 ` [PATCH 3/6] nntp: speed up xover slightly Eric Wong
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

We could also start displaying Xref in XOVER, as rtin seems to
prefer it.  Anyway, this is nearly 100 times faster now and
requires no DB changes.
---
 lib/PublicInbox/NNTP.pm | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 79f2c2f..d5eb497 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -371,7 +371,7 @@ found:
 	if ($set_headers) {
 		$s->header_set('Newsgroups', $ng->{name});
 		$s->header_set('Lines', $s->body =~ tr!\n!\n!);
-		$s->header_set('Xref', "$ng->{domain} $ng->{name}:$n");
+		$s->header_set('Xref', xref($ng, $n));
 
 		# must be last
 		if ($set_headers == 2) {
@@ -562,6 +562,35 @@ sub xhdr_message_id ($$) { # optimize XHDR Message-ID [range] for slrnpull.
 	}
 }
 
+sub xref ($$) {
+	my ($ng, $n) = @_;
+	"$ng->{domain} $ng->{name}:$n"
+}
+
+sub xhdr_xref ($$) { # optimize XHDR Xref [range] for rtin
+	my ($self, $range) = @_;
+
+	my $ng = $self->{ng};
+	my $mm = $ng->mm;
+	if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID
+		my $n = $mm->num_for($range);
+		more($self, '221 Header follows');
+		more($self, "<$range> ".xref($ng, $n)) if defined $n;
+		'.';
+	} else { # numeric range
+		$range = $self->{article} unless defined $range;
+		my $r = get_range($self, $range);
+		return $r unless ref $r;
+		my ($beg, $end) = @$r;
+		more($self, '221 Header follows');
+		$self->long_response($beg, $end, sub {
+			my ($i) = @_;
+			my $mid = $mm->mid_for($$i);
+			more($self, "$$i ".xref($ng, $$i)) if defined $mid;
+		});
+	}
+}
+
 sub header_obj_for {
 	my ($srch, $mid) = @_;
 	eval {
@@ -612,6 +641,8 @@ sub cmd_xhdr ($$;$) {
 	my $sub = lc $header;
 	if ($sub eq 'message-id') {
 		xhdr_message_id($self, $range);
+	} elsif ($sub eq 'xref') {
+		xhdr_xref($self, $range);
 	} elsif ($sub =~ /\A(subject|references|date)\z/ && $ng->search) {
 		xhdr_searchmsg($self, $sub, $range);
 	} else {
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] nntp: speed up xover slightly
  2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
  2015-09-21 11:11 ` [PATCH 1/6] msgmap: minor cleanup to move constant declaration Eric Wong
  2015-09-21 11:11 ` [PATCH 2/6] nntp: speed up XHDR Xref for rtin Eric Wong
@ 2015-09-21 11:11 ` Eric Wong
  2015-09-21 11:11 ` [PATCH 4/6] t/nntpd.t: improve test runnability Eric Wong
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

Reserializing the message to a string to check its size wastes
considerable time; we should be able to get by with slightly
less accuracy.
---
 lib/PublicInbox/GitCatFile.pm | 3 ++-
 lib/PublicInbox/NNTP.pm       | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/GitCatFile.pm b/lib/PublicInbox/GitCatFile.pm
index 4f16762..dd3f291 100644
--- a/lib/PublicInbox/GitCatFile.pm
+++ b/lib/PublicInbox/GitCatFile.pm
@@ -38,7 +38,7 @@ sub _cat_file_begin {
 }
 
 sub cat_file {
-	my ($self, $object) = @_;
+	my ($self, $object, $sizeref) = @_;
 
 	$object .= "\n";
 	my $len = bytes::length($object);
@@ -58,6 +58,7 @@ sub cat_file {
 		die "Unexpected result from git cat-file: $head\n";
 
 	my $size = $1;
+	$$sizeref = $size if $sizeref;
 	my $bytes_left = $size;
 	my $offset = 0;
 	my $rv = '';
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index d5eb497..fb93330 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -366,7 +366,8 @@ find_mid:
 	}
 found:
 	my $o = 'HEAD:' . mid2path($mid);
-	my $s = eval { Email::Simple->new($ng->gcf->cat_file($o)) };
+	my $bytes;
+	my $s = eval { Email::Simple->new($ng->gcf->cat_file($o, \$bytes)) };
 	return $err unless $s;
 	if ($set_headers) {
 		$s->header_set('Newsgroups', $ng->{name});
@@ -375,7 +376,7 @@ found:
 
 		# must be last
 		if ($set_headers == 2) {
-			$s->header_set('Bytes', bytes::length($s->as_string));
+			$s->header_set('Bytes', $bytes);
 			$s->body_set('');
 		}
 	}
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] t/nntpd.t: improve test runnability
  2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
                   ` (2 preceding siblings ...)
  2015-09-21 11:11 ` [PATCH 3/6] nntp: speed up xover slightly Eric Wong
@ 2015-09-21 11:11 ` Eric Wong
  2015-09-21 11:11 ` [PATCH 5/6] remove bytes:: for length checks Eric Wong
  2015-09-21 11:11 ` [PATCH 6/6] nntp: proper UTF-8 support (hopefully?) Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

The created socket FD number may not be 3 in the test, so
force it to be 3 inside the child process.
---
 t/nntpd.t | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/nntpd.t b/t/nntpd.t
index 527cfc2..d30ed63 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -44,7 +44,6 @@ my %opts = (
 	Listen => 1024,
 );
 my $sock = IO::Socket::INET->new(%opts);
-plan skip_all => 'sock fd!=3, cannot test nntpd integration' if fileno($sock) != 3;
 my $pid;
 END { kill 'TERM', $pid if defined $pid };
 {
@@ -78,8 +77,10 @@ EOF
 	is($fl, FD_CLOEXEC, 'cloexec set by default (Perl behavior)');
 	$pid = fork;
 	if ($pid == 0) {
+		use POSIX qw(dup2);
 		# pretend to be systemd
 		fcntl($sock, F_SETFD, $fl &= ~FD_CLOEXEC);
+		dup2(fileno($sock), 3) or die "dup2 failed: $!\n";
 		$ENV{LISTEN_PID} = $$;
 		$ENV{LISTEN_FDS} = 1;
 		exec $nntpd, "--stdout=$out", "--stderr=$err";
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] remove bytes:: for length checks
  2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
                   ` (3 preceding siblings ...)
  2015-09-21 11:11 ` [PATCH 4/6] t/nntpd.t: improve test runnability Eric Wong
@ 2015-09-21 11:11 ` Eric Wong
  2015-09-21 11:11 ` [PATCH 6/6] nntp: proper UTF-8 support (hopefully?) Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

Apparently, my mental model of Perl internals is still incorrect
after all these years.  I am but a simple *nix programmer:
everything is a bag of bytes to me.

This fixes a problem with UTF-8 headers from Xapian (via "XHDR
Subject [range]") triggering partial writes and writing an extra
newline to the outputs.
---
 lib/PublicInbox/Filter.pm     | 2 +-
 lib/PublicInbox/GitCatFile.pm | 2 +-
 lib/PublicInbox/NNTP.pm       | 6 +++---
 public-inbox-nntpd            | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index 84df0e7..cab6aad 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -233,7 +233,7 @@ sub replace_body {
 sub recheck_type_ok {
 	my ($part) = @_;
 	my $s = $part->body;
-	((bytes::length($s) < 0x10000) &&
+	((length($s) < 0x10000) &&
 		($s =~ /\A([\P{XPosixPrint}\f\n\r\t]+)\z/))
 }
 
diff --git a/lib/PublicInbox/GitCatFile.pm b/lib/PublicInbox/GitCatFile.pm
index dd3f291..48ae673 100644
--- a/lib/PublicInbox/GitCatFile.pm
+++ b/lib/PublicInbox/GitCatFile.pm
@@ -41,7 +41,7 @@ sub cat_file {
 	my ($self, $object, $sizeref) = @_;
 
 	$object .= "\n";
-	my $len = bytes::length($object);
+	my $len = length($object);
 
 	$self->_cat_file_begin;
 	my $written = syswrite($self->{out}, $object);
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index fb93330..2b580d1 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -771,9 +771,9 @@ sub do_more ($$) {
 	if (MSG_MORE && !$self->{write_buf_size}) {
 		my $n = send($self->{sock}, $data, MSG_MORE);
 		if (defined $n) {
-			my $dlen = bytes::length($data);
+			my $dlen = length($data);
 			return 1 if $n == $dlen; # all done!
-			$data = bytes::substr($data, $n, $dlen - $n);
+			$data = substr($data, $n, $dlen - $n);
 		}
 	}
 	$self->do_write($data);
@@ -813,7 +813,7 @@ again:
 	}
 
 	return $self->close if $r < 0;
-	my $len = bytes::length($self->{rbuf});
+	my $len = length($self->{rbuf});
 	return $self->close if ($len >= LINE_MAX);
 }
 
diff --git a/public-inbox-nntpd b/public-inbox-nntpd
index 0395e98..674ecad 100644
--- a/public-inbox-nntpd
+++ b/public-inbox-nntpd
@@ -172,13 +172,13 @@ sub sockname {
 	my ($s) = @_;
 	my $n = getsockname($s) or return;
 	my ($port, $addr);
-	if (bytes::length($n) >= 28) {
+	if (length($n) >= 28) {
 		require Socket6;
 		($port, $addr) = Socket6::unpack_sockaddr_in6($n);
 	} else {
 		($port, $addr) = Socket::sockaddr_in($n);
 	}
-	if (bytes::length($addr) == 4) {
+	if (length($addr) == 4) {
 		$n = Socket::inet_ntoa($addr)
 	} else {
 		$n = '['.Socket6::inet_ntop(Socket6::AF_INET6(), $addr).']';
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] nntp: proper UTF-8 support (hopefully?)
  2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
                   ` (4 preceding siblings ...)
  2015-09-21 11:11 ` [PATCH 5/6] remove bytes:: for length checks Eric Wong
@ 2015-09-21 11:11 ` Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2015-09-21 11:11 UTC (permalink / raw)
  To: meta

RFC 3977 stipulates the use of UTF-8 as the default charset,
so we shall try using that and hopefully not mangle things.
---
 lib/PublicInbox/NNTP.pm | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 2b580d1..91b10f2 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -8,7 +8,7 @@ use fields qw(nntpd article rbuf ng long_res);
 use PublicInbox::Msgmap;
 use PublicInbox::GitCatFile;
 use PublicInbox::MID qw(mid2path);
-use Email::Simple;
+use Email::MIME;
 use Data::Dumper qw(Dumper);
 use POSIX qw(strftime);
 use Time::HiRes qw(clock_gettime ualarm CLOCK_MONOTONIC);
@@ -29,6 +29,7 @@ my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr);
 sub new ($$$) {
 	my ($class, $sock, $nntpd) = @_;
 	my $self = fields::new($class);
+	binmode $sock, ':utf8'; # RFC 3977
 	$self->SUPER::new($sock);
 	$self->{nntpd} = $nntpd;
 	res($self, '201 server ready - post via email');
@@ -367,7 +368,7 @@ find_mid:
 found:
 	my $o = 'HEAD:' . mid2path($mid);
 	my $bytes;
-	my $s = eval { Email::Simple->new($ng->gcf->cat_file($o, \$bytes)) };
+	my $s = eval { Email::MIME->new($ng->gcf->cat_file($o, \$bytes)) };
 	return $err unless $s;
 	if ($set_headers) {
 		$s->header_set('Newsgroups', $ng->{name});
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-09-21 11:11 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-21 11:11 [PATCH 0/6] more NNTP server updates Eric Wong
2015-09-21 11:11 ` [PATCH 1/6] msgmap: minor cleanup to move constant declaration Eric Wong
2015-09-21 11:11 ` [PATCH 2/6] nntp: speed up XHDR Xref for rtin Eric Wong
2015-09-21 11:11 ` [PATCH 3/6] nntp: speed up xover slightly Eric Wong
2015-09-21 11:11 ` [PATCH 4/6] t/nntpd.t: improve test runnability Eric Wong
2015-09-21 11:11 ` [PATCH 5/6] remove bytes:: for length checks Eric Wong
2015-09-21 11:11 ` [PATCH 6/6] nntp: proper UTF-8 support (hopefully?) Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).