From 04e4a5573de1b9ed2f6528a0de568a1693882eea Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 31 Mar 2020 08:49:36 +0000 Subject: v2writable: index Message-IDs w/ spaces properly Message-IDs can apparently contain spaces and other weird characters. Ensure we pass those properly to shard subprocesses when importing messages in parallel mode. Our NNTP request parser does not deal with spaces in the Message-ID yet, and I don't expect most NNTP clients to, either. Nor does the Net::NNTP client handle them in responses. --- lib/PublicInbox/SearchIdxShard.pm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 1ea01095..06bcd403 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -69,8 +69,9 @@ sub shard_worker_loop ($$$$$) { $self->remove_by_oid($oid, $mid); } else { chomp $line; - my ($bytes, $num, $blob, $mid, $ds, $ts) = - split(/ /, $line); + # n.b. $mid may contain spaces(!) + my ($bytes, $num, $blob, $ds, $ts, $mid) = + split(/ /, $line, 6); $self->begin_txn_lazy; my $n = read($r, my $msg, $bytes) or die "read: $!\n"; $n == $bytes or die "short read: $n != $bytes\n"; @@ -93,7 +94,8 @@ sub shard_worker_loop ($$$$$) { sub index_raw { my ($self, $msgref, $mime, $smsg) = @_; if (my $w = $self->{w}) { - print $w join(' ', @$smsg{qw(bytes num blob mid ds ts)}), + # mid must be last, it can contain spaces (but not LF) + print $w join(' ', @$smsg{qw(bytes num blob ds ts mid)}), "\n", $$msgref or die "failed to write shard $!\n"; } else { $$msgref = undef; -- cgit v1.2.3-24-ge0c7