diff options
author | Eric Wong <e@yhbt.net> | 2020-03-31 08:49:36 +0000 |
---|---|---|
committer | Eric Wong <e@yhbt.net> | 2020-03-31 23:41:06 +0000 |
commit | 04e4a5573de1b9ed2f6528a0de568a1693882eea (patch) | |
tree | bcc72fc932592f68c2cecd5a5c0aee3dd8091299 /lib/PublicInbox/SearchIdxShard.pm | |
parent | 14561ae19938facc8ddf5038eafed3df9519ee10 (diff) | |
download | public-inbox-04e4a5573de1b9ed2f6528a0de568a1693882eea.tar.gz |
Message-IDs can apparently contain spaces and other weird characters. Ensure we pass those properly to shard subprocesses when importing messages in parallel mode. Our NNTP request parser does not deal with spaces in the Message-ID, yet, and I don't expect most NNTP clients to, either. Nor does the Net::NNTP client handle them in responses.
Diffstat (limited to 'lib/PublicInbox/SearchIdxShard.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdxShard.pm | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 1ea01095..06bcd403 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -69,8 +69,9 @@ sub shard_worker_loop ($$$$$) { $self->remove_by_oid($oid, $mid); } else { chomp $line; - my ($bytes, $num, $blob, $mid, $ds, $ts) = - split(/ /, $line); + # n.b. $mid may contain spaces(!) + my ($bytes, $num, $blob, $ds, $ts, $mid) = + split(/ /, $line, 6); $self->begin_txn_lazy; my $n = read($r, my $msg, $bytes) or die "read: $!\n"; $n == $bytes or die "short read: $n != $bytes\n"; @@ -93,7 +94,8 @@ sub shard_worker_loop ($$$$$) { sub index_raw { my ($self, $msgref, $mime, $smsg) = @_; if (my $w = $self->{w}) { - print $w join(' ', @$smsg{qw(bytes num blob mid ds ts)}), + # mid must be last, it can contain spaces (but not LF) + print $w join(' ', @$smsg{qw(bytes num blob ds ts mid)}), "\n", $$msgref or die "failed to write shard $!\n"; } else { $$msgref = undef; |