about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong <e@yhbt.net> 2020-03-31 08:49:36 +0000
committer: Eric Wong <e@yhbt.net> 2020-03-31 23:41:06 +0000
commit: 04e4a5573de1b9ed2f6528a0de568a1693882eea (patch)
tree: bcc72fc932592f68c2cecd5a5c0aee3dd8091299 /lib
parent: 14561ae19938facc8ddf5038eafed3df9519ee10 (diff)
download: public-inbox-04e4a5573de1b9ed2f6528a0de568a1693882eea.tar.gz
Message-IDs can apparently contain spaces and other weird
characters.  Ensure we pass those properly to shard subprocesses
when importing messages in parallel mode.

Our NNTP request parser does not yet deal with spaces in the
Message-ID, and I don't expect most NNTP clients to, either.
Nor does the Net::NNTP client handle them in responses.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/SearchIdxShard.pm | 8
1 files changed, 5 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 1ea01095..06bcd403 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -69,8 +69,9 @@ sub shard_worker_loop ($$$$$) {
                         $self->remove_by_oid($oid, $mid);
                 } else {
                         chomp $line;
-                        my ($bytes, $num, $blob, $mid, $ds, $ts) =
-                                                        split(/ /, $line);
+                        # n.b. $mid may contain spaces(!)
+                        my ($bytes, $num, $blob, $ds, $ts, $mid) =
+                                                        split(/ /, $line, 6);
                         $self->begin_txn_lazy;
                         my $n = read($r, my $msg, $bytes) or die "read: $!\n";
                         $n == $bytes or die "short read: $n != $bytes\n";
@@ -93,7 +94,8 @@ sub shard_worker_loop ($$$$$) {
 sub index_raw {
         my ($self, $msgref, $mime, $smsg) = @_;
         if (my $w = $self->{w}) {
-                print $w join(' ', @$smsg{qw(bytes num blob mid ds ts)}),
+                # mid must be last, it can contain spaces (but not LF)
+                print $w join(' ', @$smsg{qw(bytes num blob ds ts mid)}),
                         "\n", $$msgref or die "failed to write shard $!\n";
         } else {
                 $$msgref = undef;