From cd389aac52936c82f3416b3ceefe21e1250b8a3e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Jun 2020 07:05:02 +0000 Subject: index: account for CRLF conversion when storing bytes NNTP and IMAP both require CRLF conversions on the wire. They're also the only components which care about $smsg->{bytes}, so store the CRLF-adjusted value in over.sqlite3 and Xapian DBs. This will allow us to optimize the RFC822.SIZE fetch item in IMAP without triggering size mismatch errors in some clients' default configurations (e.g. Mail::IMAPClient), but not most others. It could also fix hypothetical problems with NNTP clients that report discrepancies between overview and article data. --- lib/PublicInbox/SearchIdxShard.pm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib/PublicInbox/SearchIdxShard.pm') diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index c1f52d8b..f7ba293f 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -71,11 +71,11 @@ sub shard_worker_loop ($$$$$) { } else { chomp $line; # n.b. $mid may contain spaces(!) - my ($bytes, $num, $blob, $ds, $ts, $mid) = - split(/ /, $line, 6); + my ($to_read, $bytes, $num, $blob, $ds, $ts, $mid) = + split(/ /, $line, 7); $self->begin_txn_lazy; - my $n = read($r, my $msg, $bytes) or die "read: $!\n"; - $n == $bytes or die "short read: $n != $bytes\n"; + my $n = read($r, my $msg, $to_read) or die "read: $!\n"; + $n == $to_read or die "short read: $n != $to_read\n"; my $mime = PublicInbox::Eml->new(\$msg); my $smsg = bless { bytes => $bytes, @@ -96,7 +96,8 @@ sub index_raw { my ($self, $msgref, $mime, $smsg) = @_; if (my $w = $self->{w}) { # mid must be last, it can contain spaces (but not LF) - print $w join(' ', @$smsg{qw(bytes num blob ds ts mid)}), + print $w join(' ', @$smsg{qw(raw_bytes bytes + num blob ds ts mid)}), "\n", $$msgref or die "failed to write shard $!\n"; } else { $$msgref = undef; -- cgit v1.2.3-24-ge0c7