about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm | 12
1 files changed, 12 insertions, 0 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index a790ac40..85821ea7 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -549,11 +549,23 @@ sub unindex_mm {
         $self->{mm}->mid_delete(mid_mime($mime));
 }
 
+# returns how many "\r" bytes $_[0] lacks for every "\n" in it to be a CRLF
+sub crlf_adjust ($) {
+        if (index($_[0], "\r\n") < 0) {
+                # no CRLF anywhere (the common case): every "\n" needs an "\r",
+                # so a cheap counting tr/// beats an expensive m//g scan
+                $_[0] =~ tr/\n/\n/;
+        } else { # some CRLF present: count only "\n" not preceded by "\r" (slow)
+                scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
+        }
+}
+
 sub index_both { # git->cat_async callback
         my ($bref, $oid, $type, $size, $sync) = @_;
         my ($nr, $max) = @$sync{qw(nr max)};
         ++$$nr;
         $$max -= $size;
+        $size += crlf_adjust($$bref);
         my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg';
         my $self = $sync->{sidx};
         my $eml = PublicInbox::Eml->new($bref);