about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-01-03 02:06:15 +0000
committer Eric Wong <e@80x24.org> 2021-01-03 18:30:31 +0000
commit 71461c67fee940b05309baa8c67bac10c8c51ac6 (patch)
tree 07ab30ed55e4bd62ab2022167e14e0ae09bb43ad /lib/PublicInbox/SearchIdx.pm
parent 323d8bac125e89a76c904a54a7ae0b2e36f05cc6 (diff)
download public-inbox-71461c67fee940b05309baa8c67bac10c8c51ac6.tar.gz
We don't need to be keeping the raw message around after it hits
git.  Shard work now relies on Storable (or Sereal) and all of
the indexing code relies on the Email::MIME-like API of Eml to
access interesting parts of the message.

Similarly, smsg->{raw_bytes} is no longer carried around and we
do the CRLF adjustment when setting smsg->{bytes}.

There's also a small simplification to t/import.t while
we're in the area to use xqx instead of spawn/popen_rd.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 17
1 files changed, 3 insertions, 14 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index da3ac2e3..a7005051 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -22,7 +22,7 @@ use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn nodatacow_dir);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
+our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack
         index_text term_generator add_val is_bad_blob);
 my $X = \%PublicInbox::Search::X;
 our ($DB_CREATE_OR_OPEN, $DB_OPEN);
@@ -613,17 +613,6 @@ sub index_mm {
         }
 }
 
-# returns the number of bytes to add if given a non-CRLF arg
-sub crlf_adjust ($) {
-        if (index($_[0], "\r\n") < 0) {
-                # common case is LF-only, every \n needs an \r;
-                # so favor a cheap tr// over an expensive m//g
-                $_[0] =~ tr/\n/\n/;
-        } else { # count number of '\n' w/o '\r', expensive:
-                scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
-        }
-}
-
 sub is_bad_blob ($$$$) {
         my ($oid, $type, $size, $expect_oid) = @_;
         if ($type ne 'blob') {
@@ -640,8 +629,8 @@ sub index_both { # git->cat_async callback
         my ($nr, $max) = @$sync{qw(nr max)};
         ++$$nr;
         $$max -= $size;
-        $size += crlf_adjust($$bref);
-        my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg';
+        my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
+        $smsg->set_bytes($$bref, $size);
         my $self = $sync->{sidx};
         local $self->{current_info} = "$self->{current_info}: $oid";
         my $eml = PublicInbox::Eml->new($bref);