about summary refs log tree commit homepage
path: root/lib/PublicInbox/ExtSearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-01-03 02:06:15 +0000
committer Eric Wong <e@80x24.org> 2021-01-03 18:30:31 +0000
commit 71461c67fee940b05309baa8c67bac10c8c51ac6 (patch)
tree 07ab30ed55e4bd62ab2022167e14e0ae09bb43ad /lib/PublicInbox/ExtSearchIdx.pm
parent 323d8bac125e89a76c904a54a7ae0b2e36f05cc6 (diff)
download public-inbox-71461c67fee940b05309baa8c67bac10c8c51ac6.tar.gz
We don't need to be keeping the raw message around after it hits
git.  Shard work now relies on Storable (or Sereal) and all of
the indexing code relies on the Email::MIME-like API of Eml to
access interesting parts of the message.

Similarly, smsg->{raw_bytes} is no longer carried around and we
do the CRLF adjustment when setting smsg->{bytes}.

There's also a small simplification to t/import.t while
we're in the area to use xqx instead of spawn/popen_rd.
Diffstat (limited to 'lib/PublicInbox/ExtSearchIdx.pm')
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm 11
1 files changed, 4 insertions, 7 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index d55d3db9..e6c21866 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -21,8 +21,7 @@ use Carp qw(croak carp);
 use Sys::Hostname qw(hostname);
 use POSIX qw(strftime);
 use PublicInbox::Search;
-use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor
-        is_bad_blob);
+use PublicInbox::SearchIdx qw(prepare_stack is_ancestor is_bad_blob);
 use PublicInbox::OverIdx;
 use PublicInbox::MiscIdx;
 use PublicInbox::MID qw(mids);
@@ -82,8 +81,6 @@ sub check_batch_limit ($) {
         my ($req) = @_;
         my $self = $req->{self};
         my $new_smsg = $req->{new_smsg};
-
-        # {raw_bytes} may be unset, so just use {bytes}
         my $n = $self->{transact_bytes} += $new_smsg->{bytes};
 
         # set flag for PublicInbox::V2Writable::index_todo:
@@ -239,7 +236,7 @@ sub index_oid { # git->cat_async callback for 'm'
         my $new_smsg = $req->{new_smsg} = bless {
                 blob => $oid,
         }, 'PublicInbox::Smsg';
-        $new_smsg->{bytes} = $size + crlf_adjust($$bref);
+        $new_smsg->set_bytes($$bref, $size);
         defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return;
         ++${$req->{nr}};
         do_step($req);
@@ -496,7 +493,7 @@ sub _reindex_oid { # git->cat_async callback
         my $ci = $self->{current_info};
         local $self->{current_info} = "$ci #$docid $oid";
         my $re_smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
-        $re_smsg->{bytes} = $size + crlf_adjust($$bref);
+        $re_smsg->set_bytes($$bref, $size);
         my $eml = PublicInbox::Eml->new($bref);
         $re_smsg->populate($eml, { autime => $orig_smsg->{ds},
                                 cotime => $orig_smsg->{ts} });
@@ -676,7 +673,7 @@ sub _reindex_unseen { # git->cat_async callback
         my $self = $req->{self} // die 'BUG: {self} unset';
         local $self->{current_info} = "$self->{current_info} $oid";
         my $new_smsg = bless { blob => $oid, }, 'PublicInbox::Smsg';
-        $new_smsg->{bytes} = $size + crlf_adjust($$bref);
+        $new_smsg->set_bytes($$bref, $size);
         my $eml = $req->{eml} = PublicInbox::Eml->new($bref);
         $req->{new_smsg} = $new_smsg;
         $req->{chash} = content_hash($eml);