diff options
author | Eric Wong <e@80x24.org> | 2021-01-03 02:06:15 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-01-03 18:30:31 +0000 |
commit | 71461c67fee940b05309baa8c67bac10c8c51ac6 (patch) | |
tree | 07ab30ed55e4bd62ab2022167e14e0ae09bb43ad /lib/PublicInbox/SearchIdx.pm | |
parent | 323d8bac125e89a76c904a54a7ae0b2e36f05cc6 (diff) | |
download | public-inbox-71461c67fee940b05309baa8c67bac10c8c51ac6.tar.gz |
We don't need to be keeping the raw message around after it hits git. Shard work now relies on Storable (or Sereal) and all of the indexing code relies on the Email::MIME-like API of Eml to access interesting parts of the message. Similarly, smsg->{raw_bytes} is no longer carried around and we do the CRLF adjustment when setting smsg->{bytes}. There's also a small simplification to t/import.t while we're in the area to use xqx instead of spawn/popen_rd.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 17 |
1 files changed, 3 insertions, 14 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index da3ac2e3..a7005051 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -22,7 +22,7 @@ use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn nodatacow_dir); use PublicInbox::Git qw(git_unquote); use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); -our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack +our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack index_text term_generator add_val is_bad_blob); my $X = \%PublicInbox::Search::X; our ($DB_CREATE_OR_OPEN, $DB_OPEN); @@ -613,17 +613,6 @@ sub index_mm { } } -# returns the number of bytes to add if given a non-CRLF arg -sub crlf_adjust ($) { - if (index($_[0], "\r\n") < 0) { - # common case is LF-only, every \n needs an \r; - # so favor a cheap tr// over an expensive m//g - $_[0] =~ tr/\n/\n/; - } else { # count number of '\n' w/o '\r', expensive: - scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g)); - } -} - sub is_bad_blob ($$$$) { my ($oid, $type, $size, $expect_oid) = @_; if ($type ne 'blob') { @@ -640,8 +629,8 @@ sub index_both { # git->cat_async callback my ($nr, $max) = @$sync{qw(nr max)}; ++$$nr; $$max -= $size; - $size += crlf_adjust($$bref); - my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg'; + my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg'; + $smsg->set_bytes($$bref, $size); my $self = $sync->{sidx}; local $self->{current_info} = "$self->{current_info}: $oid"; my $eml = PublicInbox::Eml->new($bref); |