diff options
-rw-r--r-- | lib/PublicInbox/NNTP.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/OverIdx.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 37 | ||||
-rw-r--r-- | lib/PublicInbox/SearchMsg.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 2 | ||||
-rwxr-xr-x | script/public-inbox-compact | 6 | ||||
-rw-r--r-- | t/search.t | 24 | ||||
-rw-r--r-- | t/v2writable.t | 7 |
8 files changed, 31 insertions, 59 deletions
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index fa890cb2..ace56e7a 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -725,7 +725,7 @@ sub hdr_searchmsg ($$$$) { my $nr = scalar @$msgs or return; my $tmp = ''; foreach my $s (@$msgs) { - $tmp .= $s->num . ' ' . $s->$field . "\r\n"; + $tmp .= $s->{num} . ' ' . $s->$field . "\r\n"; } utf8::encode($tmp); do_more($self, $tmp); diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 08f87447..62fec0da 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -207,8 +207,8 @@ sub link_refs { $tid; } -sub parse_references ($$$$) { - my ($self, $smsg, $mid0, $mids) = @_; +sub parse_references ($$$) { + my ($smsg, $mid0, $mids) = @_; my $mime = $smsg->{mime}; my $hdr = $mime->header_obj; my $refs = references($hdr); @@ -241,7 +241,7 @@ sub add_overview { blob => $oid, }, 'PublicInbox::SearchMsg'; my $mids = mids($mime->header_obj); - my $refs = $self->parse_references($smsg, $mid0, $mids); + my $refs = parse_references($smsg, $mid0, $mids); my $subj = $smsg->subject; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 7cfa7452..f9b40b0d 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -273,18 +273,12 @@ sub add_message { my $smsg = PublicInbox::SearchMsg->new($mime); my $doc = $smsg->{doc}; my $subj = $smsg->subject; - - $smsg->{lines} = $mime->body_raw =~ tr!\n!\n!; - defined $bytes or $bytes = length($mime->as_string); - $smsg->{bytes} = $bytes; - add_val($doc, PublicInbox::Search::TS(), $smsg->ts); my @ds = gmtime($smsg->ds); my $yyyymmdd = strftime('%Y%m%d', @ds); add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd); my $dt = strftime('%Y%m%d%H%M%S', @ds); add_val($doc, PublicInbox::Search::DT(), $dt); - my @vals = ($smsg->{ts}, $smsg->{ds}); my $tg = $self->term_generator; @@ -333,11 +327,11 @@ sub add_message { index_body($tg, \@orig, $doc) if @orig; }); - # populates smsg->references for smsg->to_doc_data - my $data = $smsg->to_doc_data($oid, $mid0); foreach my $mid (@$mids) { $tg->index_text($mid, 1, 'XM'); } + $smsg->{to} = $smsg->{cc} = ''; + my $data = $smsg->to_doc_data($oid, $mid0); $doc->set_data($data); if (my $altid = $self->{-altid}) { foreach my $alt (@$altid) { @@ -350,24 +344,11 @@ sub add_message { } } - $self->delete_article($num) if defined $num; # for reindexing - if (my $over = $self->{over}) { - utf8::encode($data); - $data = compress($data); - my $refs = $over->parse_references($smsg, $mid0, $mids); - my $xpath; - if ($subj ne '') { - $xpath = $self->subject_path($subj); - $xpath = id_compress($xpath); - } - - push @vals, $num, $mids, $refs, $xpath, $data; - $over->add_over(\@vals); + $over->add_overview($mime, $bytes, $num, $oid, $mid0); } $doc->add_boolean_term('Q' . $_) foreach @$mids; - $doc->add_boolean_term('XNUM' . $num) if defined $num; - $doc_id = $self->{xdb}->add_document($doc); + $self->{xdb}->replace_document($doc_id = $num, $doc); }; if ($@) { @@ -419,16 +400,6 @@ sub remove_message { } } -sub delete_article { - my ($self, $num) = @_; - my $ndel = 0; - batch_do($self, 'XNUM' . $num, sub { - my ($ids) = @_; - $ndel += scalar @$ids; - $self->{xdb}->delete_document($_) for @$ids; - }); -} - # MID is a hint in V2 sub remove_by_oid { my ($self, $oid, $mid) = @_; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 3278802b..ab971e00 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -45,12 +45,11 @@ sub to_doc_data { $self->cc, $oid, $mid0, - $self->{bytes}, - $self->{lines} + $self->{bytes} || '', + $self->{lines} || '' ); } - sub load_from_data ($$) { my ($self) = $_[0]; # data = $_[1] ( @@ -92,7 +91,6 @@ sub load_doc { # :bytes and :lines metadata in RFC 3977 sub bytes ($) { $_[0]->{bytes} } sub lines ($) { $_[0]->{lines} } -sub num ($) { $_[0]->{num} ||= _get_term_val($_[0], 'XNUM', qr/\AXNUM/) } sub __hdr ($$) { my ($self, $field) = @_; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 53fdb738..1cc4b005 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -800,7 +800,7 @@ sub unindex_oid { my %gone; my ($id, $prev); while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { - $gone{$smsg->num} = 1 if $oid eq $smsg->{blob}; + $gone{$smsg->{num}} = 1 if $oid eq $smsg->{blob}; 1; # continue } my $n = scalar keys %gone; diff --git a/script/public-inbox-compact b/script/public-inbox-compact index d855b9e1..9f332657 100755 --- a/script/public-inbox-compact +++ b/script/public-inbox-compact @@ -48,7 +48,7 @@ sub commit_changes ($$$) { $im->lock_release; remove_tree("$old/old") or die "failed to remove $old/old: $!\n"; } - +my @compact = qw(xapian-compact --no-renumber); if ($v == 2) { require PublicInbox::V2Writable; my $v2w = PublicInbox::V2Writable->new($ibx); @@ -70,7 +70,7 @@ if ($v == 2) { } close $dh; die "No Xapian parts found in $old\n" unless @parts; - my $cmd = ['xapian-compact', @parts, "$new/0" ]; + my $cmd = [@compact, @parts, "$new/0" ]; PublicInbox::Import::run_die($cmd); commit_changes($v2w, $old, $new); }); @@ -84,7 +84,7 @@ if ($v == 2) { my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $v1_root); $ibx->with_umask(sub { $im->lock_acquire; - PublicInbox::Import::run_die(['xapian-compact', $old, $new]); + PublicInbox::Import::run_die([@compact, $old, $new]); commit_changes($im, $old, $new); }); } else { @@ -306,31 +306,33 @@ sub filter_mids { # names and addresses { - my $res = $ro->query('t:list@example.com'); - is(scalar @$res, 6, 'searched To: successfully'); - foreach my $smsg (@$res) { + my $mset = $ro->query('t:list@example.com', {mset => 1}); + is($mset->size, 6, 'searched To: successfully'); + foreach my $m ($mset->items) { + my $smsg = $ro->lookup_article($m->get_docid); like($smsg->to, qr/\blist\@example\.com\b/, 'to appears'); } - $res = $ro->query('tc:list@example.com'); - is(scalar @$res, 6, 'searched To+Cc: successfully'); - foreach my $smsg (@$res) { + $mset = $ro->query('tc:list@example.com', {mset => 1}); + is($mset->size, 6, 'searched To+Cc: successfully'); + foreach my $m ($mset->items) { + my $smsg = $ro->lookup_article($m->get_docid); my $tocc = join("\n", $smsg->to, $smsg->cc); like($tocc, qr/\blist\@example\.com\b/, 'tocc appears'); } foreach my $pfx ('tcf:', 'c:') { - $res = $ro->query($pfx . 'foo@example.com'); - is(scalar @$res, 1, - "searched $pfx successfully for Cc:"); - foreach my $smsg (@$res) { + my $mset = $ro->query($pfx . 'foo@example.com', { mset => 1 }); + is($mset->items, 1, "searched $pfx successfully for Cc:"); + foreach my $m ($mset->items) { + my $smsg = $ro->lookup_article($m->get_docid); like($smsg->cc, qr/\bfoo\@example\.com\b/, 'cc appears'); } } foreach my $pfx ('', 'tcf:', 'f:') { - $res = $ro->query($pfx . 'Laggy'); + my $res = $ro->query($pfx . 'Laggy'); is(scalar(@$res), 1, "searched $pfx successfully for From:"); foreach my $smsg (@$res) { diff --git a/t/v2writable.t b/t/v2writable.t index b543c53f..85fb6a6d 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -220,13 +220,14 @@ EOF 'commit message propagated to git'); is_deeply(\@after, \@before, 'only one commit written to git'); is($ibx->mm->num_for($smsg->mid), undef, 'no longer in Msgmap by mid'); - like($smsg->num, qr/\A\d+\z/, 'numeric number in return message'); - is($ibx->mm->mid_for($smsg->num), undef, 'no longer in Msgmap by num'); + my $num = $smsg->{num}; + like($num, qr/\A\d+\z/, 'numeric number in return message'); + is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num'); my $srch = $ibx->search->reopen; my $mset = $srch->query('m:'.$smsg->mid, { mset => 1}); is($mset->size, 0, 'no longer found in Xapian'); my @log1 = qw(log -1 --pretty=raw --raw -r --no-abbrev --no-renames); - is($srch->{over_ro}->get_art($smsg->num), undef, + is($srch->{over_ro}->get_art($num), undef, 'removal propagated to Over DB'); my $after = $git0->qx(@log1); |