From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/14] lei q: reinstate smsg dedupe
Date: Wed, 13 Jan 2021 19:06:21 -1200 [thread overview]
Message-ID: <20210114070627.18195-9-e@80x24.org> (raw)
In-Reply-To: <20210114070627.18195-1-e@80x24.org>
Now that dedupe is both serialization-safe and fork-safe, we can
wire it back up in our query result paths.
---
lib/PublicInbox/LeiQuery.pm | 5 ++---
lib/PublicInbox/LeiXSearch.pm | 8 ++++++--
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 1a3e1193..69d2f9a6 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -26,14 +26,13 @@ sub lei_q {
my $sto = $self->_lei_store(1);
my $cfg = $self->_lei_cfg(1);
my $opt = $self->{opt};
- require PublicInbox::LeiDedupe;
- my $dd = PublicInbox::LeiDedupe->new($self);
# --local is enabled by default
# src: LeiXSearch || LeiSearch || Inbox
my @srcs;
require PublicInbox::LeiXSearch;
require PublicInbox::LeiOverview;
+ require PublicInbox::LeiDedupe;
my $lxs = PublicInbox::LeiXSearch->new;
# --external is enabled by default, but allow --no-external
@@ -49,8 +48,8 @@ sub lei_q {
unshift(@srcs, $sto->search) if $opt->{'local'};
# no forking workers after this
- require PublicInbox::LeiOverview;
$self->{ovv} = PublicInbox::LeiOverview->new($self);
+ $self->{dd} = PublicInbox::LeiDedupe->new($self);
my %mset_opt = map { $_ => $opt->{$_} } qw(thread limit offset);
$mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
$mset_opt{qstr} = join(' ', map {;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 68889e81..80e7a7f7 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -103,6 +103,8 @@ sub query_thread_mset { # for --thread
my $mo = { %{$lei->{mset_opt}} };
my $mset;
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+ my $dd = $lei->{dd};
+ $dd->prepare_dedupe;
do {
$mset = $srch->mset($mo->{qstr}, $mo);
my $ids = $srch->mset_to_artnums($mset, $mo);
@@ -112,7 +114,7 @@ sub query_thread_mset { # for --thread
while ($over->expand_thread($ctx)) {
for my $n (@{$ctx->{xids}}) {
my $smsg = $over->get_art($n) or next;
- # next if $dd->is_smsg_dup($smsg); TODO
+ next if $dd->is_smsg_dup($smsg);
my $mitem = delete $n2item{$smsg->{num}};
$each_smsg->($smsg, $mitem);
# $self->out($buf .= $ORS);
@@ -132,11 +134,13 @@ sub query_mset { # non-parallel for non-"--thread" users
my $mset;
$self->attach_external($_) for @$srcs;
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
+ my $dd = $lei->{dd};
+ $dd->prepare_dedupe;
do {
$mset = $self->mset($mo->{qstr}, $mo);
for my $it ($mset->items) {
my $smsg = smsg_for($self, $it) or next;
- # next if $dd->is_smsg_dup($smsg);
+ next if $dd->is_smsg_dup($smsg);
$each_smsg->($smsg, $it);
# $self->out($buf .= $ORS) if defined $buf;
#$emit_cb->($smsg);
next prev parent reply other threads:[~2021-01-14 7:06 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-14 7:06 [PATCH 00/14] lei: another pile of changes Eric Wong
2021-01-14 7:06 ` [PATCH 01/14] cmd_ipc: support + test EINTR + EAGAIN, no FDs Eric Wong
2021-01-14 7:06 ` [PATCH 02/14] lei: test SIGPIPE, stop xsearch workers on client abort Eric Wong
2021-01-14 7:06 ` [PATCH 03/14] daemon+watch: fix localization of %SIG for non-signalfd users Eric Wong
2021-01-14 7:06 ` [PATCH 04/14] lei: do not unlink socket path at exit Eric Wong
2021-01-14 7:06 ` [PATCH 05/14] lei: reduce live FD references in wq child Eric Wong
2021-01-14 7:06 ` [PATCH 06/14] lei: rely on localized $current_lei for warnings Eric Wong
2021-01-14 7:06 ` [PATCH 07/14] lei_dedupe+shared_kv: ensure round-tripping serialization Eric Wong
2021-01-14 7:06 ` Eric Wong [this message]
2021-01-14 7:06 ` [PATCH 09/14] search: rename "ts:" prefix to "rt:" Eric Wong
2021-01-14 7:06 ` [PATCH 10/14] lei_overview: rename "references" to "refs" Eric Wong
2021-01-14 7:06 ` [PATCH 11/14] lei: q: lock stdout on overview output Eric Wong
2021-01-15 0:18 ` Eric Wong
2021-01-14 7:06 ` [PATCH 12/14] leixsearch: remove some commented out code Eric Wong
2021-01-14 7:06 ` [PATCH 13/14] lei: remove temporary var on open Eric Wong
2021-01-14 7:06 ` [PATCH 14/14] lei: pass FD to CWD via cmsg, use fchdir on server Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210114070627.18195-9-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).