user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/5] lei: reinstate JSON smsg output deduplication
Date: Sun, 24 Jan 2021 17:18:53 -0800	[thread overview]
Message-ID: <20210125011857.563-2-e@80x24.org> (raw)
In-Reply-To: <20210125011857.563-1-e@80x24.org>

This was accidentally clobbered completely in
("lei q: fix JSON overview with remote externals").
There are now more tests to prevent future regressions.
---
 lib/PublicInbox/LeiOverview.pm |  7 ++++++-
 t/lei.t                        | 19 ++++++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 928d66cb..880c7acc 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -203,12 +203,14 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 	my ($self, $lei, $ibxish) = @_;
 	my $json;
 	$lei->{1}->autoflush(1);
+	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
 	if (my $pkg = $self->{json}) {
 		$json = $pkg->new;
 		$json->utf8->canonical;
 		$json->ascii(1) if $lei->{opt}->{ascii};
+		$lei->{ovv_buf} = \(my $buf = '');
 	}
-	my $l2m = $lei->{l2m};
+	my $l2m = $lei->{l2m} or $dedupe->prepare_dedupe;
 	if ($l2m && !$ibxish) { # remote https?:// mboxrd
 		delete $l2m->{-wq_s1};
 		my $g2m = $l2m->can('git_to_mail');
@@ -241,6 +243,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git
 		$self->{git} = $git; # for ovv_atexit_child
 		my $g2m = $l2m->can('git_to_mail');
+		$dedupe->prepare_dedupe;
 		sub {
 			my ($smsg, $mitem) = @_;
 			$smsg->{pct} = get_pct($mitem) if $mitem;
@@ -251,6 +254,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$lei->{ovv_buf} = \(my $buf = '');
 		sub { # DIY prettiness :P
 			my ($smsg, $mitem) = @_;
+			return if $dedupe->is_smsg_dup($smsg);
 			$smsg = _unbless_smsg($smsg, $mitem);
 			$buf .= "{\n";
 			$buf .= join(",\n", map {
@@ -274,6 +278,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$lei->{ovv_buf} = \(my $buf = '');
 		sub {
 			my ($smsg, $mitem) = @_;
+			return if $dedupe->is_smsg_dup($smsg);
 			$buf .= $json->encode(_unbless_smsg(@_)) . $ORS;
 			if (length($buf) > 65536) {
 				my $lk = $self->lock_for_scope;
diff --git a/t/lei.t b/t/lei.t
index 3fd1d1fe..f826a966 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -17,6 +17,7 @@ my $err_filter;
 my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/
 	http://czquwvybam4bgbro.onion/meta/
 	http://ou63pmih66umazou.onion/meta/);
+my $json = ref(PublicInbox::Config->json)->new->utf8->canonical;
 my $lei = sub {
 	my ($cmd, $env, $xopt) = @_;
 	$out = $err = '';
@@ -142,8 +143,7 @@ my $setup_publicinboxes = sub {
 		my ($ibx) = @_;
 		my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
 		my $V = $ibx->version;
-		my @eml = glob('t/*.eml');
-		push(@eml, 't/data/0001.patch') if $V == 2;
+		my @eml = (glob('t/*.eml'), 't/data/0001.patch');
 		for (@eml) {
 			next if $_ eq 't/psgi_v2-old.eml'; # dup mid
 			$im->add(eml_load($_)) or BAIL_OUT "v$V add $_";
@@ -176,7 +176,7 @@ SKIP: {
 	my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
 	ok($lei->('q', "m:$mid"), "query $url");
 	is($err, '', "no errors on $url");
-	my $res = PublicInbox::Config->json->decode($out);
+	my $res = $json->decode($out);
 	is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
 	ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
 	like($err, qr/404/, 'noted 404');
@@ -246,6 +246,19 @@ my $test_external = sub {
 	# No double-quoting should be imposed on users on the CLI
 	$lei->('q', 's:use boolean prefix');
 	like($out, qr/search: use boolean prefix/, 'phrase search got result');
+	my $res = $json->decode($out);
+	is(scalar(@$res), 2, 'only 2 element array (1 result)');
+	is($res->[1], undef, 'final element is undef'); # XXX should this be?
+	is(ref($res->[0]), 'HASH', 'first element is hashref');
+	$lei->('q', '--pretty', 's:use boolean prefix');
+	my $pretty = $json->decode($out);
+	is_deeply($res, $pretty, '--pretty is identical after decode');
+
+	for my $fmt (qw(ldjson ndjson jsonl)) {
+		$lei->('q', '-f', $fmt, 's:use boolean prefix');
+		is($out, $json->encode($pretty->[0])."\n", "-f $fmt");
+	}
+
 	require IO::Uncompress::Gunzip;
 	for my $sfx ('', '.gz') {
 		my $f = "$home/mbox$sfx";

  reply	other threads:[~2021-01-25  1:18 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-25  1:18 [PATCH 0/5] lei: more fixes and usability enhancement Eric Wong
2021-01-25  1:18 ` Eric Wong [this message]
2021-01-25  1:18 ` [PATCH 2/5] lei q: drop "oid" output format Eric Wong
2021-01-25  1:18 ` [PATCH 3/5] lei q: demangle and quiet curl output Eric Wong
2021-01-25  1:18 ` [PATCH 4/5] lei q: reject remotes early if curl(1) is missing Eric Wong
2021-01-25  1:18 ` [PATCH 5/5] lei q: continue remote search if torsocks(1) " Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210125011857.563-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).