user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] lei: add some labels support
Date: Fri, 26 Mar 2021 06:29:37 +0200	[thread overview]
Message-ID: <20210326042937.15913-4-e@80x24.org> (raw)
In-Reply-To: <20210326042937.15913-1-e@80x24.org>

"lei q" now displays labels in JSON output, "lei mark"
can add or remove labels for any messages.

"lei ls-label" is supported, too.

Unfortunately, "lei q" won't handle "kw:" or "L:" for
external messages; they must be imported first.
---
 MANIFEST                       |  1 +
 lib/PublicInbox/LEI.pm         |  1 +
 lib/PublicInbox/LeiLsLabel.pm  | 17 ++++++++++
 lib/PublicInbox/LeiMark.pm     |  6 ++--
 lib/PublicInbox/LeiOverview.pm |  4 +--
 lib/PublicInbox/LeiSearch.pm   | 37 ++++++++++++++++++---
 lib/PublicInbox/LeiStore.pm    | 59 +++++++++++++++++++++++++---------
 lib/PublicInbox/LeiXSearch.pm  | 13 +++++---
 lib/PublicInbox/Search.pm      |  6 ++--
 lib/PublicInbox/SearchIdx.pm   |  2 +-
 t/lei-mark.t                   | 46 +++++++++++++++++++++++---
 11 files changed, 156 insertions(+), 36 deletions(-)
 create mode 100644 lib/PublicInbox/LeiLsLabel.pm

diff --git a/MANIFEST b/MANIFEST
index 87e4b616..6b2b33ac 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -188,6 +188,7 @@ lib/PublicInbox/LeiExternal.pm
 lib/PublicInbox/LeiHelp.pm
 lib/PublicInbox/LeiImport.pm
 lib/PublicInbox/LeiInput.pm
+lib/PublicInbox/LeiLsLabel.pm
 lib/PublicInbox/LeiMark.pm
 lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index b42ba0ae..fab2af90 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -145,6 +145,7 @@ our %CMD = ( # sorted in order of importance/use:
 	PublicInbox::LeiQuery::curl_opt() ],
 'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations',
 	qw(format|f=s z|0 globoff|g invert-match|v local remote), @c_opt ],
+'ls-label' => [ '', 'list labels', qw(z|0 stats:s), @c_opt ],
 'forget-external' => [ 'LOCATION...|--prune',
 	'exclude further results from a publicinbox|extindex',
 	qw(prune), @c_opt ],
diff --git a/lib/PublicInbox/LeiLsLabel.pm b/lib/PublicInbox/LeiLsLabel.pm
new file mode 100644
index 00000000..474224d4
--- /dev/null
+++ b/lib/PublicInbox/LeiLsLabel.pm
@@ -0,0 +1,17 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei ls-label" command
+package PublicInbox::LeiLsLabel;
+use strict;
+use v5.10.1;
+
+sub lei_ls_label { # the "lei ls-label" method
+	my ($lei, @argv) = @_;
+	# TODO: document stats/counts (expensive)
+	my @L = eval { $lei->_lei_store->search->all_terms('L') };
+	my $ORS = $lei->{opt}->{z} ? "\0" : "\n";
+	$lei->out(map { $_.$ORS } @L);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm
index 9d77f4b4..7a2ccf77 100644
--- a/lib/PublicInbox/LeiMark.pm
+++ b/lib/PublicInbox/LeiMark.pm
@@ -60,7 +60,7 @@ sub vmd_mod_extract {
 sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml) = @_;
 	if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) {
-		$self->{lei}->{sto}->ipc_do('update_xvmd', $xoids,
+		$self->{lei}->{sto}->ipc_do('update_xvmd', $xoids, $eml,
 						$self->{vmd_mod});
 	} else {
 		++$self->{missing};
@@ -168,7 +168,9 @@ sub _complete_mark_common ($) {
 # FIXME: same problems as _complete_forget_external and similar
 sub _complete_mark {
 	my ($self, @argv) = @_;
-	my @all = map { ("+kw:$_", "-kw:$_") } @KW;
+	my @L = eval { $self->_lei_store->search->all_terms('L') };
+	my @all = ((map { ("+kw:$_", "-kw:$_") } @KW),
+		(map { ("+L:$_", "-L:$_") } @L));
 	return @all if !@argv;
 	my ($cur, $re) = _complete_mark_common(\@argv);
 	map {
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 1ce2a098..b4d81328 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -227,7 +227,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		sub { # DIY prettiness :P
 			my ($smsg, $mitem) = @_;
 			return if $dedupe->is_smsg_dup($smsg);
-			$lse->xsmsg_vmd($smsg);
+			$lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1);
 			$smsg = _unbless_smsg($smsg, $mitem);
 			$buf .= "{\n";
 			$buf .= join(",\n", map {
@@ -251,7 +251,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		sub {
 			my ($smsg, $mitem) = @_;
 			return if $dedupe->is_smsg_dup($smsg);
-			$lse->xsmsg_vmd($smsg);
+			$lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1);
 			$buf .= $json->encode(_unbless_smsg(@_)) . $ORS;
 			return if length($buf) < 65536;
 			my $lk = $self->lock_for_scope;
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index bbb00661..07d570ec 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -27,18 +27,25 @@ sub msg_keywords {
 	wantarray ? sort(keys(%$kw)) : $kw;
 }
 
+# lookup keywords+labels for external messages
 sub xsmsg_vmd {
-	my ($self, $smsg) = @_;
+	my ($self, $smsg, $want_label) = @_;
 	return if $smsg->{kw};
 	my $xdb = $self->xdb; # set {nshard};
-	my %kw;
+	my (%kw, %L, $doc, $x);
 	$kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged});
 	my @num = $self->over->blob_exists($smsg->{blob});
 	for my $num (@num) { # there should only be one...
-		my $kw = xap_terms('K', $xdb, num2docid($self, $num));
-		%kw = (%kw, %$kw);
+		$doc = $xdb->get_document(num2docid($self, $num));
+		$x = xap_terms('K', $doc);
+		%kw = (%kw, %$x);
+		if ($want_label) { # JSON/JMAP only
+			$x = xap_terms('L', $doc);
+			%L = (%L, %$x);
+		}
 	}
 	$smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw));
+	$smsg->{L} = [ sort keys %L ] if scalar(keys(%L));
 }
 
 # when a message has no Message-IDs at all, this is needed for
@@ -100,4 +107,26 @@ sub kw_changed {
 	join("\0", @$new_kw_sorted) eq join("\0", @cur_kw) ? 0 : 1;
 }
 
+sub all_terms {
+	my ($self, $pfx) = @_;
+	my $xdb = $self->xdb;
+	my $cur = $xdb->allterms_begin($pfx);
+	my $end = $xdb->allterms_end($pfx);
+	my %ret;
+	for (; $cur != $end; $cur++) {
+		my $tn = $cur->get_termname;
+		index($tn, $pfx) == 0 and
+			$ret{substr($tn, length($pfx))} = undef;
+	}
+	wantarray ? (sort keys %ret) : \%ret;
+}
+
+sub qparse_new {
+	my ($self) = @_;
+	my $qp = $self->SUPER::qparse_new; # PublicInbox::Search
+	$qp->add_boolean_prefix('kw', 'K');
+	$qp->add_boolean_prefix('L', 'L');
+	$qp
+}
+
 1;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 1311ad46..b76af4d3 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -228,8 +228,30 @@ sub set_eml {
 		set_eml_vmd($self, $eml, $vmd);
 }
 
+sub _external_only ($$$) {
+	my ($self, $xoids, $eml) = @_;
+	my $eidx = $self->{priv_eidx};
+	my $oidx = $eidx->{oidx} // die 'BUG: {oidx} missing';
+	my $smsg = bless { blob => '' }, 'PublicInbox::Smsg';
+	$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+	# save space for an externals-only message
+	my $hdr = $eml->header_obj;
+	$smsg->populate($hdr); # sets lines == 0
+	$smsg->{bytes} = 0;
+	delete @$smsg{qw(From Subject)};
+	$smsg->{to} = $smsg->{cc} = $smsg->{from} = '';
+	$oidx->add_overview($hdr, $smsg); # subject+references for threading
+	$smsg->{subject} = '';
+	for my $oid (keys %$xoids) {
+		$oidx->add_xref3($smsg->{num}, -1, $oid, '.');
+	}
+	my $idx = $eidx->idx_shard($smsg->{num});
+	$idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg);
+	($smsg, $idx);
+}
+
 sub update_xvmd {
-	my ($self, $xoids, $vmd_mod) = @_;
+	my ($self, $xoids, $eml, $vmd_mod) = @_;
 	my $eidx = eidx_init($self);
 	my $oidx = $eidx->{oidx};
 	my %seen;
@@ -242,7 +264,25 @@ sub update_xvmd {
 			my $idx = $eidx->idx_shard($docid);
 			$idx->ipc_do('update_vmd', $docid, $vmd_mod);
 		}
+		delete $xoids->{$oid};
 	}
+	return unless scalar(keys(%$xoids));
+
+	# see if it was indexed, but with different OID(s)
+	if (my @docids = _docids_for($self, $eml)) {
+		for my $docid (@docids) {
+			next if $seen{$docid};
+			for my $oid (keys %$xoids) {
+				$oidx->add_xref3($docid, -1, $oid, '.');
+			}
+			my $idx = $eidx->idx_shard($docid);
+			$idx->ipc_do('update_vmd', $docid, $vmd_mod);
+		}
+		return;
+	}
+	# totally unseen
+	my ($smsg, $idx) = _external_only($self, $xoids, $eml);
+	$idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod);
 }
 
 # set or update keywords for external message, called via ipc_do
@@ -270,6 +310,7 @@ sub set_xvmd {
 	# see if it was indexed, but with different OID(s)
 	if (my @docids = _docids_for($self, $eml)) {
 		for my $docid (@docids) {
+			next if $seen{$docid};
 			for my $oid (keys %$xoids) {
 				$oidx->add_xref3($docid, -1, $oid, '.');
 			}
@@ -279,21 +320,7 @@ sub set_xvmd {
 		return;
 	}
 	# totally unseen
-	my $smsg = bless { blob => '' }, 'PublicInbox::Smsg';
-	$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
-	# save space for an externals-only message
-	my $hdr = $eml->header_obj;
-	$smsg->populate($hdr); # sets lines == 0
-	$smsg->{bytes} = 0;
-	delete @$smsg{qw(From Subject)};
-	$smsg->{to} = $smsg->{cc} = $smsg->{from} = '';
-	$oidx->add_overview($hdr, $smsg); # subject+references for threading
-	$smsg->{subject} = '';
-	for my $oid (keys %$xoids) {
-		$oidx->add_xref3($smsg->{num}, -1, $oid, '.');
-	}
-	my $idx = $eidx->idx_shard($smsg->{num});
-	$idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg);
+	my ($smsg, $idx) = _external_only($self, $xoids, $eml);
 	$idx->ipc_do('add_vmd', $smsg->{num}, $vmd);
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 386c4eba..f64b2c62 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -69,11 +69,13 @@ sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
 
 sub mitem_kw ($$;$) {
 	my ($smsg, $mitem, $flagged) = @_;
-	my $kw = xap_terms('K', $mitem->get_document);
+	my $kw = xap_terms('K', my $doc = $mitem->get_document);
 	$kw->{flagged} = 1 if $flagged;
-	# we keep the empty array here to prevent expensive work in
+	# we keep the empty {kw} array here to prevent expensive work in
 	# ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
 	$smsg->{kw} = [ sort keys %$kw ];
+	my $L = xap_terms('L', $doc);
+	$smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
 }
 
 # like over->get_art
@@ -86,8 +88,10 @@ sub smsg_for {
 	my $num = int(($docid - 1) / $nshard) + 1;
 	my $ibx = $self->{shard2ibx}->[$shard];
 	my $smsg = $ibx->over->get_art($num);
-	return if $smsg->{bytes} == 0;
-	mitem_kw($smsg, $mitem) if $ibx->can('msg_keywords');
+	return if $smsg->{bytes} == 0; # external message
+	if ($ibx->can('msg_keywords')) {
+		mitem_kw($smsg, $mitem);
+	}
 	$smsg;
 }
 
@@ -170,6 +174,7 @@ sub query_thread_mset { # for --threads
 					if ($can_kw) {
 						mitem_kw($smsg, $mitem, $fl);
 					} elsif ($fl) {
+						# call ->xsmsg_vmd, later
 						$smsg->{lei_q_tt_flagged} = 1;
 					}
 				}
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index c7d52daf..ab04d430 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -370,7 +370,7 @@ sub query_approxidate {
 sub mset {
 	my ($self, $query_string, $opts) = @_;
 	$opts ||= {};
-	my $qp = $self->{qp} //= qparse_new($self);
+	my $qp = $self->{qp} //= $self->qparse_new;
 	my $query = $qp->parse_query($query_string, $self->{qp_flags});
 	_do_enquire($self, $query, $opts);
 }
@@ -463,7 +463,7 @@ sub mset_to_smsg {
 sub stemmer { $X{Stem}->new($LANG) }
 
 # read-only
-sub qparse_new ($) {
+sub qparse_new {
 	my ($self) = @_;
 
 	my $xdb = xdb($self);
@@ -516,7 +516,7 @@ EOF
 
 sub help {
 	my ($self) = @_;
-	$self->{qp} //= qparse_new($self); # parse altids
+	$self->{qp} //= $self->qparse_new; # parse altids
 	my @ret = @HELP;
 	if (my $user_pfx = $self->{-user_pfx}) {
 		push @ret, @$user_pfx;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 7d46489c..ca1f3588 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -35,7 +35,7 @@ use constant DEBUG => !!$ENV{DEBUG};
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
-my @VMD_MAP = (kw => 'K', label => 'L');
+my @VMD_MAP = (kw => 'K', L => 'L');
 our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/;
 
 sub new {
diff --git a/t/lei-mark.t b/t/lei-mark.t
index 76995589..23f5002e 100644
--- a/t/lei-mark.t
+++ b/t/lei-mark.t
@@ -4,22 +4,32 @@
 use strict; use v5.10.1; use PublicInbox::TestCommon;
 require_git 2.6;
 require_mods(qw(json DBD::SQLite Search::Xapian));
+my ($ro_home, $cfg_path) = setup_public_inboxes;
 my $check_kw = sub {
 	my ($exp, %opt) = @_;
+	my $args = $opt{args} // [];
 	my $mid = $opt{mid} // 'testmessage@example.com';
-	lei_ok('q', "m:$mid");
+	lei_ok('q', "m:$mid", @$args);
 	my $res = json_utf8->decode($lei_out);
 	is($res->[1], undef, 'only got one result');
 	my $msg = $opt{msg} ? " $opt{msg}" : '';
 	($exp ? is_deeply($res->[0]->{kw}, $exp, "got @$exp$msg")
 		: is($res->[0]->{kw}, undef, "got undef$msg")) or
 			diag explain($res);
+	if (exists $opt{L}) {
+		$exp = $opt{L};
+		($exp ? is_deeply($res->[0]->{L}, $exp, "got @$exp$msg")
+			: is($res->[0]->{L}, undef, "got undef$msg")) or
+				diag explain($res);
+	}
 };
 
 test_lei(sub {
+	lei_ok(qw(ls-label)); is($lei_out, '', 'no labels, yet');
 	lei_ok(qw(import -F eml t/utf8.eml));
-	lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged));
-	$check_kw->(['flagged']);
+	lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged +L:urgent));
+	$check_kw->(['flagged'], L => ['urgent']);
+	lei_ok(qw(ls-label)); is($lei_out, "urgent\n", 'label found');
 	ok(!lei(qw(mark -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected');
 	like($lei_err, qr/`seeen' is not one of/, 'got helpful error');
 	ok(!lei(qw(mark -F eml t/utf8.eml +k:seen)), 'bad prefix rejected');
@@ -41,7 +51,35 @@ test_lei(sub {
 	$check_kw->(['answered'], msg => 'Maildir Status ignored');
 
 	open my $in, '<', 't/utf8.eml' or BAIL_OUT $!;
-	lei_ok([qw(mark -F eml - +kw:seen)], undef, { %$lei_opt, 0 => $in });
+	lei_ok([qw(mark -F eml - +kw:seen +L:nope)],
+		undef, { %$lei_opt, 0 => $in });
 	$check_kw->(['answered', 'seen'], msg => 'stdin works');
+	lei_ok(qw(q L:urgent));
+	my $res = json_utf8->decode($lei_out);
+	is($res->[0]->{'m'}, 'testmessage@example.com', 'L: query works');
+	lei_ok(qw(q kw:seen));
+	my $r2 = json_utf8->decode($lei_out);
+	is_deeply($r2, $res, 'kw: query works, too') or
+		diag explain([$r2, $res]);
+
+	lei_ok(qw(_complete lei mark));
+	my %c = map { $_ => 1 } split(/\s+/, $lei_out);
+	ok($c{'+L:urgent'} && $c{'-L:urgent'} &&
+		$c{'+L:nope'} && $c{'-L:nope'}, 'completed with labels');
+
+	my $mid = 'qp@example.com';
+	lei_ok qw(q -f mboxrd --only), "$ro_home/t2", "mid:$mid";
+	$in = $lei_out;
+	lei_ok [qw(mark -F mboxrd --stdin +kw:seen +L:qp)],
+		undef, { %$lei_opt, 0 => \$in };
+	$check_kw->(['seen'], L => ['qp'], mid => $mid,
+			args => [ '--only', "$ro_home/t2" ],
+			msg => 'external-only message');
+	lei_ok(qw(ls-label));
+	is($lei_out, "nope\nqp\nurgent\n", 'ls-label shows qp');
+
+	if (0) { # TODO label+kw search w/ externals
+		lei_ok(qw(q L:qp), "mid:$mid", '--only', "$ro_home/t2");
+	}
 });
 done_testing;

  parent reply	other threads:[~2021-03-26  4:29 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-26  4:29 [PATCH 0/3] lei labels support Eric Wong
2021-03-26  4:29 ` [PATCH 1/3] lei_xsearch: wait for kw updates for non-threaded case, too Eric Wong
2021-03-26  4:29 ` [PATCH 2/3] lei: _lei_store: use default even if unconfigured Eric Wong
2021-03-26  5:01   ` [SQUASH 4/3] lei: account for unconfigured leistore.dir Eric Wong
2021-03-26  4:29 ` Eric Wong [this message]
2021-03-26 10:31 ` labels for externals [was: lei labels support] Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210326042937.15913-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).