user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei rm: new command to remove messages from index
Date: Wed, 26 May 2021 23:50:07 +0000	[thread overview]
Message-ID: <20210526235007.3881-1-e@80x24.org> (raw)

This is similar to "public-inbox-learn rm", but it's
possible to point an entire Maildir/IMAP/mbox*/newsgroup
at it.
---
 MANIFEST                    |  1 +
 lib/PublicInbox/LEI.pm      |  5 +++-
 lib/PublicInbox/LeiRm.pm    | 50 +++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiStore.pm | 29 ++++++++++++++++++++-
 t/lei-import-maildir.t      |  7 ++++++
 5 files changed, 90 insertions(+), 2 deletions(-)
 create mode 100644 lib/PublicInbox/LeiRm.pm

diff --git a/MANIFEST b/MANIFEST
index 23423e0b..0b4bb380 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -223,6 +223,7 @@ lib/PublicInbox/LeiP2q.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
 lib/PublicInbox/LeiRemote.pm
+lib/PublicInbox/LeiRm.pm
 lib/PublicInbox/LeiSavedSearch.pm
 lib/PublicInbox/LeiSearch.pm
 lib/PublicInbox/LeiStore.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 6ff249d0..7acc05bf 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -206,7 +206,10 @@ our %CMD = ( # sorted in order of importance/use:
 		qw(verbose|v+), @c_opt ],
 'edit-search' => [ 'OUTPUT', "edit saved search via `git config --edit'",
 			@c_opt ],
-
+'rm' => [ '--stdin|LOCATION...',
+	'remove a message from the index and prevent reindexing',
+	'stdin|', # /|\z/ must be first for lone dash
+	@c_opt ],
 'plonk' => [ '--threads|--from=IDENT',
 	'exclude mail matching From: or threads from non-Message-ID searches',
 	qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
diff --git a/lib/PublicInbox/LeiRm.pm b/lib/PublicInbox/LeiRm.pm
new file mode 100644
index 00000000..185b6a15
--- /dev/null
+++ b/lib/PublicInbox/LeiRm.pm
@@ -0,0 +1,50 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# implements the "lei rm" command; you can point this at
+# an entire spam mailbox or read a message from stdin
+package PublicInbox::LeiRm;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+	my ($self, $eml) = @_; # $eml: the message to remove
+	$self->{lei}->{sto}->ipc_do('remove_eml', $eml); # presumably LeiStore::remove_eml
+}
+
+sub input_mbox_cb { # MboxReader callback
+	my ($eml, $self) = @_; # message arrives first, our object second
+	input_eml_cb($self, $eml); # swap arg order for the shared handler
+}
+
+sub input_net_cb { # callback for ->imap_each, ->nntp_each
+	my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored
+	input_eml_cb($self, $eml); # $kw is unpacked above but unused here
+}
+
+sub input_maildir_cb { # Maildir counterpart of input_net_cb
+	my (undef, $kw, $eml, $self) = @_; # $_[0] $filename ignored
+	input_eml_cb($self, $eml); # $kw is unpacked above but unused here
+}
+
+sub lei_rm { # entry point for the "lei rm" command
+	my ($lei, @inputs) = @_; # @inputs: the LOCATION arguments, may be empty
+	$lei->_lei_store(1)->write_prepare($lei); # NOTE(review): meaning of 1 - confirm
+	$lei->{opt}->{stdin} = 1 if !@inputs; # no LOCATION given: read stdin
+	$lei->{opt}->{'in-format'} //= 'eml'; # default format unless already set
+	my $self = bless { -wq_nr_workers => 1 }, __PACKAGE__; # asks for one worker
+	$self->prepare_inputs($lei, \@inputs) or return; # stop if inputs are unusable
+	my ($op_c, $ops) = $lei->workers_start($self, 1);
+	$lei->{wq1} = $self;
+	$lei->{-err_type} = 'non-fatal';
+	net_merge_all_done($self) unless $lei->{auth}; # no {auth}: call it directly
+	$op_c->op_wait_event($ops); # presumably waits on events from the workers
+}
+
+no warnings 'once'; # each glob below is named only once in this file
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all;
+
+1;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index af5edbc2..6888afb4 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -183,7 +183,7 @@ sub add_eml_vmd {
 	\@docids;
 }
 
-sub remove_eml_vmd {
+sub remove_eml_vmd { # remove just the VMD
 	my ($self, $eml, $vmd) = @_;
 	my ($eidx, $tl) = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
@@ -204,6 +204,33 @@ sub set_sync_info {
 	})->set_src($oidhex, $folder, $id);
 }
 
+sub _remove_if_local { # git->cat_async arg
+	my ($bref, $oidhex, $type, $size, $self) = @_; # $self: arg given to cat_async
+	$self->{im}->remove($bref) if $bref; # presumably $bref is false if blob is absent
+}
+
+# remove the entire message from the index; does not touch mail_sync.sqlite3
+sub remove_eml {
+	my ($self, $eml) = @_;
+	my $im = $self->importer; # may create new epoch; $im is unused below
+	my ($eidx, $tl) = eidx_init($self); # $tl is unused in this sub
+	my $oidx = $eidx->{oidx};
+	my @docids = _docids_for($self, $eml); # presumably docids matching $eml
+	my $git = $eidx->git;
+	for my $docid (@docids) {
+		my $xr3 = $oidx->get_xref3($docid, 1); # NOTE(review): meaning of 1 - confirm
+		for my $row (@$xr3) {
+			my (undef, undef, $oidbin) = @$row; # only the 3rd column is used
+			my $oidhex = unpack('H*', $oidbin); # binary OID to hex string
+			$git->cat_async($oidhex, \&_remove_if_local, $self);
+		}
+		$eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid); # drop from shard
+		$oidx->delete_by_num($docid); # and drop the same docid from $oidx
+	}
+	$git->cat_async_wait; # presumably waits for the queued cat_async callbacks
+	\@docids; # returns an arrayref of the docids processed
+}
+
 sub add_eml {
 	my ($self, $eml, $vmd, $xoids) = @_;
 	my $im = $self->{-fake_im} // $self->importer; # may create new epoch
diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t
index f813440a..688b10ce 100644
--- a/t/lei-import-maildir.t
+++ b/t/lei-import-maildir.t
@@ -68,5 +68,12 @@ test_lei(sub {
 	$res = json_utf8->decode($lei_out);
 	is_deeply($res, [ undef ], 'trashed message not imported')
 			or diag explain($imp_err, $res);
+
+	lei_ok qw(rm t/data/0001.patch);
+	lei_ok(qw(q s:boolean));
+	is($lei_out, "[null]\n", 'removed message gone from results');
+	my $g0 = "$ENV{HOME}/.local/share/lei/store/local/0.git";
+	my $x = xqx(['git', "--git-dir=$g0", qw(cat-file blob HEAD:d)]);
+	is($?, 0, "git cat-file shows file is `d'");
 });
 done_testing;

                 reply	other threads:[~2021-05-26 23:50 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210526235007.3881-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH] lei rm: new command to remove messages from index' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git