* [PATCH 7/7] implement public-inbox-purge tool
2019-01-11 4:10 7% [PATCH 0/7] implement purge tool Eric Wong
@ 2019-01-11 4:10 4% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2019-01-11 4:10 UTC (permalink / raw)
To: meta
Expose the ->purge functionality of V2Writable for rewriting
git history to permanently purge messages from history. This
may be necessary for legal reasons.
Usage:
# requires ~/.public-inbox/config
public-inbox-purge --all </path/to/message-to-purge
# good for testing with unconfigured inboxes:
public-inbox-purge $INBOX_DIR </path/to/message-to-purge
---
MANIFEST | 2 +
script/public-inbox-purge | 111 ++++++++++++++++++++++++++++++++++++++
t/purge.t | 97 +++++++++++++++++++++++++++++++++
3 files changed, 210 insertions(+)
create mode 100755 script/public-inbox-purge
create mode 100644 t/purge.t
diff --git a/MANIFEST b/MANIFEST
index 5ac85c3..886ae6b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -128,6 +128,7 @@ script/public-inbox-init
script/public-inbox-learn
script/public-inbox-mda
script/public-inbox-nntpd
+script/public-inbox-purge
script/public-inbox-watch
script/public-inbox.cgi
scripts/dc-dlvr
@@ -198,6 +199,7 @@ t/psgi_multipart_not.t
t/psgi_search.t
t/psgi_text.t
t/psgi_v2.t
+t/purge.t
t/qspawn.t
t/reply.t
t/search-thr-index.t
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
new file mode 100755
index 0000000..688dd95
--- /dev/null
+++ b/script/public-inbox-purge
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Used for purging messages entirely from a public-inbox. Currently
+# supports v2 inboxes only, for now.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::Config;
+use PublicInbox::MIME;
+use PublicInbox::Admin qw(resolve_repo_dir);
+use PublicInbox::Filter::Base;
+*REJECT = *PublicInbox::Filter::Base::REJECT;
+
+my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
+
+eval { require PublicInbox::V2Writable } or die
+ "DBI, DBD::SQLite and Search::Xapian required for purge\n";
+my $config = eval { PublicInbox::Config->new };
+my $cfgfile = PublicInbox::Config::default_file();
+my ($all, $force);
+my $verbose = 1;
+my %opts = (
+ 'all' => \$all,
+ 'force|f' => \$force,
+ 'verbose|v!' => \$verbose,
+);
+GetOptions(%opts) or die "bad command-line args\n", $usage, "\n";
+
+# TODO: clean this up and share code with -index via ::Admin
+my %dir2ibx; # ( path => Inbox object )
+my @inboxes;
+$config and $config->each_inbox(sub {
+ my ($ibx) = @_;
+ push @inboxes, $ibx if $all && $ibx->{version} != 1;
+ $dir2ibx{$ibx->{mainrepo}} = $ibx;
+});
+
+if ($all) {
+ $config or die "--all specified, but $cfgfile not readable\n";
+ @ARGV and die "--all specified, but directories specified\n";
+} else {
+ my @err;
+ my @dirs = scalar(@ARGV) ? @ARGV : ('.');
+ my $u = 0;
+
+ foreach my $dir (@dirs) {
+ my $v;
+ my $dir = resolve_repo_dir($dir, \$v);
+ if ($v == 1) {
+ push @err, $dir;
+ next;
+ }
+ my $ibx = $dir2ibx{$dir} ||= do {
+ warn "$dir not configured in $cfgfile\n";
+ $u++;
+ my $name = "unconfigured-$u";
+ PublicInbox::Inbox->new({
+ version => 2,
+ name => $name,
+ -primary_address => "$name\@example.com",
+ mainrepo => $dir,
+ });
+ };
+ push @inboxes, $ibx;
+ }
+
+ if (@err) {
+ die "v1 inboxes currently not supported by -purge\n\t",
+ join("\n\t", @err), "\n";
+ }
+}
+
+my $data = do { local $/; scalar <STDIN> };
+$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+my $n_purged = 0;
+
+foreach my $ibx (@inboxes) {
+ my $mime = PublicInbox::MIME->new($data);
+ my $v2w = PublicInbox::V2Writable->new($ibx, 0);
+
+ my $commits = $v2w->purge($mime) || [];
+
+ if (my $scrub = $ibx->filter($v2w)) {
+ my $scrubbed = $scrub->scrub($mime, 1);
+
+ if ($scrubbed && $scrubbed != REJECT()) {
+ my $scrub_commits = $v2w->purge($scrubbed);
+ push @$commits, @$scrub_commits if $scrub_commits;
+ }
+ }
+
+ $v2w->done;
+
+ if ($verbose) { # should we consider this machine-parseable?
+ print "$ibx->{mainrepo}:";
+ if (scalar @$commits) {
+ print join("\n\t", '', @$commits), "\n";
+ } else {
+ print " NONE\n";
+ }
+ }
+ $n_purged += scalar @$commits;
+}
+
+# behave like "rm -f"
+exit(0) if ($force || $n_purged);
+
+warn "Not found\n" if $verbose;
+exit(1);
diff --git a/t/purge.t b/t/purge.t
new file mode 100644
index 0000000..9406005
--- /dev/null
+++ b/t/purge.t
@@ -0,0 +1,97 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+require './t/common.perl';
+require_git(2.6);
+my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian);
+foreach my $mod (@mods) { # skip the entire test file when any optional dependency is absent
+ eval "require $mod"; # string eval: $mod holds a module name, not a file path
+ plan skip_all => "missing $mod for t/purge.t" if $@; # report $mod; $_ is never set by this loop
+};
+use Cwd qw(abs_path);
+my $purge = abs_path('blib/script/public-inbox-purge');
+my $tmpdir = tempdir('pi-purge-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+use_ok 'PublicInbox::V2Writable';
+my $mainrepo = "$tmpdir/v2";
+my $ibx = PublicInbox::Inbox->new({
+ mainrepo => $mainrepo,
+ name => 'test-v2purge',
+ version => 2,
+ -primary_address => 'test@example.com',
+ indexlevel => 'basic',
+});
+
+my $raw = <<'EOF';
+From: a@example.com
+To: test@example.com
+Subject: this is a subject
+Message-ID: <a-mid@b>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+Hello World
+
+EOF
+
+local $ENV{NPROC} = '1';
+my $cfgfile = "$tmpdir/config";
+local $ENV{PI_CONFIG} = $cfgfile;
+open my $cfg_fh, '>', $cfgfile or die "open: $!";
+
+my $v2w = PublicInbox::V2Writable->new($ibx, 1);
+my $mime = PublicInbox::MIME->new($raw);
+ok($v2w->add($mime), 'add message to be purged');
+$v2w->done;
+
+# failing cases, first:
+my $in = "$raw\nMOAR\n";
+my ($out, $err) = ('', '');
+ok(IPC::Run::run([$purge, '-f', $mainrepo], \$in, \$out, \$err),
+ 'purge -f OK');
+
+$out = $err = '';
+ok(!IPC::Run::run([$purge, $mainrepo], \$in, \$out, \$err),
+ 'mismatch fails without -f');
+is($? >> 8, 1, 'missed purge exits with 1');
+
+# a successful case:
+ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK');
+like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted');
+
+# add (old) vger filter to config file
+print $cfg_fh <<EOF or die "print $!";
+[publicinbox "test-v2purge"]
+ mainrepo = $mainrepo
+ address = test\@example.com
+ indexlevel = basic
+ filter = PublicInbox::Filter::Vger
+EOF
+close $cfg_fh or die "close: $!";
+
+ok($v2w->add($mime), 'add vger-signatured message to be purged');
+$v2w->done;
+
+my $pre_scrub = $raw . <<'EOF';
+
+--
+To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at http://vger.kernel.org/majordomo-info.html
+Please read the FAQ at http://www.tux.org/lkml/
+EOF
+
+$out = $err = '';
+ok(chdir('/'), "chdir / OK for --all test");
+ok(IPC::Run::run([$purge, '--all'], \$pre_scrub, \$out, \$err),
+ 'scrub purge OK');
+like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted');
+# diag "out: $out"; diag "err: $err";
+
+$out = $err = '';
+ok(!IPC::Run::run([$purge, '--all' ], \$pre_scrub, \$out, \$err),
+ 'scrub purge not idempotent without -f');
+# diag "out: $out"; diag "err: $err";
+
+done_testing();
--
EW
^ permalink raw reply related [relevance 4%]
* [PATCH 0/7] implement purge tool
@ 2019-01-11 4:10 7% Eric Wong
2019-01-11 4:10 4% ` [PATCH 7/7] implement public-inbox-purge tool Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2019-01-11 4:10 UTC (permalink / raw)
To: meta
Of course, I found and fixed a bunch of little purge bugs
in the process :x
Still need to WTFM so I can tell others to RTFM :>
The following changes since commit b0e5062d43a96372801713ef78a78d6a1bc852bc:
Merge commit 'mem' (2019-01-10 21:41:55 +0000)
are available in the Git repository at:
https://public-inbox.org/ purge
for you to fetch changes up to 440b0feaa209e12e4bcb8ef16a95041fce71e7dc:
implement public-inbox-purge tool (2019-01-11 04:07:17 +0000)
----------------------------------------------------------------
Eric Wong (7):
hoist out resolve_repo_dir from -index
import: purge: reap fast-export process
v2writable: ->purge returns undef on no-op
v2writable: purge ignores non-existent git epoch directories
v2writable: cleanup processes when done
v2writable: read epoch on purge
implement public-inbox-purge tool
MANIFEST | 4 ++
lib/PublicInbox/Admin.pm | 44 +++++++++++++++++
lib/PublicInbox/Import.pm | 3 +-
lib/PublicInbox/V2Writable.pm | 16 ++++--
script/public-inbox-index | 32 +-----------
script/public-inbox-purge | 111 ++++++++++++++++++++++++++++++++++++++++++
t/admin.t | 81 ++++++++++++++++++++++++++++++
t/purge.t | 97 ++++++++++++++++++++++++++++++++++++
t/v2writable.t | 3 ++
9 files changed, 357 insertions(+), 34 deletions(-)
create mode 100644 lib/PublicInbox/Admin.pm
create mode 100755 script/public-inbox-purge
create mode 100644 t/admin.t
create mode 100644 t/purge.t
--
EW
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2019-01-11 4:10 7% [PATCH 0/7] implement purge tool Eric Wong
2019-01-11 4:10 4% ` [PATCH 7/7] implement public-inbox-purge tool Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).