From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A742F2141F for ; Fri, 11 Jan 2019 04:10:11 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 7/7] implement public-inbox-purge tool Date: Fri, 11 Jan 2019 04:10:08 +0000 Message-Id: <20190111041008.24361-8-e@80x24.org> In-Reply-To: <20190111041008.24361-1-e@80x24.org> References: <20190111041008.24361-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Expose the ->purge functionality of V2Writable for rewriting git history to permanently purge messages from history. This may be necessary for legal reasons. Usage: # requires ~/.public-inbox/config public-inbox-purge --all +# License: AGPL-3.0+ +# +# Used for purging messages entirely from a public-inbox. Currently +# supports v2 inboxes only, for now. +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::Config; +use PublicInbox::MIME; +use PublicInbox::Admin qw(resolve_repo_dir); +use PublicInbox::Filter::Base; +*REJECT = *PublicInbox::Filter::Base::REJECT; + +my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message"; +my $config = eval { PublicInbox::Config->new }; +my $cfgfile = PublicInbox::Config::default_file(); +my ($all, $force); +my $verbose = 1; +my %opts = ( + 'all' => \$all, + 'force|f' => \$force, + 'verbose|v!' 
=> \$verbose, +); +GetOptions(%opts) or die "bad command-line args\n", $usage, "\n"; + +# TODO: clean this up and share code with -index via ::Admin +my %dir2ibx; # ( path => Inbox object ) +my @inboxes; +$config and $config->each_inbox(sub { + my ($ibx) = @_; + push @inboxes, $ibx if $all && $ibx->{version} != 1; + $dir2ibx{$ibx->{mainrepo}} = $ibx; +}); + +if ($all) { + $config or die "--all specified, but $cfgfile not readable\n"; + @ARGV and die "--all specified, but directories specified\n"; +} else { + my @err; + my @dirs = scalar(@ARGV) ? @ARGV : ('.'); + my $u = 0; + + foreach my $dir (@dirs) { + my $v; + my $dir = resolve_repo_dir($dir, \$v); + if ($v == 1) { + push @err, $dir; + next; + } + my $ibx = $dir2ibx{$dir} ||= do { + warn "$dir not configured in $cfgfile\n"; + $u++; + my $name = "unconfigured-$u"; + PublicInbox::Inbox->new({ + version => 2, + name => $name, + -primary_address => "$name\@example.com", + mainrepo => $dir, + }); + }; + push @inboxes, $ibx; + } + + if (@err) { + die "v1 inboxes currently not supported by -purge\n\t", + join("\n\t", @err), "\n"; + } +} + +my $data = do { local $/; scalar <STDIN> }; +$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $n_purged = 0; + +foreach my $ibx (@inboxes) { + my $mime = PublicInbox::MIME->new($data); + my $v2w = PublicInbox::V2Writable->new($ibx, 0); + + my $commits = $v2w->purge($mime) || []; + + if (my $scrub = $ibx->filter($v2w)) { + my $scrubbed = $scrub->scrub($mime, 1); + + if ($scrubbed && $scrubbed != REJECT()) { + my $scrub_commits = $v2w->purge($scrubbed); + push @$commits, @$scrub_commits if $scrub_commits; + } + } + + $v2w->done; + + if ($verbose) { # should we consider this machine-parseable? 
+ print "$ibx->{mainrepo}:"; + if (scalar @$commits) { + print join("\n\t", '', @$commits), "\n"; + } else { + print " NONE\n"; + } + } + $n_purged += scalar @$commits; +} + +# behave like "rm -f" +exit(0) if ($force || $n_purged); + +warn "Not found\n" if $verbose; +exit(1); diff --git a/t/purge.t b/t/purge.t new file mode 100644 index 0000000..9406005 --- /dev/null +++ b/t/purge.t @@ -0,0 +1,97 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); +my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "missing $_ for t/purge.t" if $@; +}; +use Cwd qw(abs_path); +my $purge = abs_path('blib/script/public-inbox-purge'); +my $tmpdir = tempdir('pi-purge-XXXXXX', TMPDIR => 1, CLEANUP => 1); +use_ok 'PublicInbox::V2Writable'; +my $mainrepo = "$tmpdir/v2"; +my $ibx = PublicInbox::Inbox->new({ + mainrepo => $mainrepo, + name => 'test-v2purge', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', +}); + +my $raw = <<'EOF'; +From: a@example.com +To: test@example.com +Subject: this is a subject +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +Hello World + +EOF + +local $ENV{NPROC} = '1'; +my $cfgfile = "$tmpdir/config"; +local $ENV{PI_CONFIG} = $cfgfile; +open my $cfg_fh, '>', $cfgfile or die "open: $!"; + +my $v2w = PublicInbox::V2Writable->new($ibx, 1); +my $mime = PublicInbox::MIME->new($raw); +ok($v2w->add($mime), 'add message to be purged'); +$v2w->done; + +# failing cases, first: +my $in = "$raw\nMOAR\n"; +my ($out, $err) = ('', ''); +ok(IPC::Run::run([$purge, '-f', $mainrepo], \$in, \$out, \$err), + 'purge -f OK'); + +$out = $err = ''; +ok(!IPC::Run::run([$purge, $mainrepo], \$in, \$out, \$err), + 'mismatch fails without -f'); +is($? 
>> 8, 1, 'missed purge exits with 1'); + +# a successful case: +ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK'); +like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); + +# add (old) vger filter to config file +print $cfg_fh <add($mime), 'add vger-signatured message to be purged'); +$v2w->done; + +my $pre_scrub = $raw . <<'EOF'; + +-- +To unsubscribe from this list: send the line "unsubscribe linux-kernel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html +Please read the FAQ at http://www.tux.org/lkml/ +EOF + +$out = $err = ''; +ok(chdir('/'), "chdir / OK for --all test"); +ok(IPC::Run::run([$purge, '--all'], \$pre_scrub, \$out, \$err), + 'scrub purge OK'); +like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); +# diag "out: $out"; diag "err: $err"; + +$out = $err = ''; +ok(!IPC::Run::run([$purge, '--all' ], \$pre_scrub, \$out, \$err), + 'scrub purge not idempotent without -f'); +# diag "out: $out"; diag "err: $err"; + +done_testing(); -- EW