diff options
author | Eric Wong <e@80x24.org> | 2019-01-31 21:08:48 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2019-01-31 21:08:48 +0000 |
commit | 65323f060a3db731bb9fafa004336eeb4bbb8f00 (patch) | |
tree | ceaff52229c942314524f4bcac650e1db175c773 /script | |
parent | ce4fe8f5144f7555ddd42b6a94ec602e042c6e43 (diff) | |
parent | 440b0feaa209e12e4bcb8ef16a95041fce71e7dc (diff) | |
download | public-inbox-65323f060a3db731bb9fafa004336eeb4bbb8f00.tar.gz |
* origin/purge:
  implement public-inbox-purge tool
  v2writable: read epoch on purge
  v2writable: cleanup processes when done
  v2writable: purge ignores non-existent git epoch directories
  v2writable: ->purge returns undef on no-op
  import: purge: reap fast-export process
  hoist out resolve_repo_dir from -index
Diffstat (limited to 'script')
-rwxr-xr-x | script/public-inbox-index | 32 | ||||
-rwxr-xr-x | script/public-inbox-purge | 111 |
2 files changed, 113 insertions, 30 deletions
diff --git a/script/public-inbox-index b/script/public-inbox-index index 2ae92757..5adb6e74 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -9,9 +9,10 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use Cwd 'abs_path'; my $usage = "public-inbox-index REPO_DIR"; use PublicInbox::Config; +use PublicInbox::Admin qw(resolve_repo_dir); + my $config = eval { PublicInbox::Config->new } || eval { warn "public-inbox unconfigured for serving, indexing anyways...\n"; undef; @@ -35,35 +36,6 @@ die "--jobs must be positive\n" if defined $jobs && $jobs < 0; my @dirs; -sub resolve_repo_dir { - my ($cd) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - return abs_path($prefix); - } - - my @cmd = qw(git rev-parse --git-dir); - my $cmd = join(' ', @cmd); - my $pid = open my $fh, '-|'; - defined $pid or die "forking $cmd failed: $!\n"; - if ($pid == 0) { - if (defined $cd) { - chdir $cd or die "chdir $cd failed: $!\n"; - } - exec @cmd; - die "Failed to exec $cmd: $!\n"; - } else { - my $dir = eval { - local $/; - <$fh>; - }; - close $fh or die "error in $cmd: $!\n"; - chomp $dir; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); - } -} - if (@ARGV) { @dirs = map { resolve_repo_dir($_) } @ARGV; } else { diff --git a/script/public-inbox-purge b/script/public-inbox-purge new file mode 100755 index 00000000..688dd950 --- /dev/null +++ b/script/public-inbox-purge @@ -0,0 +1,111 @@ +#!/usr/bin/perl -w +# Copyright (C) 2019 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Used for purging messages entirely from a public-inbox. Currently +# supports v2 inboxes only, for now. 
+use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::Config; +use PublicInbox::MIME; +use PublicInbox::Admin qw(resolve_repo_dir); +use PublicInbox::Filter::Base; +*REJECT = *PublicInbox::Filter::Base::REJECT; + +my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message"; + +eval { require PublicInbox::V2Writable } or die + "DBI, DBD::SQLite and Search::Xapian required for purge\n"; +my $config = eval { PublicInbox::Config->new }; +my $cfgfile = PublicInbox::Config::default_file(); +my ($all, $force); +my $verbose = 1; +my %opts = ( + 'all' => \$all, + 'force|f' => \$force, + 'verbose|v!' => \$verbose, +); +GetOptions(%opts) or die "bad command-line args\n", $usage, "\n"; + +# TODO: clean this up and share code with -index via ::Admin +my %dir2ibx; # ( path => Inbox object ) +my @inboxes; +$config and $config->each_inbox(sub { + my ($ibx) = @_; + push @inboxes, $ibx if $all && $ibx->{version} != 1; + $dir2ibx{$ibx->{mainrepo}} = $ibx; +}); + +if ($all) { + $config or die "--all specified, but $cfgfile not readable\n"; + @ARGV and die "--all specified, but directories specified\n"; +} else { + my @err; + my @dirs = scalar(@ARGV) ? 
@ARGV : ('.'); + my $u = 0; + + foreach my $dir (@dirs) { + my $v; + my $dir = resolve_repo_dir($dir, \$v); + if ($v == 1) { + push @err, $dir; + next; + } + my $ibx = $dir2ibx{$dir} ||= do { + warn "$dir not configured in $cfgfile\n"; + $u++; + my $name = "unconfigured-$u"; + PublicInbox::Inbox->new({ + version => 2, + name => $name, + -primary_address => "$name\@example.com", + mainrepo => $dir, + }); + }; + push @inboxes, $ibx; + } + + if (@err) { + die "v1 inboxes currently not supported by -purge\n\t", + join("\n\t", @err), "\n"; + } +} + +my $data = do { local $/; scalar <STDIN> }; +$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $n_purged = 0; + +foreach my $ibx (@inboxes) { + my $mime = PublicInbox::MIME->new($data); + my $v2w = PublicInbox::V2Writable->new($ibx, 0); + + my $commits = $v2w->purge($mime) || []; + + if (my $scrub = $ibx->filter($v2w)) { + my $scrubbed = $scrub->scrub($mime, 1); + + if ($scrubbed && $scrubbed != REJECT()) { + my $scrub_commits = $v2w->purge($scrubbed); + push @$commits, @$scrub_commits if $scrub_commits; + } + } + + $v2w->done; + + if ($verbose) { # should we consider this machine-parseable? + print "$ibx->{mainrepo}:"; + if (scalar @$commits) { + print join("\n\t", '', @$commits), "\n"; + } else { + print " NONE\n"; + } + } + $n_purged += scalar @$commits; +} + +# behave like "rm -f" +exit(0) if ($force || $n_purged); + +warn "Not found\n" if $verbose; +exit(1); |