about summary refs log tree commit homepage
path: root/script
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-01-31 21:08:48 +0000
committer Eric Wong <e@80x24.org> 2019-01-31 21:08:48 +0000
commit 65323f060a3db731bb9fafa004336eeb4bbb8f00 (patch)
tree ceaff52229c942314524f4bcac650e1db175c773 /script
parent ce4fe8f5144f7555ddd42b6a94ec602e042c6e43 (diff)
parent 440b0feaa209e12e4bcb8ef16a95041fce71e7dc (diff)
download public-inbox-65323f060a3db731bb9fafa004336eeb4bbb8f00.tar.gz
* origin/purge:
  implement public-inbox-purge tool
  v2writable: read epoch on purge
  v2writable: cleanup processes when done
  v2writable: purge ignores non-existent git epoch directories
  v2writable: ->purge returns undef on no-op
  import: purge: reap fast-export process
  hoist out resolve_repo_dir from -index
Diffstat (limited to 'script')
-rwxr-xr-x script/public-inbox-index 32
-rwxr-xr-x script/public-inbox-purge 111
2 files changed, 113 insertions, 30 deletions
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 2ae92757..5adb6e74 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -9,9 +9,10 @@
 use strict;
 use warnings;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use Cwd 'abs_path';
 my $usage = "public-inbox-index REPO_DIR";
 use PublicInbox::Config;
+use PublicInbox::Admin qw(resolve_repo_dir);
+
 my $config = eval { PublicInbox::Config->new } || eval {
         warn "public-inbox unconfigured for serving, indexing anyways...\n";
         undef;
@@ -35,35 +36,6 @@ die "--jobs must be positive\n" if defined $jobs && $jobs < 0;
 
 my @dirs;
 
-sub resolve_repo_dir {
-        my ($cd) = @_;
-        my $prefix = defined $cd ? $cd : './';
-        if (-d $prefix && -f "$prefix/inbox.lock") { # v2
-                return abs_path($prefix);
-        }
-
-        my @cmd = qw(git rev-parse --git-dir);
-        my $cmd = join(' ', @cmd);
-        my $pid = open my $fh, '-|';
-        defined $pid or die "forking $cmd failed: $!\n";
-        if ($pid == 0) {
-                if (defined $cd) {
-                        chdir $cd or die "chdir $cd failed: $!\n";
-                }
-                exec @cmd;
-                die "Failed to exec $cmd: $!\n";
-        } else {
-                my $dir = eval {
-                        local $/;
-                        <$fh>;
-                };
-                close $fh or die "error in $cmd: $!\n";
-                chomp $dir;
-                return abs_path($cd) if ($dir eq '.' && defined $cd);
-                abs_path($dir);
-        }
-}
-
 if (@ARGV) {
         @dirs = map { resolve_repo_dir($_) } @ARGV;
 } else {
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
new file mode 100755
index 00000000..688dd950
--- /dev/null
+++ b/script/public-inbox-purge
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Used for purging messages entirely from a public-inbox.  Currently
+# supports v2 inboxes only, for now.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::Config;
+use PublicInbox::MIME;
+use PublicInbox::Admin qw(resolve_repo_dir);
+use PublicInbox::Filter::Base;
+*REJECT = *PublicInbox::Filter::Base::REJECT;
+
+my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
+
+eval { require PublicInbox::V2Writable } or die
+        "DBI, DBD::SQLite and Search::Xapian required for purge\n";
+my $config = eval { PublicInbox::Config->new };
+my $cfgfile = PublicInbox::Config::default_file();
+my ($all, $force);
+my $verbose = 1;
+my %opts = (
+        'all' => \$all,
+        'force|f' => \$force,
+        'verbose|v!' => \$verbose,
+);
+GetOptions(%opts) or die "bad command-line args\n", $usage, "\n";
+
+# TODO: clean this up and share code with -index via ::Admin
+my %dir2ibx; # ( path => Inbox object )
+my @inboxes;
+$config and $config->each_inbox(sub {
+        my ($ibx) = @_;
+        push @inboxes, $ibx if $all && $ibx->{version} != 1;
+        $dir2ibx{$ibx->{mainrepo}} = $ibx;
+});
+
+if ($all) {
+        $config or die "--all specified, but $cfgfile not readable\n";
+        @ARGV and die "--all specified, but directories specified\n";
+} else {
+        my @err;
+        my @dirs = scalar(@ARGV) ? @ARGV : ('.');
+        my $u = 0;
+
+        foreach my $dir (@dirs) {
+                my $v;
+                my $dir = resolve_repo_dir($dir, \$v);
+                if ($v == 1) {
+                        push @err, $dir;
+                        next;
+                }
+                my $ibx = $dir2ibx{$dir} ||= do {
+                        warn "$dir not configured in $cfgfile\n";
+                        $u++;
+                        my $name = "unconfigured-$u";
+                        PublicInbox::Inbox->new({
+                                version => 2,
+                                name => $name,
+                                -primary_address => "$name\@example.com",
+                                mainrepo => $dir,
+                        });
+                };
+                push @inboxes, $ibx;
+        }
+
+        if (@err) {
+                die "v1 inboxes currently not supported by -purge\n\t",
+                    join("\n\t", @err), "\n";
+        }
+}
+
+my $data = do { local $/; scalar <STDIN> };
+$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+my $n_purged = 0;
+
+foreach my $ibx (@inboxes) {
+        my $mime = PublicInbox::MIME->new($data);
+        my $v2w = PublicInbox::V2Writable->new($ibx, 0);
+
+        my $commits = $v2w->purge($mime) || [];
+
+        if (my $scrub = $ibx->filter($v2w)) {
+                my $scrubbed = $scrub->scrub($mime, 1);
+
+                if ($scrubbed && $scrubbed != REJECT()) {
+                        my $scrub_commits = $v2w->purge($scrubbed);
+                        push @$commits, @$scrub_commits if $scrub_commits;
+                }
+        }
+
+        $v2w->done;
+
+        if ($verbose) { # should we consider this machine-parseable?
+                print "$ibx->{mainrepo}:";
+                if (scalar @$commits) {
+                        print join("\n\t", '', @$commits), "\n";
+                } else {
+                        print " NONE\n";
+                }
+        }
+        $n_purged += scalar @$commits;
+}
+
+# behave like "rm -f"
+exit(0) if ($force || $n_purged);
+
+warn "Not found\n" if $verbose;
+exit(1);