about summary refs log tree commit homepage
path: root/script
diff options
author Eric Wong <e@80x24.org> 2019-06-14 16:23:00 +0000
committer Eric Wong <e@80x24.org> 2019-06-14 16:23:00 +0000
commit d9aad3a7988262b31ab2171f2c982daf04ac822c (patch)
tree 180c3cabb5d5ef584606d29136793362184cfb91 /script
parent 19629ff99de0eb43a26963b8c276eae54f1247c8 (diff)
parent 3fd2143efc1c96f0000af2ae7a6dee0990337fc4 (diff)
* origin/edit:
  edit: unlink temporary file when done
  v2writable: replace: kill git processes before reindexing
  edit: drop unwanted headers before noop check
  edit|purge: improve output on rewrites
  edit: new tool to perform edits
  doc: document the --prune option for -index
  admin: expose ->config
  AdminEdit: move editability checks from -purge
  admin: beef up resolve_inboxes to handle purge options
  purge: start moving common options to AdminEdit module
  admin: remove warning arg for unconfigured inboxes
  v2writable: implement ->replace call
  import: switch to "replace_oids" interface for purge
  import: extract_author_info becomes extract_commit_info
  v2writable: consolidate overview and indexing call
Diffstat (limited to 'script')
2 files changed, 245 insertions, 99 deletions
diff --git a/script/public-inbox-edit b/script/public-inbox-edit
new file mode 100755
index 00000000..2e2c7616
--- /dev/null
+++ b/script/public-inbox-edit
@@ -0,0 +1,234 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Used for editing messages in a public-inbox.
+# Supports v2 inboxes only, for now.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::AdminEdit;
+use File::Temp qw(tempfile);
+use PublicInbox::ContentId qw(content_id);
+use PublicInbox::MID qw(mid_clean mids);
+require PublicInbox::MIME;
+require PublicInbox::InboxWritable;
+require PublicInbox::Import;
+my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]";
+my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 };
+my @opt = qw(mid|m=s file|F=s raw);
+GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or
+        die "bad command-line args\n$usage\n";
+my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f"
+unless (defined $editor) {
+        my $k = 'publicinbox.mailEditor';
+        if (my $cfg = PublicInbox::Admin::config()) {
+                $editor = $cfg->{lc($k)};
+        }
+        unless (defined $editor) {
+                warn "\`$k' not configured, trying \`git var GIT_EDITOR'\n";
+                chomp($editor = `git var GIT_EDITOR`);
+                warn "Will use $editor to edit mail\n";
+        }
+my $mid = $opt->{mid};
+my $file = $opt->{file};
+if (defined $mid && defined $file) {
+        die "the --mid and --file options are mutually exclusive\n";
+my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
+my $found = {}; # cid => [ [ibx, smsg] [, [ibx, smsg] ] ]
+sub find_mid ($) {
+        my ($mid) = @_;
+        foreach my $ibx (@ibxs) {
+                my $over = $ibx->over;
+                my ($id, $prev);
+                while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
+                        my $ref = $ibx->msg_by_smsg($smsg);
+                        my $mime = PublicInbox::MIME->new($ref);
+                        my $cid = content_id($mime);
+                        my $tuple = [ $ibx, $smsg ];
+                        push @{$found->{$cid} ||= []}, $tuple
+                }
+                delete @$ibx{qw(over mm git search)}; # cleanup
+        }
+        $found;
+sub show_cmd ($$) {
+        my ($ibx, $smsg) = @_;
+        " GIT_DIR=$ibx->{mainrepo}/all.git \\\n    git show $smsg->{blob}\n";
+sub show_found () {
+        foreach my $to_edit (values %$found) {
+                foreach my $tuple (@$to_edit) {
+                        my ($ibx, $smsg) = @$tuple;
+                        warn show_cmd($ibx, $smsg);
+                }
+        }
+if (defined($mid)) {
+        $mid = mid_clean($mid);
+        $found = find_mid($mid);
+        my $nr = scalar(keys %$found);
+        die "No message found for <$mid>\n" unless $nr;
+        if ($nr > 1) {
+                warn <<"";
+Multiple messages with different content found matching
+                show_found();
+                die "Use --force to edit all of them\n" if !$opt->{force};
+                warn "Will edit all of them\n";
+        }
+} else {
+        open my $fh, '<', $file or die "open($file) failed: $!";
+        my $orig = do { local $/; <$fh> };
+        my $mime = PublicInbox::MIME->new(\$orig);
+        my $mids = mids($mime->header_obj);
+        find_mid($_) for (@$mids); # populates $found
+        my $cid = content_id($mime);
+        my $to_edit = $found->{$cid};
+        unless ($to_edit) {
+                my $nr = scalar(keys %$found);
+                if ($nr > 0) {
+                        warn <<"";
+$nr matches to Message-ID(s) in $file, but none matched content
+Partial matches below:
+                        show_found();
+                } elsif ($nr == 0) {
+                        $mids = join('', map { "  <$_>\n" } @$mids);
+                        warn <<"";
+No matching messages found matching Message-ID(s) in $file
+                }
+                exit 1;
+        }
+        $found = { $cid => $to_edit };
+my $tmpl = 'public-inbox-edit-XXXXXX';
+foreach my $to_edit (values %$found) {
+        my ($edit_fh, $edit_fn) = tempfile($tmpl, TMPDIR => 1, UNLINK => 1);
+        $edit_fh->autoflush(1);
+        my ($ibx, $smsg) = @{$to_edit->[0]};
+        my $old_raw = $ibx->msg_by_smsg($smsg);
+        delete @$ibx{qw(over mm git search)}; # cleanup
+        my $tmp = $$old_raw;
+        if (!$opt->{raw}) {
+                my $oid = $smsg->{blob};
+                print $edit_fh "From mboxrd\@$oid Thu Jan  1 00:00:00 1970\n";
+                $tmp =~ s/^(>*From )/>$1/gm;
+        }
+        print $edit_fh $tmp or
+                die "failed to write tempfile for editing: $!";
+        # run the editor, respecting spaces/quote
+        if (system(qw(sh -c), qq(eval "$editor" '"\$@"'), '--', $edit_fn)) {
+                if (!(-t STDIN) && !$opt->{force}) {
+                        die "E: $editor failed: $?\n";
+                }
+                print STDERR "$editor failed, ";
+                print STDERR "continuing as forced\n" if $opt->{force};
+                while (!$opt->{force}) {
+                        print STDERR "(r)etry, (c)ontinue, (q)uit?\n";
+                        chomp(my $op = <STDIN> || '');
+                        $op = lc($op);
+                        goto retry_edit if $op eq 'r';
+                        exit $? if $op eq 'q';
+                        last if $op eq 'c'; # continuing
+                        print STDERR "\`$op' not recognized\n";
+                }
+        }
+        # reread the edited file, not using $edit_fh since $EDITOR may
+        # rename/relink $edit_fn
+        open my $new_fh, '<', $edit_fn or
+                die "can't read edited file ($edit_fn): $!\n";
+        my $new_raw = do { local $/; <$new_fh> };
+        if (!$opt->{raw}) {
+                # get rid of the From we added
+                $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+                # check if user forgot to purge (in mutt) after editing
+                if ($new_raw =~ /^From /sm) {
+                        if (-t STDIN) {
+                                print STDERR <<'';
+Extra "From " lines detected in new mbox.
+Did you forget to purge the original message from the mbox after editing?
+                                while (1) {
+                                        print STDERR <<"";
+(y)es to re-edit, (n)o to continue
+                                        chomp(my $op = <STDIN> || '');
+                                        $op = lc($op);
+                                        goto retry_edit if $op eq 'y';
+                                        last if $op eq 'n'; # continuing
+                                        print STDERR "\`$op' not recognized\n";
+                                }
+                        } else { # non-interactive path
+                                # unlikely to happen, as extra From lines are
+                                # only a common mistake (for me) with
+                                # interactive use
+                                warn <<"";
+W: possible message boundary splitting error
+                        }
+                }
+                # unescape what we escaped:
+                $new_raw =~ s/^>(>*From )/$1/gm;
+        }
+        my $new_mime = PublicInbox::MIME->new(\$new_raw);
+        my $old_mime = PublicInbox::MIME->new($old_raw);
+        # make sure we don't compare unwanted headers, since mutt adds
+        # Content-Length, Status, and Lines headers:
+        PublicInbox::Import::drop_unwanted_headers($new_mime);
+        PublicInbox::Import::drop_unwanted_headers($old_mime);
+        # allow changing Received: and maybe other headers which can
+        # contain sensitive info.
+        my $nhdr = $new_mime->header_obj;
+        my $ohdr = $old_mime->header_obj;
+        if (($nhdr->as_string eq $ohdr->as_string) &&
+            (content_id($new_mime) eq content_id($old_mime))) {
+                warn "No change detected to:\n", show_cmd($ibx, $smsg);
+                next unless $opt->{verbose};
+                # should we consider this machine-parseable?
+                PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, []);
+                next;
+        }
+        foreach my $tuple (@$to_edit) {
+                $ibx = PublicInbox::InboxWritable->new($tuple->[0]);
+                $smsg = $tuple->[1];
+                my $im = $ibx->importer(0);
+                my $commits = $im->replace($old_mime, $new_mime);
+                $im->done;
+                unless ($commits) {
+                        warn "Failed to replace:\n", show_cmd($ibx, $smsg);
+                        next;
+                }
+                next unless $opt->{verbose};
+                # should we consider this machine-parseable?
+                PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits);
+        }
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 25e6cc9b..0705d170 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -7,110 +7,27 @@
 use strict;
 use warnings;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use PublicInbox::Admin qw(resolve_repo_dir);
+use PublicInbox::AdminEdit;
 require PublicInbox::Filter::Base;
-require PublicInbox::Config;
 require PublicInbox::MIME;
 require PublicInbox::V2Writable;
 { no warnings 'once'; *REJECT = *PublicInbox::Filter::Base::REJECT }
 my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
-my $config = eval { PublicInbox::Config->new };
-my $cfgfile = PublicInbox::Config::default_file();
-my ($all, $force);
-my $verbose = 1;
-my %opts = (
-        'all' => \$all,
-        'force|f' => \$force,
-        'verbose|v!' => \$verbose,
-GetOptions(%opts) or die "bad command-line args\n", $usage, "\n";
+my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 };
+GetOptions($opt, @PublicInbox::AdminEdit::OPT) or
+        die "bad command-line args\n$usage\n";
-# TODO: clean this up and share code with -index via ::Admin
-my %dir2ibx; # ( path => Inbox object )
-my @inboxes;
-$config and $config->each_inbox(sub {
-        my ($ibx) = @_;
-        push @inboxes, $ibx if $all && $ibx->{version} != 1;
-        $dir2ibx{$ibx->{mainrepo}} = $ibx;
-if ($all) {
-        $config or die "--all specified, but $cfgfile not readable\n";
-        @ARGV and die "--all specified, but directories specified\n";
-} else {
-        my @err;
-        my @dirs = scalar(@ARGV) ? @ARGV : ('.');
-        my $u = 0;
-        foreach my $dir (@dirs) {
-                my $v;
-                my $dir = resolve_repo_dir($dir, \$v);
-                if ($v == 1) {
-                        push @err, $dir;
-                        next;
-                }
-                my $ibx = $dir2ibx{$dir} ||= do {
-                        warn "$dir not configured in $cfgfile\n";
-                        $u++;
-                        my $name = "unconfigured-$u";
-                        PublicInbox::Inbox->new({
-                                version => 2,
-                                name => $name,
-                                -primary_address => "$name\@example.com",
-                                mainrepo => $dir,
-                        });
-                };
-                push @inboxes, $ibx;
-        }
-        if (@err) {
-                die "v1 inboxes currently not supported by -purge\n\t",
-                    join("\n\t", @err), "\n";
-        }
-foreach my $ibx (@inboxes) {
-        my $lvl = $ibx->{indexlevel};
-        if (defined $lvl) {
-                PublicInbox::Admin::indexlevel_ok_or_die($lvl);
-                next;
-        }
-        # Undefined indexlevel, so `full'...
-        # Search::Xapian exists and the DB can be read, at least, fine
-        $ibx->search and next;
-        # it's possible for a Xapian directory to exist, but Search::Xapian
-        # to go missing/broken.  Make sure it's purged in that case:
-        $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n";
-        # $ibx->{search} is populated by $ibx->over call
-        my $xdir_ro = $ibx->{search}->xdir(1);
-        my $npart = 0;
-        foreach my $part (<$xdir_ro/*>) {
-                if (-d $part && $part =~ m!/[0-9]+\z!) {
-                        my $bytes = 0;
-                        $bytes += -s $_ foreach glob("$part/*");
-                        $npart++ if $bytes;
-                }
-        }
-        if ($npart) {
-                PublicInbox::Admin::require_or_die('-search');
-        } else {
-                # somebody could "rm -r" all the Xapian directories;
-                # let them purge the overview, at least
-                $ibx->{indexlevel} ||= 'basic';
-        }
+my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
 my $data = do { local $/; scalar <STDIN> };
 $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
 my $n_purged = 0;
-foreach my $ibx (@inboxes) {
+foreach my $ibx (@ibxs) {
         my $mime = PublicInbox::MIME->new($data);
         my $v2w = PublicInbox::V2Writable->new($ibx, 0);
@@ -127,19 +44,14 @@ foreach my $ibx (@inboxes) {
-        if ($verbose) { # should we consider this machine-parseable?
-                print "$ibx->{mainrepo}:";
-                if (scalar @$commits) {
-                        print join("\n\t", '', @$commits), "\n";
-                } else {
-                        print " NONE\n";
-                }
+        if ($opt->{verbose}) { # should we consider this machine-parseable?
+                PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits);
         $n_purged += scalar @$commits;
 # behave like "rm -f"
-exit(0) if ($force || $n_purged);
+exit(0) if ($opt->{force} || $n_purged);
-warn "Not found\n" if $verbose;
+warn "Not found\n" if $opt->{verbose};