From 87817450840f67a862c611a13d65e998c70a5743 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 02:51:42 +0000 Subject: purge: start moving common options to AdminEdit module Editing and purging are similar operations involving history rewrites, so there'll be common options and code between them. --- script/public-inbox-purge | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'script') diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 25e6cc9b..d58a9baa 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -7,7 +7,7 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::Admin qw(resolve_repo_dir); +use PublicInbox::AdminEdit; PublicInbox::Admin::check_require('-index'); require PublicInbox::Filter::Base; require PublicInbox::Config; @@ -19,25 +19,20 @@ require PublicInbox::V2Writable; my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message"; my $config = eval { PublicInbox::Config->new }; my $cfgfile = PublicInbox::Config::default_file(); -my ($all, $force); -my $verbose = 1; -my %opts = ( - 'all' => \$all, - 'force|f' => \$force, - 'verbose|v!' 
=> \$verbose, -); -GetOptions(%opts) or die "bad command-line args\n", $usage, "\n"; +my $opt = { verbose => 1 }; +GetOptions($opt, @PublicInbox::AdminEdit::OPT) or + die "bad command-line args\n$usage\n"; # TODO: clean this up and share code with -index via ::Admin my %dir2ibx; # ( path => Inbox object ) my @inboxes; $config and $config->each_inbox(sub { my ($ibx) = @_; - push @inboxes, $ibx if $all && $ibx->{version} != 1; + push @inboxes, $ibx if $opt->{all} && $ibx->{version} != 1; $dir2ibx{$ibx->{mainrepo}} = $ibx; }); -if ($all) { +if ($opt->{all}) { $config or die "--all specified, but $cfgfile not readable\n"; @ARGV and die "--all specified, but directories specified\n"; } else { @@ -47,7 +42,7 @@ if ($all) { foreach my $dir (@dirs) { my $v; - my $dir = resolve_repo_dir($dir, \$v); + my $dir = PublicInbox::Admin::resolve_repo_dir($dir, \$v); if ($v == 1) { push @err, $dir; next; @@ -127,7 +122,7 @@ foreach my $ibx (@inboxes) { $v2w->done; - if ($verbose) { # should we consider this machine-parseable? + if ($opt->{verbose}) { # should we consider this machine-parseable? print "$ibx->{mainrepo}:"; if (scalar @$commits) { print join("\n\t", '', @$commits), "\n"; @@ -139,7 +134,7 @@ foreach my $ibx (@inboxes) { } # behave like "rm -f" -exit(0) if ($force || $n_purged); +exit(0) if ($opt->{force} || $n_purged); -warn "Not found\n" if $verbose; +warn "Not found\n" if $opt->{verbose}; exit(1); -- cgit v1.2.3-24-ge0c7 From ce1391fcd1ad6027bbd8413f6c5277b5cc0445d3 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 02:51:43 +0000 Subject: admin: beef up resolve_inboxes to handle purge options We'll be using this in -edit, and maybe other admin-oriented tools for UI-consistency. 
--- script/public-inbox-purge | 53 ++++------------------------------------------- 1 file changed, 4 insertions(+), 49 deletions(-) (limited to 'script') diff --git a/script/public-inbox-purge b/script/public-inbox-purge index d58a9baa..dc7f89d9 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -10,64 +10,19 @@ use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; PublicInbox::Admin::check_require('-index'); require PublicInbox::Filter::Base; -require PublicInbox::Config; require PublicInbox::MIME; require PublicInbox::V2Writable; { no warnings 'once'; *REJECT = *PublicInbox::Filter::Base::REJECT } my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message"; -my $config = eval { PublicInbox::Config->new }; -my $cfgfile = PublicInbox::Config::default_file(); -my $opt = { verbose => 1 }; +my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 }; GetOptions($opt, @PublicInbox::AdminEdit::OPT) or die "bad command-line args\n$usage\n"; -# TODO: clean this up and share code with -index via ::Admin -my %dir2ibx; # ( path => Inbox object ) -my @inboxes; -$config and $config->each_inbox(sub { - my ($ibx) = @_; - push @inboxes, $ibx if $opt->{all} && $ibx->{version} != 1; - $dir2ibx{$ibx->{mainrepo}} = $ibx; -}); - -if ($opt->{all}) { - $config or die "--all specified, but $cfgfile not readable\n"; - @ARGV and die "--all specified, but directories specified\n"; -} else { - my @err; - my @dirs = scalar(@ARGV) ? 
@ARGV : ('.'); - my $u = 0; - - foreach my $dir (@dirs) { - my $v; - my $dir = PublicInbox::Admin::resolve_repo_dir($dir, \$v); - if ($v == 1) { - push @err, $dir; - next; - } - my $ibx = $dir2ibx{$dir} ||= do { - warn "$dir not configured in $cfgfile\n"; - $u++; - my $name = "unconfigured-$u"; - PublicInbox::Inbox->new({ - version => 2, - name => $name, - -primary_address => "$name\@example.com", - mainrepo => $dir, - }); - }; - push @inboxes, $ibx; - } - - if (@err) { - die "v1 inboxes currently not supported by -purge\n\t", - join("\n\t", @err), "\n"; - } -} +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); -foreach my $ibx (@inboxes) { +foreach my $ibx (@ibxs) { my $lvl = $ibx->{indexlevel}; if (defined $lvl) { PublicInbox::Admin::indexlevel_ok_or_die($lvl); @@ -105,7 +60,7 @@ my $data = do { local $/; scalar <STDIN> }; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; -foreach my $ibx (@inboxes) { +foreach my $ibx (@ibxs) { my $mime = PublicInbox::MIME->new($data); my $v2w = PublicInbox::V2Writable->new($ibx, 0); -- cgit v1.2.3-24-ge0c7 From 455a1c0ef0519b2048c34d2fb31c9e9adbe3f507 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 02:51:44 +0000 Subject: AdminEdit: move editability checks from -purge We'll be reusing the same logic for -edit --- script/public-inbox-purge | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) (limited to 'script') diff --git a/script/public-inbox-purge b/script/public-inbox-purge index dc7f89d9..846557ca 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -21,40 +21,7 @@ GetOptions($opt, @PublicInbox::AdminEdit::OPT) or die "bad command-line args\n$usage\n"; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); - -foreach my $ibx (@ibxs) { - my $lvl = $ibx->{indexlevel}; - if (defined $lvl) { - PublicInbox::Admin::indexlevel_ok_or_die($lvl); - next; - } - - # Undefined indexlevel, so `full'... 
- # Search::Xapian exists and the DB can be read, at least, fine - $ibx->search and next; - - # it's possible for a Xapian directory to exist, but Search::Xapian - # to go missing/broken. Make sure it's purged in that case: - $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n"; - - # $ibx->{search} is populated by $ibx->over call - my $xdir_ro = $ibx->{search}->xdir(1); - my $npart = 0; - foreach my $part (<$xdir_ro/*>) { - if (-d $part && $part =~ m!/[0-9]+\z!) { - my $bytes = 0; - $bytes += -s $_ foreach glob("$part/*"); - $npart++ if $bytes; - } - } - if ($npart) { - PublicInbox::Admin::require_or_die('-search'); - } else { - # somebody could "rm -r" all the Xapian directories; - # let them purge the overview, at least - $ibx->{indexlevel} ||= 'basic'; - } -} +PublicInbox::AdminEdit::check_editable(\@ibxs); my $data = do { local $/; scalar <STDIN> }; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; -- cgit v1.2.3-24-ge0c7 From 45890d532f0ea68f5879b036b22d9dbd4e19754c Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 02:51:47 +0000 Subject: edit: new tool to perform edits This wrapper around V2Writable->replace provides a user-interface for editing messages as single-message mboxes (or the raw text via $EDITOR). --- script/public-inbox-edit | 233 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100755 script/public-inbox-edit (limited to 'script') diff --git a/script/public-inbox-edit b/script/public-inbox-edit new file mode 100755 index 00000000..ff0351a3 --- /dev/null +++ b/script/public-inbox-edit @@ -0,0 +1,233 @@ +#!/usr/bin/perl -w +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +# +# Used for editing messages in a public-inbox. +# Supports v2 inboxes only, for now. 
+use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::AdminEdit; +use File::Temp qw(tempfile); +use PublicInbox::ContentId qw(content_id); +use PublicInbox::MID qw(mid_clean mids); +PublicInbox::Admin::check_require('-index'); +require PublicInbox::MIME; +require PublicInbox::InboxWritable; + +my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]"; +my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; +my @opt = qw(mid|m=s file|F=s raw); +GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or + die "bad command-line args\n$usage\n"; + +my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f" +unless (defined $editor) { + my $k = 'publicinbox.mailEditor'; + if (my $cfg = PublicInbox::Admin::config()) { + $editor = $cfg->{lc($k)}; + } + unless (defined $editor) { + warn "\`$k' not configured, trying \`git var GIT_EDITOR'\n"; + chomp($editor = `git var GIT_EDITOR`); + warn "Will use $editor to edit mail\n"; + } +} + +my $mid = $opt->{mid}; +my $file = $opt->{file}; +if (defined $mid && defined $file) { + die "the --mid and --file options are mutually exclusive\n"; +} + +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); +PublicInbox::AdminEdit::check_editable(\@ibxs); + +my $found = {}; # cid => [ [ibx, smsg] [, [ibx, smsg] ] ] + +sub find_mid ($) { + my ($mid) = @_; + foreach my $ibx (@ibxs) { + my $over = $ibx->over; + my ($id, $prev); + while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { + my $ref = $ibx->msg_by_smsg($smsg); + my $mime = PublicInbox::MIME->new($ref); + my $cid = content_id($mime); + my $tuple = [ $ibx, $smsg ]; + push @{$found->{$cid} ||= []}, $tuple + } + delete @$ibx{qw(over mm git search)}; # cleanup + } + $found; +} + +sub show_cmd ($$) { + my ($ibx, $smsg) = @_; + " GIT_DIR=$ibx->{mainrepo}/all.git \\\n git show $smsg->{blob}\n"; +} + +sub show_found () { + foreach my $to_edit (values %$found) { + foreach my $tuple (@$to_edit) { + my ($ibx, $smsg) = 
@$tuple; + warn show_cmd($ibx, $smsg); + } + } +} + +if (defined($mid)) { + $mid = mid_clean($mid); + $found = find_mid($mid); + my $nr = scalar(keys %$found); + die "No message found for <$mid>\n" unless $nr; + if ($nr > 1) { + warn <<""; +Multiple messages with different content found matching +<$mid>: + + show_found(); + die "Use --force to edit all of them\n" if !$opt->{force}; + warn "Will edit all of them\n"; + } +} else { + open my $fh, '<', $file or die "open($file) failed: $!"; + my $orig = do { local $/; <$fh> }; + my $mime = PublicInbox::MIME->new(\$orig); + my $mids = mids($mime->header_obj); + find_mid($_) for (@$mids); # populates $found + my $cid = content_id($mime); + my $to_edit = $found->{$cid}; + unless ($to_edit) { + my $nr = scalar(keys %$found); + if ($nr > 0) { + warn <<""; +$nr matches to Message-ID(s) in $file, but none matched content +Partial matches below: + + show_found(); + } elsif ($nr == 0) { + $mids = join('', map { " <$_>\n" } @$mids); + warn <<""; +No matching messages found matching Message-ID(s) in $file +$mids + + } + exit 1; + } + $found = { $cid => $to_edit }; +} + +my $tmpl = 'public-inbox-edit-XXXXXX'; +foreach my $to_edit (values %$found) { + my ($edit_fh, $edit_fn) = tempfile($tmpl, TMPDIR => 1); + $edit_fh->autoflush(1); + my ($ibx, $smsg) = @{$to_edit->[0]}; + my $old_raw = $ibx->msg_by_smsg($smsg); + delete @$ibx{qw(over mm git search)}; # cleanup + + my $tmp = $$old_raw; + if (!$opt->{raw}) { + my $oid = $smsg->{blob}; + print $edit_fh "From mboxrd\@$oid Thu Jan 1 00:00:00 1970\n"; + $tmp =~ s/^(>*From )/>$1/gm; + } + print $edit_fh $tmp or + die "failed to write tempfile for editing: $!"; + + # run the editor, respecting spaces/quote +retry_edit: + if (system(qw(sh -c), qq(eval "$editor" '"\$@"'), '--', $edit_fn)) { + if (!(-t STDIN) && !$opt->{force}) { + die "E: $editor failed: $?\n"; + } + print STDERR "$editor failed, "; + print STDERR "continuing as forced\n" if $opt->{force}; + while (!$opt->{force}) { + print 
STDERR "(r)etry, (c)ontinue, (q)uit?\n"; + chomp(my $op = <STDIN> || ''); + $op = lc($op); + goto retry_edit if $op eq 'r'; + exit $? if $op eq 'q'; + last if $op eq 'c'; # continuing + print STDERR "\`$op' not recognized\n"; + } + } + + # reread the edited file, not using $edit_fh since $EDITOR may + # rename/relink $edit_fn + open my $new_fh, '<', $edit_fn or + die "can't read edited file ($edit_fn): $!\n"; + my $new_raw = do { local $/; <$new_fh> }; + + if (!$opt->{raw}) { + # get rid of the From we added + $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + + # check if user forgot to purge (in mutt) after editing + if ($new_raw =~ /^From /sm) { + if (-t STDIN) { + print STDERR <<''; +Extra "From " lines detected in new mbox. +Did you forget to purge the original message from the mbox after editing? + + while (1) { + print STDERR <<""; +(y)es to re-edit, (n)o to continue + + chomp(my $op = <STDIN> || ''); + $op = lc($op); + goto retry_edit if $op eq 'y'; + last if $op eq 'n'; # continuing + print STDERR "\`$op' not recognized\n"; + } + } else { # non-interactive path + # unlikely to happen, as extra From lines are + # only a common mistake (for me) with + # interactive use + warn <<""; +W: possible message boundary splitting error + + } + } + # unescape what we escaped: + $new_raw =~ s/^>(>*From )/$1/gm; + } + + my $new_mime = PublicInbox::MIME->new(\$new_raw); + my $old_mime = PublicInbox::MIME->new($old_raw); + + # allow changing Received: and maybe other headers which can + # contain sensitive info. + my $nhdr = $new_mime->header_obj; + my $ohdr = $old_mime->header_obj; + if (($nhdr->as_string eq $ohdr->as_string) && + (content_id($new_mime) eq content_id($old_mime))) { + warn "No change detected to:\n", show_cmd($ibx, $smsg); + + next unless $opt->{verbose}; + # should we consider this machine-parseable? 
+ print "$ibx->{mainrepo}:\n\tNONE\n"; + next; + } + + foreach my $tuple (@$to_edit) { + $ibx = PublicInbox::InboxWritable->new($tuple->[0]); + $smsg = $tuple->[1]; + my $im = $ibx->importer(0); + my $commits = $im->replace($old_mime, $new_mime); + $im->done; + unless ($commits) { + warn "Failed to replace:\n", show_cmd($ibx, $smsg); + next; + } + next unless $opt->{verbose}; + # should we consider this machine-parseable? + print "$ibx->{mainrepo}:"; + if (scalar @$commits) { + print join("\n\t", '', @$commits), "\n"; + } else { + print "\tNONE\n"; + } + } +} -- cgit v1.2.3-24-ge0c7 From 6e507c8cb41b0d48963503a88034348d74506211 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 10 Jun 2019 16:58:55 +0000 Subject: edit|purge: improve output on rewrites Fill in undef as "(unchanged)" when displaying commits and prefix the epoch name. --- script/public-inbox-edit | 9 ++------- script/public-inbox-purge | 7 +------ 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'script') diff --git a/script/public-inbox-edit b/script/public-inbox-edit index ff0351a3..7a534cc2 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -207,7 +207,7 @@ W: possible message boundary splitting error next unless $opt->{verbose}; # should we consider this machine-parseable? - print "$ibx->{mainrepo}:\n\tNONE\n"; + PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, []); next; } @@ -223,11 +223,6 @@ W: possible message boundary splitting error } next unless $opt->{verbose}; # should we consider this machine-parseable? 
- print "$ibx->{mainrepo}:"; - if (scalar @$commits) { - print join("\n\t", '', @$commits), "\n"; - } else { - print "\tNONE\n"; - } + PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } } diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 846557ca..0705d170 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -45,12 +45,7 @@ foreach my $ibx (@ibxs) { $v2w->done; if ($opt->{verbose}) { # should we consider this machine-parseable? - print "$ibx->{mainrepo}:"; - if (scalar @$commits) { - print join("\n\t", '', @$commits), "\n"; - } else { - print " NONE\n"; - } + PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } $n_purged += scalar @$commits; } -- cgit v1.2.3-24-ge0c7 From d377a9d2f972f85edf759a6c152ca49078baba38 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 10 Jun 2019 18:09:27 +0000 Subject: edit: drop unwanted headers before noop check mutt will set Content-Length, Lines, and Status headers unconditionally, so we need to account for that before doing header comparisons to avoid making expensive changes when noop edits are made. 
--- script/public-inbox-edit | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'script') diff --git a/script/public-inbox-edit b/script/public-inbox-edit index 7a534cc2..16d78529 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -14,6 +14,7 @@ use PublicInbox::MID qw(mid_clean mids); PublicInbox::Admin::check_require('-index'); require PublicInbox::MIME; require PublicInbox::InboxWritable; +require PublicInbox::Import; my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]"; my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; @@ -197,6 +198,11 @@ W: possible message boundary splitting error my $new_mime = PublicInbox::MIME->new(\$new_raw); my $old_mime = PublicInbox::MIME->new($old_raw); + # make sure we don't compare unwanted headers, since mutt adds + # Content-Length, Status, and Lines headers: + PublicInbox::Import::drop_unwanted_headers($new_mime); + PublicInbox::Import::drop_unwanted_headers($old_mime); + # allow changing Received: and maybe other headers which can # contain sensitive info. my $nhdr = $new_mime->header_obj; -- cgit v1.2.3-24-ge0c7 From 3fd2143efc1c96f0000af2ae7a6dee0990337fc4 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Tue, 11 Jun 2019 18:34:58 +0000 Subject: edit: unlink temporary file when done We don't need to leave temporary files lying around. --- script/public-inbox-edit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'script') diff --git a/script/public-inbox-edit b/script/public-inbox-edit index 16d78529..2e2c7616 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -121,7 +121,7 @@ $mids my $tmpl = 'public-inbox-edit-XXXXXX'; foreach my $to_edit (values %$found) { - my ($edit_fh, $edit_fn) = tempfile($tmpl, TMPDIR => 1); + my ($edit_fh, $edit_fn) = tempfile($tmpl, TMPDIR => 1, UNLINK => 1); $edit_fh->autoflush(1); my ($ibx, $smsg) = @{$to_edit->[0]}; my $old_raw = $ibx->msg_by_smsg($smsg); -- cgit v1.2.3-24-ge0c7