From 60321ef7c5e8c2abc395bebadadf77d789a06945 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 02:51:40 +0000 Subject: v2writable: implement ->replace call Much of the existing purge code is repurposed to a general "replace" functionality. ->purge is simpler because it can just drop the information. Unlike ->purge, ->replace needs to edit existing git commits (in case of From: and Subject: headers) and reindex the modified message. We currently disallow editing of References:, In-Reply-To: and Message-ID headers because it can cause bad side effects with our threading (and our lack of rethreading support to deal with excessive matching from incorrect/invalid References). --- t/replace.t | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 t/replace.t (limited to 't') diff --git a/t/replace.t b/t/replace.t new file mode 100644 index 00000000..6fae5511 --- /dev/null +++ b/t/replace.t @@ -0,0 +1,199 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use PublicInbox::MIME; +use PublicInbox::InboxWritable; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); # replace is v2 only, for now... +foreach my $mod (qw(DBD::SQLite)) { + eval "require $mod"; + plan skip_all => "$mod missing for $0" if $@; +} + +sub test_replace ($$$) { + my ($v, $level, $opt) = @_; + diag "v$v $level replace"; + my $this = "pi-$v-$level-replace"; + my $tmpdir = tempdir("$this-tmp-XXXXXX", TMPDIR => 1, CLEANUP => 1); + my $ibx = PublicInbox::Inbox->new({ + mainrepo => "$tmpdir/testbox", + name => $this, + version => $v, + -primary_address => 'test@example.com', + indexlevel => $level, + }); + + my $orig = PublicInbox::MIME->new(<<'EOF'); +From: Barbra Streisand +To: test@example.com +Subject: confidential +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +Top secret info about my house in Malibu... +EOF + my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer; + # fake a bunch of epochs + $im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes}; + + if ($opt->{pre}) { + $opt->{pre}->($im, 1, 2); + $orig->header_set('References', '<1@example.com>'); + } + ok($im->add($orig), 'add message to be replaced'); + if ($opt->{post}) { + $opt->{post}->($im, 3, { 4 => 'replace@example.com' }); + } + $im->done; + my $thread_a = $ibx->over->get_thread('replace@example.com'); + + my %before = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; + my $reject = PublicInbox::MIME->new($orig->as_string); + foreach my $mid (['', ''], + [], ['']) { + $reject->header_set('Message-ID', @$mid); + my $ok = eval { $im->replace($orig, $reject) }; + like($@, qr/Message-ID.*may not be changed/, + '->replace died on Message-ID change'); + ok(!$ok, 'no replacement happened'); + } + + # prepare the replacement + my $expect = "Move along, nothing to see here\n"; + my $repl = PublicInbox::MIME->new($orig->as_string); + $repl->header_set('From', ''); + $repl->header_set('Subject', 'redacted'); + $repl->header_set('Date', 'Sat, 02 Oct 2010 00:00:00 +0000'); + $repl->body_str_set($expect); + + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + ok(my $cmts = $im->replace($orig, $repl), 'replaced message'); + my $changed_epochs = 0; + for my $tip (@$cmts) { + next if !defined $tip; + $changed_epochs++; + like($tip, qr/\A[a-f0-9]{40}\z/, + 'replace returned current commit'); + } + is($changed_epochs, 1, 'only one epoch changed'); + + $im->done; + my $m = PublicInbox::MIME->new($ibx->msg_by_mid('replace@example.com')); + is($m->body, $expect, 'replaced message'); + is_deeply(\@warn, [], 'no warnings on noop'); + + my @cat = qw(cat-file --buffer --batch --batch-all-objects); + my $git = $ibx->git; + my @all = $git->qx(@cat); + is_deeply([grep(/confidential/, @all)], [], 'nothing confidential'); + is_deeply([grep(/Streisand/, @all)], [], 'Streisand who?'); + is_deeply([grep(/\bOct 1993\b/, @all)], [], 'nothing from Oct 1993'); + my $t19931002 = qr/ 749520000 /; + is_deeply([grep(/$t19931002/, @all)], [], "nothing matches $t19931002"); + + for my $dir (glob("$ibx->{mainrepo}/git/*.git")) { + my ($bn) = ($dir =~ m!([^/]+)\z!); + is(system(qw(git --git-dir), $dir, qw(fsck --strict)), 0, + "git fsck is clean in epoch $bn"); + } + + my $thread_b = $ibx->over->get_thread('replace@example.com'); + is_deeply([sort map { $_->{mid} } @$thread_b], + [sort map { $_->{mid} } @$thread_a], 'threading preserved'); + + if (my $srch = $ibx->search) { + for my $q ('f:streisand', 's:confidential', 'malibu') { + my $msgs = $srch->query($q); + is_deeply($msgs, [], "no match for $q"); + } + my @ok = ('f:redactor', 's:redacted', 'nothing to see'); + if ($opt->{pre}) { + push @ok, 'm:1@example.com', 'm:2@example.com', + 's:message2', 's:message1'; + } + if ($opt->{post}) { + push @ok, 'm:3@example.com', 'm:4@example.com', + 's:message3', 's:message4'; + } + for my $q (@ok) { + my $msgs = $srch->query($q); + ok($msgs->[0], "got match for $q"); + } + } + + # check overview matches: + my %after = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; + my @before_blobs = keys %before; + foreach my $blob (@before_blobs) { + delete $before{$blob} if delete $after{$blob}; + } + + is(scalar keys %before, 1, 'one unique blob from before left'); + is(scalar keys %after, 1, 'one unique blob from after left'); + foreach my $blob (keys %before) { + is($git->check($blob), undef, 'old blob not found'); + my $smsg = $before{$blob}; + is($smsg->{subject}, 'confidential', 'before subject'); + is($smsg->{mid}, 'replace@example.com', 'before MID'); + } + foreach my $blob (keys %after) { + ok($git->check($blob), 'new blob found'); + my $smsg = $after{$blob}; + is($smsg->{subject}, 'redacted', 'after subject'); + is($smsg->{mid}, 'replace@example.com', 'before MID'); + } + @warn = (); + is($im->replace($orig, $repl), undef, 'no-op replace returns undef'); + is($im->purge($orig), undef, 'no-op purge returns undef'); + is_deeply(\@warn, [], 'no warnings on noop'); +} + +sub pad_msgs { + my ($im, @range) = @_; + for my $i (@range) { + my $irt; + if (ref($i) eq 'HASH') { + ($i, $irt) = each %$i; + } + my $sec = sprintf('%0d', $i); + my $mime = PublicInbox::MIME->new(< +Date: Fri, 02, Jan 1970 00:00:$sec +0000 +Subject: message$i + +message number$i +EOF + + if (defined($irt)) { + $mime->header_set('References', "<$irt>"); + } + + $im->add($mime); + } +} + +my $opt = { pre => *pad_msgs }; +test_replace(2, 'basic', {}); +test_replace(2, 'basic', $opt); +test_replace(2, 'basic', $opt = { %$opt, post => *pad_msgs }); +test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 }); + +SKIP: if ('test xapian') { + require PublicInbox::Search; + PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 8; + for my $l (qw(medium)) { + test_replace(2, $l, {}); + $opt = { pre => *pad_msgs }; + test_replace(2, $l, $opt); + test_replace(2, $l, $opt = { %$opt, post => *pad_msgs }); + test_replace(2, $l, $opt = { %$opt, rotate_bytes => 1 }); + } +}; + +done_testing(); -- cgit v1.2.3-24-ge0c7 From 45890d532f0ea68f5879b036b22d9dbd4e19754c Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 02:51:47 +0000 Subject: edit: new tool to perform edits This wrapper around V2Writable->replace provides a user-interface for editing messages as single-message mboxes (or the raw text via $EDITOR). --- t/edit.t | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 t/edit.t (limited to 't') diff --git a/t/edit.t b/t/edit.t new file mode 100644 index 00000000..61e90f2f --- /dev/null +++ b/t/edit.t @@ -0,0 +1,178 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +# edit frontend behavior test (t/replace.t for backend) +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +require './t/common.perl'; +require_git(2.6); +require PublicInbox::Inbox; +require PublicInbox::InboxWritable; +require PublicInbox::Config; +use PublicInbox::MID qw(mid_clean); + +my @mods = qw(IPC::Run DBI DBD::SQLite); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "missing $mod for $0" if $@; +}; +IPC::Run->import(qw(run)); + +my $cmd_pfx = 'blib/script/public-inbox'; +my $tmpdir = tempdir('pi-edit-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $mainrepo = "$tmpdir/v2"; +my $ibx = PublicInbox::Inbox->new({ + mainrepo => $mainrepo, + name => 'test-v2edit', + version => 2, + -primary_address => 'test@example.com', + indexlevel => 'basic', +}); +$ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1}); +my $cfgfile = "$tmpdir/config"; +local $ENV{PI_CONFIG} = $cfgfile; +my $file = 't/data/0001.patch'; +open my $fh, '<', $file or die "open: $!"; +my $raw = do { local $/; <$fh> }; +my $im = $ibx->importer(0); +my $mime = PublicInbox::MIME->new($raw); +my $mid = mid_clean($mime->header('Message-Id')); +ok($im->add($mime), 'add message to be edited'); +$im->done; +my ($in, $out, $err, $cmd, $cur, $t); +my $__git_dir = "--git-dir=$ibx->{mainrepo}/git/0.git"; + +$t = '-F FILE'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean prefix/bool pfx/'"; + $cmd = [ "$cmd_pfx-edit", "-F$file", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t edit OK"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); + like($out, qr/[a-f0-9]{40}/, "$t shows commit on success"); +} + +$t = '-m MESSAGE_ID'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t edit OK"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/boolean prefix/, "$t message edited"); + like($out, qr/[a-f0-9]{40}/, "$t shows commit on success"); +} + +$t = 'no-op -m MESSAGE_ID'; { + $in = $out = $err = ''; + my $before = `git $__git_dir rev-parse HEAD`; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + my $prev = $cur; + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + is_deeply($cur, $prev, "$t makes no change"); + like($cur->header('Subject'), qr/boolean prefix/, + "$t does not change message"); + like($out, qr/NONE/, 'noop shows NONE'); + my $after = `git $__git_dir rev-parse HEAD`; + is($after, $before, 'git head unchanged'); +} + +$t = '-m MESSAGE_ID can change Received: headers'; { + $in = $out = $err = ''; + my $before = `git $__git_dir rev-parse HEAD`; + local $ENV{MAIL_EDITOR} = + "$^X -i -p -e 's/^Subject:.*/Received: x\\n\$&/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/boolean prefix/, + "$t does not change Subject"); + is($cur->header('Received'), 'x', 'added Received header'); +} + +$t = '-m miss'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean/FAIL/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid-miss", $mainrepo ]; + ok(!run($cmd, \$in, \$out, \$err), "$t fails on invalid MID"); + like($err, qr/No message found/, "$t shows error"); +} + +$t = 'non-interactive editor failure'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 'END { exit 1 }'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(!run($cmd, \$in, \$out, \$err), "$t detected"); + like($err, qr/END \{ exit 1 \}' failed:/, "$t shows error"); +} + +$t = 'mailEditor set in config'; { + $in = $out = $err = ''; + my $rc = system(qw(git config), "--file=$cfgfile", + 'publicinbox.maileditor', + "$^X -i -p -e 's/boolean prefix/bool pfx/'"); + is($rc, 0, 'set publicinbox.mailEditor'); + local $ENV{MAIL_EDITOR}; + local $ENV{GIT_EDITOR} = 'echo should not run'; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t edited message"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); + unlike($out, qr/should not run/, 'did not run GIT_EDITOR'); +} + +$t = '--raw and mbox escaping'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^\$/\\nFrom not mbox\\n/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", '--raw', $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->body, qr/^From not mbox/sm, 'put "From " line into body'); + + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^>From not/\$& an/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds with mbox escaping"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + like($cur->body, qr/^From not an mbox/sm, + 'changed "From " line unescaped'); + + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^From not an mbox\\n//s'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", '--raw', $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds again"); + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + unlike($cur->body, qr/^From not an mbox/sm, "$t restored body"); +} + +$t = 'reuse Message-ID'; { + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + ok($im->add($mime), "$t and re-add"); + $im->done; + like($warn[0], qr/reused for mismatched content/, "$t got warning"); +} + +$t = 'edit ambiguous Message-ID with -m'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(!run($cmd, \$in, \$out, \$err), "$t fails w/o --force"); + like($err, qr/Multiple messages with different content found matching/, + "$t shows matches"); + like($err, qr/GIT_DIR=.*git show/is, "$t shows git commands"); +} + +$t .= ' and --force'; { + $in = $out = $err = ''; + local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Subject:x/i'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", '--force', $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + like($err, qr/Will edit all of them/, "$t notes all will be edited"); + my @dump = `git $__git_dir cat-file --batch --batch-all-objects`; + chomp @dump; + is_deeply([grep(/^Subject:/i, @dump)], [qw(Subject:x Subject:x)], + "$t edited both messages"); +} + +done_testing(); -- cgit v1.2.3-24-ge0c7 From 6e507c8cb41b0d48963503a88034348d74506211 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 10 Jun 2019 16:58:55 +0000 Subject: edit|purge: improve output on rewrites Fill in undef as "(unchanged)" when displaying commits and prefix the epoch name. --- t/purge.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 't') diff --git a/t/purge.t b/t/purge.t index c1e0e9a0..384f32a6 100644 --- a/t/purge.t +++ b/t/purge.t @@ -57,7 +57,7 @@ is($? >> 8, 1, 'missed purge exits with 1'); # a successful case: ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK'); -like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted'); +like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted'); # add (old) vger filter to config file print $cfg_fh < Date: Mon, 10 Jun 2019 18:09:27 +0000 Subject: edit: drop unwanted headers before noop check mutt will set Content-Length, Lines, and Status headers unconditionally, so we need to account for that before doing header comparisons to avoid making expensive changes when noop edits are made. --- t/edit.t | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 't') diff --git a/t/edit.t b/t/edit.t index 61e90f2f..6b4e35c3 100644 --- a/t/edit.t +++ b/t/edit.t @@ -79,6 +79,24 @@ $t = 'no-op -m MESSAGE_ID'; { is($after, $before, 'git head unchanged'); } +$t = 'no-op -m MESSAGE_ID w/Status: header'; { # because mutt does it + $in = $out = $err = ''; + my $before = `git $__git_dir rev-parse HEAD`; + local $ENV{MAIL_EDITOR} = + "$^X -i -p -e 's/^Subject:.*/Status: RO\\n\$&/'"; + $cmd = [ "$cmd_pfx-edit", "-m$mid", $mainrepo ]; + ok(run($cmd, \$in, \$out, \$err), "$t succeeds"); + my $prev = $cur; + $cur = PublicInbox::MIME->new($ibx->msg_by_mid($mid)); + is_deeply($cur, $prev, "$t makes no change"); + like($cur->header('Subject'), qr/boolean prefix/, + "$t does not change message"); + is($cur->header('Status'), undef, 'Status header not added'); + like($out, qr/NONE/, 'noop shows NONE'); + my $after = `git $__git_dir rev-parse HEAD`; + is($after, $before, 'git head unchanged'); +} + $t = '-m MESSAGE_ID can change Received: headers'; { $in = $out = $err = ''; my $before = `git $__git_dir rev-parse HEAD`; -- cgit v1.2.3-24-ge0c7