From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9CF3A1FAEC for ; Mon, 19 Mar 2018 08:15:01 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 09/27] import: (v2) delete writes the blob into history in subdir Date: Mon, 19 Mar 2018 08:14:41 +0000 Message-Id: <20180319081459.10645-10-e@80x24.org> In-Reply-To: <20180319081459.10645-1-e@80x24.org> References: <20180319081459.10645-1-e@80x24.org> List-Id: This makes it easier to audit deletes with "git log -p" and prevents an unstable specification of "content_id" from being stored in history. This should be cost-free if done in the same partition (and even cheaper than before as it introduces no new blobs). It does have a higher cost across partitions, but is probably irrelevant given the typical ham:spam ratio. --- lib/PublicInbox/Import.pm | 15 ++++++++++----- lib/PublicInbox/V2Writable.pm | 4 +++- t/v2writable.t | 9 +++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index e20c6e0..94a49fe 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -11,7 +11,6 @@ use Fcntl qw(:flock :DEFAULT); use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mid_mime mid2path); use PublicInbox::Address; -use PublicInbox::ContentId qw(content_id); use PublicInbox::MsgTime qw(msg_timestamp); sub new { @@ -163,7 +162,6 @@ sub get_mark { # ('MISMATCH', Email::MIME) on mismatch # (:MARK, Email::MIME) on success # -# For v2 inboxes, the content_id is returned instead of the msg # v2 callers should check with Xapian before calling this as # it is not idempotent. sub remove { @@ -179,10 +177,17 @@ sub remove { ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime); return ($err, $cur) if $err; } else { - $cur = content_id($mime); - my $len = length($cur); + my $sref; + if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable + $sref = $mime; + } else { # XXX should not be necessary: + my $str = $mime->as_string; + $sref = \$str; + } + my $len = length($$sref); $blob = $self->{mark}++; - print $w "blob\nmark :$blob\ndata $len\n$cur\n" or wfail; + print $w "blob\nmark :$blob\ndata $len\n", + $$sref, "\n" or wfail; } my $ref = $self->{ref}; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 656f069..fd9bf61 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -220,6 +220,7 @@ sub remove { warn "broken smsg for $mid\n"; return 1; # continue } + my $orig = $$msg; my $cur = PublicInbox::MIME->new($msg); if (content_id($cur) eq $cid) { $mm->num_delete($smsg->num); @@ -227,7 +228,8 @@ sub remove { # no bugs in our deduplication code: $removed = $smsg; $removed->{mime} = $cur; - $im->remove($cur, $cmt_msg); + $im->remove(\$orig, $cmt_msg); + $orig = undef; $removed->num; # memoize this for callers my $oid = $smsg->{blob}; diff --git a/t/v2writable.t b/t/v2writable.t index 6e37b72..a5c982e 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -191,6 +191,7 @@ EOF { local $ENV{NPROC} = 2; my @before = $git0->qx(qw(log --pretty=oneline)); + my $before = $git0->qx(qw(log --pretty=raw --raw -r --no-abbrev)); $im = PublicInbox::V2Writable->new($ibx, 1); is($im->{partitions}, 1, 'detected single partition from previous'); my $smsg = $im->remove($mime, 'test removal'); @@ -207,6 +208,14 @@ EOF my @found = (); $srch->each_smsg_by_mid($smsg->mid, sub { push @found, @_; 1 }); is(scalar(@found), 0, 'no longer found in Xapian skeleton'); + + my $after = $git0->qx(qw(log -1 --pretty=raw --raw -r --no-abbrev)); + if ($after =~ m!( [a-f0-9]+ )A\td$!) { + my $oid = $1; + ok(index($before, $oid) > 0, 'no new blob introduced'); + } else { + fail('failed to extract blob from log output'); + } } done_testing(); -- EW