From 7a3a4b9d310876f68f4ba788afaef77ad15fc62b Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 19 Mar 2018 08:14:41 +0000 Subject: import: (v2) delete writes the blob into history in subdir This makes it easier to audit deletes with "git log -p" and prevents an unstable specification of "content_id" from being stored in history. This should be cost-free if done in the same partition (and even cheaper than before as it introduces no new blobs). It does have a higher cost across partitions, but is probably irrelevant given the typical ham:spam ratio. --- lib/PublicInbox/Import.pm | 15 ++++++++++----- lib/PublicInbox/V2Writable.pm | 4 +++- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index e20c6e03..94a49fe6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -11,7 +11,6 @@ use Fcntl qw(:flock :DEFAULT); use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mid_mime mid2path); use PublicInbox::Address; -use PublicInbox::ContentId qw(content_id); use PublicInbox::MsgTime qw(msg_timestamp); sub new { @@ -163,7 +162,6 @@ sub get_mark { # ('MISMATCH', Email::MIME) on mismatch # (:MARK, Email::MIME) on success # -# For v2 inboxes, the content_id is returned instead of the msg # v2 callers should check with Xapian before calling this as # it is not idempotent. sub remove { @@ -179,10 +177,17 @@ sub remove { ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime); return ($err, $cur) if $err; } else { - $cur = content_id($mime); - my $len = length($cur); + my $sref; + if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable + $sref = $mime; + } else { # XXX should not be necessary: + my $str = $mime->as_string; + $sref = \$str; + } + my $len = length($$sref); $blob = $self->{mark}++; - print $w "blob\nmark :$blob\ndata $len\n$cur\n" or wfail; + print $w "blob\nmark :$blob\ndata $len\n", + $$sref, "\n" or wfail; } my $ref = $self->{ref}; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 656f0693..fd9bf615 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -220,6 +220,7 @@ sub remove { warn "broken smsg for $mid\n"; return 1; # continue } + my $orig = $$msg; my $cur = PublicInbox::MIME->new($msg); if (content_id($cur) eq $cid) { $mm->num_delete($smsg->num); @@ -227,7 +228,8 @@ sub remove { # no bugs in our deduplication code: $removed = $smsg; $removed->{mime} = $cur; - $im->remove($cur, $cmt_msg); + $im->remove(\$orig, $cmt_msg); + $orig = undef; $removed->num; # memoize this for callers my $oid = $smsg->{blob}; -- cgit v1.2.3-24-ge0c7