From 7a3a4b9d310876f68f4ba788afaef77ad15fc62b Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 19 Mar 2018 08:14:41 +0000 Subject: import: (v2) delete writes the blob into history in subdir This makes it easier to audit deletes with "git log -p" and prevents an unstable specification of "content_id" from being stored in history. This should be cost-free if done in the same partition (and even cheaper than before as it introduces no new blobs). It does have a higher cost across partitions, but is probably irrelevant given the typical ham:spam ratio. --- lib/PublicInbox/Import.pm | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib/PublicInbox/Import.pm') diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index e20c6e03..94a49fe6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -11,7 +11,6 @@ use Fcntl qw(:flock :DEFAULT); use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mid_mime mid2path); use PublicInbox::Address; -use PublicInbox::ContentId qw(content_id); use PublicInbox::MsgTime qw(msg_timestamp); sub new { @@ -163,7 +162,6 @@ sub get_mark { # ('MISMATCH', Email::MIME) on mismatch # (:MARK, Email::MIME) on success # -# For v2 inboxes, the content_id is returned instead of the msg # v2 callers should check with Xapian before calling this as # it is not idempotent. sub remove { @@ -179,10 +177,17 @@ sub remove { ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime); return ($err, $cur) if $err; } else { - $cur = content_id($mime); - my $len = length($cur); + my $sref; + if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable + $sref = $mime; + } else { # XXX should not be necessary: + my $str = $mime->as_string; + $sref = \$str; + } + my $len = length($$sref); $blob = $self->{mark}++; - print $w "blob\nmark :$blob\ndata $len\n$cur\n" or wfail; + print $w "blob\nmark :$blob\ndata $len\n", + $$sref, "\n" or wfail; } my $ref = $self->{ref}; -- cgit v1.2.3-24-ge0c7