about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-19 08:14:41 +0000
committer: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-19 08:16:34 +0000
commit: 7a3a4b9d310876f68f4ba788afaef77ad15fc62b (patch)
tree: e9d12c733cbc51cfb8499f09ff60cda23999662b /lib
parent: cf1e5bcfeacd5b2a3b8e82052a65e69cd1e0cc57 (diff)
download: public-inbox-7a3a4b9d310876f68f4ba788afaef77ad15fc62b.tar.gz
This makes it easier to audit deletes with "git log -p" and
prevents an unstable specification of "content_id" from being
stored in history.

This should be cost-free if done in the same partition (and even
cheaper than before as it introduces no new blobs).  It does
have a higher cost across partitions, but is probably irrelevant
given the typical ham:spam ratio.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Import.pm 15
-rw-r--r-- lib/PublicInbox/V2Writable.pm 4
2 files changed, 13 insertions, 6 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index e20c6e03..94a49fe6 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -11,7 +11,6 @@ use Fcntl qw(:flock :DEFAULT);
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::MID qw(mid_mime mid2path);
 use PublicInbox::Address;
-use PublicInbox::ContentId qw(content_id);
 use PublicInbox::MsgTime qw(msg_timestamp);
 
 sub new {
@@ -163,7 +162,6 @@ sub get_mark {
 # ('MISMATCH', Email::MIME) on mismatch
 # (:MARK, Email::MIME) on success
 #
-# For v2 inboxes, the content_id is returned instead of the msg
 # v2 callers should check with Xapian before calling this as
 # it is not idempotent.
 sub remove {
@@ -179,10 +177,17 @@ sub remove {
                 ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
                 return ($err, $cur) if $err;
         } else {
-                $cur = content_id($mime);
-                my $len = length($cur);
+                my $sref;
+                if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable
+                        $sref = $mime;
+                } else { # XXX should not be necessary:
+                        my $str = $mime->as_string;
+                        $sref = \$str;
+                }
+                my $len = length($$sref);
                 $blob = $self->{mark}++;
-                print $w "blob\nmark :$blob\ndata $len\n$cur\n" or wfail;
+                print $w "blob\nmark :$blob\ndata $len\n",
+                        $$sref, "\n" or wfail;
         }
 
         my $ref = $self->{ref};
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 656f0693..fd9bf615 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -220,6 +220,7 @@ sub remove {
                                 warn "broken smsg for $mid\n";
                                 return 1; # continue
                         }
+                        my $orig = $$msg;
                         my $cur = PublicInbox::MIME->new($msg);
                         if (content_id($cur) eq $cid) {
                                 $mm->num_delete($smsg->num);
@@ -227,7 +228,8 @@ sub remove {
                                 # no bugs in our deduplication code:
                                 $removed = $smsg;
                                 $removed->{mime} = $cur;
-                                $im->remove($cur, $cmt_msg);
+                                $im->remove(\$orig, $cmt_msg);
+                                $orig = undef;
                                 $removed->num; # memoize this for callers
 
                                 my $oid = $smsg->{blob};