* [PATCH 1/2] v2writable: make remove return-compatible w/ Import::remove
2020-02-24 8:08 [PATCH 0/2] v2writable: reduce smsg->{mime} impact Eric Wong
@ 2020-02-24 8:08 ` Eric Wong
2020-02-24 8:08 ` [PATCH 2/2] v2writable: lookup_content => content_exists Eric Wong
2020-02-24 8:08 ` [PATCH] " Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2020-02-24 8:08 UTC (permalink / raw)
To: meta
Import::remove is a documented interface, so the return value
of the V2Writable work-alike should be as compatible as possible
with what Import::remove returns.
---
lib/PublicInbox/V2Writable.pm | 23 +++++++++++++----------
t/v2writable.t | 7 +++++--
2 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index fc2f33f9..573a92aa 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -381,7 +381,7 @@ sub rewrite_internal ($$;$$$) {
}
my $over = $self->{over};
my $cids = content_ids($old_mime);
- my $removed;
+ my @removed;
my $mids = mids($old_mime->header_obj);
# We avoid introducing new blobs into git since the raw content
@@ -391,7 +391,7 @@ sub rewrite_internal ($$;$$$) {
my $mark;
foreach my $mid (@$mids) {
- my %gone; # num => [ smsg, raw ]
+ my %gone; # num => [ smsg, $mime, raw ]
my ($id, $prev);
while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
my $msg = get_blob($self, $smsg);
@@ -402,8 +402,7 @@ sub rewrite_internal ($$;$$$) {
my $orig = $$msg;
my $cur = PublicInbox::MIME->new($msg);
if (content_matches($cids, $cur)) {
- $smsg->{mime} = $cur;
- $gone{$smsg->{num}} = [ $smsg, \$orig ];
+ $gone{$smsg->{num}} = [ $smsg, $cur, \$orig ];
}
}
my $n = scalar keys %gone;
@@ -413,15 +412,16 @@ sub rewrite_internal ($$;$$$) {
join(',', sort keys %gone), "\n";
}
foreach my $num (keys %gone) {
- my ($smsg, $orig) = @{$gone{$num}};
- # $removed should only be set once assuming
+ my ($smsg, $mime, $orig) = @{$gone{$num}};
+ # @removed should only be set once assuming
# no bugs in our deduplication code:
- $removed = $smsg;
+ @removed = (undef, $mime, $smsg);
my $oid = $smsg->{blob};
if ($replace_map) {
$replace_map->{$oid} = $sref;
} else {
($mark, undef) = $im->remove($orig, $cmt_msg);
+ $removed[0] = $mark;
}
$orig = undef;
if ($need_reindex) { # ->replace
@@ -441,15 +441,18 @@ sub rewrite_internal ($$;$$$) {
my $rewrites = _replace_oids($self, $new_mime, $replace_map);
return { rewrites => $rewrites, need_reindex => $need_reindex };
}
- $removed;
+ defined($mark) ? @removed : undef;
}
-# public
+# public (see PublicInbox::Import->remove), but note the 3rd element
+# (retval[2]) is not part of the stable API shared with Import->remove
sub remove {
my ($self, $mime, $cmt_msg) = @_;
+ my @ret;
$self->{-inbox}->with_umask(sub {
- rewrite_internal($self, $mime, $cmt_msg);
+ @ret = rewrite_internal($self, $mime, $cmt_msg);
});
+ defined($ret[0]) ? @ret : undef;
}
sub _replace ($$;$$) {
diff --git a/t/v2writable.t b/t/v2writable.t
index 77bd68d4..cdcfe4d0 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -4,7 +4,7 @@ use strict;
use warnings;
use Test::More;
use PublicInbox::MIME;
-use PublicInbox::ContentId qw(content_digest);
+use PublicInbox::ContentId qw(content_digest content_id);
use PublicInbox::TestCommon;
use Cwd qw(abs_path);
require_git(2.6);
@@ -206,7 +206,10 @@ EOF
my $before = $git0->qx(@log, qw(--pretty=raw --raw -r));
$im = PublicInbox::V2Writable->new($ibx, {nproc => 2});
is($im->{shards}, 1, 'detected single shard from previous');
- my $smsg = $im->remove($mime, 'test removal');
+ my ($mark, $rm_mime, $smsg) = $im->remove($mime, 'test removal');
+ is(content_id($rm_mime), content_id($mime),
+ 'removed object returned matches');
+ ok(defined($mark), 'mark set');
$im->done;
my @after = $git0->qx(@log, qw(--pretty=oneline));
my $tip = shift @after;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] v2writable: lookup_content => content_exists
2020-02-24 8:08 [PATCH 0/2] v2writable: reduce smsg->{mime} impact Eric Wong
2020-02-24 8:08 ` [PATCH 1/2] v2writable: make remove return-compatible w/ Import::remove Eric Wong
@ 2020-02-24 8:08 ` Eric Wong
2020-02-24 8:08 ` [PATCH] " Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2020-02-24 8:08 UTC (permalink / raw)
To: meta
It only needs to return a boolean, since none of the current
callers use the return value for anything other than a truth test.
Thus avoid a hash table assignment and the use of `$smsg->{mime}' here.
---
lib/PublicInbox/V2Writable.pm | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 573a92aa..b42e6a13 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -201,11 +201,10 @@ sub v2_num_for {
# crap, Message-ID is already known, hope somebody just resent:
foreach my $m (@$mids) {
# read-only lookup now safe to do after above barrier
- my $existing = lookup_content($self, $mime, $m);
# easy, don't store duplicates
# note: do not add more diagnostic info here since
# it gets noisy on public-inbox-watch restarts
- return () if $existing;
+ return () if content_exists($self, $mime, $m);
}
# AltId may pre-populate article numbers (e.g. X-Mail-Count
@@ -824,7 +823,7 @@ sub get_blob ($$) {
$ibx->msg_by_smsg($smsg);
}
-sub lookup_content ($$$) {
+sub content_exists ($$$) {
my ($self, $mime, $mid) = @_;
my $over = $self->{over};
my $cids = content_ids($mime);
@@ -836,11 +835,7 @@ sub lookup_content ($$$) {
next;
}
my $cur = PublicInbox::MIME->new($msg);
- if (content_matches($cids, $cur)) {
- $smsg->{mime} = $cur;
- return $smsg;
- }
-
+ return 1 if content_matches($cids, $cur);
# XXX DEBUG_DIFF is experimental and may be removed
diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF};
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH] v2writable: lookup_content => content_exists
2020-02-24 8:08 [PATCH 0/2] v2writable: reduce smsg->{mime} impact Eric Wong
2020-02-24 8:08 ` [PATCH 1/2] v2writable: make remove return-compatible w/ Import::remove Eric Wong
2020-02-24 8:08 ` [PATCH 2/2] v2writable: lookup_content => content_exists Eric Wong
@ 2020-02-24 8:08 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2020-02-24 8:08 UTC (permalink / raw)
To: meta
It only needs to return a boolean, since none of the current
callers use the return value for anything other than a truth test,
so avoid a hash assignment and the use of `$smsg->{mime}' here.
---
lib/PublicInbox/V2Writable.pm | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 573a92aa..b42e6a13 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -201,11 +201,10 @@ sub v2_num_for {
# crap, Message-ID is already known, hope somebody just resent:
foreach my $m (@$mids) {
# read-only lookup now safe to do after above barrier
- my $existing = lookup_content($self, $mime, $m);
# easy, don't store duplicates
# note: do not add more diagnostic info here since
# it gets noisy on public-inbox-watch restarts
- return () if $existing;
+ return () if content_exists($self, $mime, $m);
}
# AltId may pre-populate article numbers (e.g. X-Mail-Count
@@ -824,7 +823,7 @@ sub get_blob ($$) {
$ibx->msg_by_smsg($smsg);
}
-sub lookup_content ($$$) {
+sub content_exists ($$$) {
my ($self, $mime, $mid) = @_;
my $over = $self->{over};
my $cids = content_ids($mime);
@@ -836,11 +835,7 @@ sub lookup_content ($$$) {
next;
}
my $cur = PublicInbox::MIME->new($msg);
- if (content_matches($cids, $cur)) {
- $smsg->{mime} = $cur;
- return $smsg;
- }
-
+ return 1 if content_matches($cids, $cur);
# XXX DEBUG_DIFF is experimental and may be removed
diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF};
^ permalink raw reply related [flat|nested] 4+ messages in thread