user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 1/2] v2writable: make remove return-compatible w/ Import::remove
  2020-02-24  8:08  6% [PATCH 0/2] v2writable: reduce smsg->{mime} impact Eric Wong
@ 2020-02-24  8:08  7% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-02-24  8:08 UTC (permalink / raw)
  To: meta

Import::remove is a documented interface, and the return
value of the V2Writable work-alike should try to be compatible
with what Import implements.
---
 lib/PublicInbox/V2Writable.pm | 23 +++++++++++++----------
 t/v2writable.t                |  7 +++++--
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index fc2f33f9..573a92aa 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -381,7 +381,7 @@ sub rewrite_internal ($$;$$$) {
 	}
 	my $over = $self->{over};
 	my $cids = content_ids($old_mime);
-	my $removed;
+	my @removed;
 	my $mids = mids($old_mime->header_obj);
 
 	# We avoid introducing new blobs into git since the raw content
@@ -391,7 +391,7 @@ sub rewrite_internal ($$;$$$) {
 	my $mark;
 
 	foreach my $mid (@$mids) {
-		my %gone; # num => [ smsg, raw ]
+		my %gone; # num => [ smsg, $mime, raw ]
 		my ($id, $prev);
 		while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
 			my $msg = get_blob($self, $smsg);
@@ -402,8 +402,7 @@ sub rewrite_internal ($$;$$$) {
 			my $orig = $$msg;
 			my $cur = PublicInbox::MIME->new($msg);
 			if (content_matches($cids, $cur)) {
-				$smsg->{mime} = $cur;
-				$gone{$smsg->{num}} = [ $smsg, \$orig ];
+				$gone{$smsg->{num}} = [ $smsg, $cur, \$orig ];
 			}
 		}
 		my $n = scalar keys %gone;
@@ -413,15 +412,16 @@ sub rewrite_internal ($$;$$$) {
 				join(',', sort keys %gone), "\n";
 		}
 		foreach my $num (keys %gone) {
-			my ($smsg, $orig) = @{$gone{$num}};
-			# $removed should only be set once assuming
+			my ($smsg, $mime, $orig) = @{$gone{$num}};
+			# @removed should only be set once assuming
 			# no bugs in our deduplication code:
-			$removed = $smsg;
+			@removed = (undef, $mime, $smsg);
 			my $oid = $smsg->{blob};
 			if ($replace_map) {
 				$replace_map->{$oid} = $sref;
 			} else {
 				($mark, undef) = $im->remove($orig, $cmt_msg);
+				$removed[0] = $mark;
 			}
 			$orig = undef;
 			if ($need_reindex) { # ->replace
@@ -441,15 +441,18 @@ sub rewrite_internal ($$;$$$) {
 		my $rewrites = _replace_oids($self, $new_mime, $replace_map);
 		return { rewrites => $rewrites, need_reindex => $need_reindex };
 	}
-	$removed;
+	defined($mark) ? @removed : undef;
 }
 
-# public
+# public (see PublicInbox::Import->remove), but note the 3rd element
+# (retval[2]) is not part of the stable API shared with Import->remove
 sub remove {
 	my ($self, $mime, $cmt_msg) = @_;
+	my @ret;
 	$self->{-inbox}->with_umask(sub {
-		rewrite_internal($self, $mime, $cmt_msg);
+		@ret = rewrite_internal($self, $mime, $cmt_msg);
 	});
+	defined($ret[0]) ? @ret : undef;
 }
 
 sub _replace ($$;$$) {
diff --git a/t/v2writable.t b/t/v2writable.t
index 77bd68d4..cdcfe4d0 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 use Test::More;
 use PublicInbox::MIME;
-use PublicInbox::ContentId qw(content_digest);
+use PublicInbox::ContentId qw(content_digest content_id);
 use PublicInbox::TestCommon;
 use Cwd qw(abs_path);
 require_git(2.6);
@@ -206,7 +206,10 @@ EOF
 	my $before = $git0->qx(@log, qw(--pretty=raw --raw -r));
 	$im = PublicInbox::V2Writable->new($ibx, {nproc => 2});
 	is($im->{shards}, 1, 'detected single shard from previous');
-	my $smsg = $im->remove($mime, 'test removal');
+	my ($mark, $rm_mime, $smsg) = $im->remove($mime, 'test removal');
+	is(content_id($rm_mime), content_id($mime),
+			'removed object returned matches');
+	ok(defined($mark), 'mark set');
 	$im->done;
 	my @after = $git0->qx(@log, qw(--pretty=oneline));
 	my $tip = shift @after;

^ permalink raw reply related	[relevance 7%]

* [PATCH 0/2] v2writable: reduce smsg->{mime} impact
@ 2020-02-24  8:08  6% Eric Wong
  2020-02-24  8:08  7% ` [PATCH 1/2] v2writable: make remove return-compatible w/ Import::remove Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-02-24  8:08 UTC (permalink / raw)
  To: meta

Stuffing a full MIME object into $smsg is probably a bad idea,
as witnessed by the memory bloat fixed with:
https://public-inbox.org/meta/20190108004606.23760-1-e@80x24.org/
("view: stop storing all MIME objects on large threads")

So let's slowly start getting rid of $smsg->{mime} and improve
some V2Writable behaviors while we're at it.

Eric Wong (2):
  v2writable: make remove return-compatible w/ Import::remove
  v2writable: lookup_content => content_exists

 lib/PublicInbox/V2Writable.pm | 34 ++++++++++++++++------------------
 t/v2writable.t                |  7 +++++--
 2 files changed, 21 insertions(+), 20 deletions(-)


^ permalink raw reply	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-02-24  8:08  6% [PATCH 0/2] v2writable: reduce smsg->{mime} impact Eric Wong
2020-02-24  8:08  7% ` [PATCH 1/2] v2writable: make remove return-compatible w/ Import::remove Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).