user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 03/11] import: switch to "replace_oids" interface for purge
  @ 2019-06-09  2:51  2% ` Eric Wong (Contractor, The Linux Foundation)
  0 siblings, 0 replies; 4+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2019-06-09  2:51 UTC (permalink / raw)
  To: meta

Continuing the work by Eric Biederman in commit a118d58a402bd31b
("Import.pm: When purging replace a purged file with a zero length file"),
we can use a generic OID replacement mechanism to implement
purge.
---
 lib/PublicInbox/Import.pm     | 33 +++++++++++++++++++--------------
 lib/PublicInbox/V2Writable.pm |  6 +++---
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 6ee1935..2c8fe84 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -501,16 +501,16 @@ sub clean_purge_buffer {
 	}
 }
 
-sub purge_oids {
-	my ($self, $purge) = @_;
-	my $tmp = "refs/heads/purge-".((keys %$purge)[0]);
+sub replace_oids {
+	my ($self, $replace) = @_; # oid => raw string
+	my $tmp = "refs/heads/replace-".((keys %$replace)[0]);
 	my $old = $self->{'ref'};
 	my $git = $self->{git};
 	my @export = (qw(fast-export --no-data --use-done-feature), $old);
 	my $rd = $git->popen(@export);
 	my ($r, $w) = $self->gfi_start;
 	my @buf;
-	my $npurge = 0;
+	my $nreplace = 0;
 	my @oids;
 	my ($done, $mark);
 	my $tree = $self->{-tree};
@@ -533,10 +533,13 @@ sub purge_oids {
 		} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
 			my ($oid, $path) = ($1, $2);
 			$tree->{$path} = 1;
-			if ($purge->{$oid}) {
+			my $sref = $replace->{$oid};
+			if (defined $sref) {
 				push @oids, $oid;
-				my $cmd = "M 100644 inline $path\ndata 0\n\n";
-				push @buf, $cmd;
+				my $n = length($$sref);
+				push @buf, "M 100644 inline $path\ndata $n\n";
+				push @buf, $$sref; # hope CoW works...
+				push @buf, "\n";
 			} else {
 				push @buf, $_;
 			}
@@ -549,7 +552,7 @@ sub purge_oids {
 				$out =~ s/^/# /sgm;
 				warn "purge rewriting\n", $out, "\n";
 				clean_purge_buffer(\@oids, \@buf);
-				$npurge++;
+				$nreplace++;
 			}
 			$w->print(@buf, "\n") or wfail;
 			@buf = ();
@@ -567,28 +570,30 @@ sub purge_oids {
 		$w->print(@buf) or wfail;
 	}
 	die 'done\n not seen from fast-export' unless $done;
-	chomp(my $cmt = $self->get_mark(":$mark")) if $npurge;
+	chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace;
 	$self->{nchg} = 0; # prevent _update_git_info until update-ref:
 	$self->done;
 	my @git = ('git', "--git-dir=$git->{git_dir}");
 
-	run_die([@git, qw(update-ref), $old, $tmp]) if $npurge;
+	run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace;
 
 	run_die([@git, qw(update-ref -d), $tmp]);
 
-	return if $npurge == 0;
+	return if $nreplace == 0;
 
 	run_die([@git, qw(-c gc.reflogExpire=now gc --prune=all)]);
+
+	# check that old OIDs are gone
 	my $err = 0;
-	foreach my $oid (keys %$purge) {
+	foreach my $oid (keys %$replace) {
 		my @info = $git->check($oid);
 		if (@info) {
-			warn "$oid not purged\n";
+			warn "$oid not replaced\n";
 			$err++;
 		}
 	}
 	_update_git_info($self, 0);
-	die "Failed to purge $err object(s)\n" if $err;
+	die "Failed to replace $err object(s)\n" if $err;
 	$cmt;
 }
 
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index a435814..d6f72b0 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -298,7 +298,7 @@ sub idx_init {
 }
 
 sub purge_oids ($$) {
-	my ($self, $purge) = @_; # $purge = { $object_id => 1, ... }
+	my ($self, $purge) = @_; # $purge = { $object_id => \'', ... }
 	$self->done;
 	my $pfx = "$self->{-inbox}->{mainrepo}/git";
 	my $purges = [];
@@ -313,7 +313,7 @@ sub purge_oids ($$) {
 		-d $git_dir or next;
 		my $git = PublicInbox::Git->new($git_dir);
 		my $im = $self->import_init($git, 0, 1);
-		$purges->[$i] = $im->purge_oids($purge);
+		$purges->[$i] = $im->replace_oids($purge);
 		$im->done;
 	}
 	$purges;
@@ -386,7 +386,7 @@ sub remove_internal ($$$$) {
 			$removed = $smsg;
 			my $oid = $smsg->{blob};
 			if ($purge) {
-				$purge->{$oid} = 1;
+				$purge->{$oid} = \'';
 			} else {
 				($mark, undef) = $im->remove($orig, $cmt_msg);
 			}
-- 
EW


^ permalink raw reply related	[relevance 2%]

* Re: [PATCH] Import.pm: When purging replace a purged file with a zero length file
  2018-08-10 17:47  7% ` Eric Wong
@ 2018-08-11  1:12  7%   ` Eric W. Biederman
  0 siblings, 0 replies; 4+ results
From: Eric W. Biederman @ 2018-08-11  1:12 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

Eric Wong <e@80x24.org> writes:

> "Eric W. Biederman" <ebiederm@xmission.com> wrote:
>> 
>> This ensures that the number of added files remains the same and thus
>> the article numbers derived from a repository will remain the same.
>> 
>> I think this is the last place in public-inbox that has to be tweaked to
>> guarantee the generated article number will remain the same in a public
>> inbox archive.
>
> OK, definitely desirable.
>
>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> ---
>>  lib/PublicInbox/Import.pm | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>> 
>> diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
>> index bfa7a8053297..3df7d98f298b 100644
>> --- a/lib/PublicInbox/Import.pm
>> +++ b/lib/PublicInbox/Import.pm
>> @@ -519,11 +519,12 @@ sub purge_oids {
>>  			push @buf, $buf;
>>  		} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
>>  			my ($oid, $path) = ($1, $2);
>> +			$tree->{$path} = 1;
>>  			if ($purge->{$oid}) {
>>  				push @oids, $oid;
>> -				delete $tree->{$path};
>> +				my $cmd = "M 100644 inline $path\ndata 0\n\n";
>> +				push @buf, $cmd;
>>  			} else {
>> -				$tree->{$path} = 1;
>>  				push @buf, $_;
>>  			}
>>  		} elsif (/^D (\w+)/) {
>> -- 
>
> OK.  I haven't checked, but is the indexing/re-indexing code
> able to deal with zero-byte messages?  Thanks.

The v2mirror test covers this case and it doesn't seem to have any
problems.  The v2mirror test performs an index_sync after the purge and looks
for warnings and doesn't get any.  So I think we are ok.  Skimming
through the code I don't see any obvious issues either.

Eric


^ permalink raw reply	[relevance 7%]

* Re: [PATCH] Import.pm: When purging replace a purged file with a zero length file
  2018-08-10  0:08  7% [PATCH] Import.pm: When purging replace a purged file with a zero length file Eric W. Biederman
@ 2018-08-10 17:47  7% ` Eric Wong
  2018-08-11  1:12  7%   ` Eric W. Biederman
  0 siblings, 1 reply; 4+ results
From: Eric Wong @ 2018-08-10 17:47 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: meta

"Eric W. Biederman" <ebiederm@xmission.com> wrote:
> 
> This ensures that the number of added files remains the same and thus
> the article numbers derived from a repository will remain the same.
> 
> I think this is the last place in public-inbox that has to be tweaked to
> guarantee the generated article number will remain the same in a public
> inbox archive.

OK, definitely desirable.

> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>  lib/PublicInbox/Import.pm | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
> index bfa7a8053297..3df7d98f298b 100644
> --- a/lib/PublicInbox/Import.pm
> +++ b/lib/PublicInbox/Import.pm
> @@ -519,11 +519,12 @@ sub purge_oids {
>  			push @buf, $buf;
>  		} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
>  			my ($oid, $path) = ($1, $2);
> +			$tree->{$path} = 1;
>  			if ($purge->{$oid}) {
>  				push @oids, $oid;
> -				delete $tree->{$path};
> +				my $cmd = "M 100644 inline $path\ndata 0\n\n";
> +				push @buf, $cmd;
>  			} else {
> -				$tree->{$path} = 1;
>  				push @buf, $_;
>  			}
>  		} elsif (/^D (\w+)/) {
> -- 

OK.  I haven't checked, but is the indexing/re-indexing code
able to deal with zero-byte messages?  Thanks.

^ permalink raw reply	[relevance 7%]

* [PATCH] Import.pm: When purging replace a purged file with a zero length file
@ 2018-08-10  0:08  7% Eric W. Biederman
  2018-08-10 17:47  7% ` Eric Wong
  0 siblings, 1 reply; 4+ results
From: Eric W. Biederman @ 2018-08-10  0:08 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta


This ensures that the number of added files remains the same and thus
the article numbers derived from a repository will remain the same.

I think this is the last place in public-inbox that has to be tweaked to
guarantee the generated article number will remain the same in a public
inbox archive.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 lib/PublicInbox/Import.pm | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index bfa7a8053297..3df7d98f298b 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -519,11 +519,12 @@ sub purge_oids {
 			push @buf, $buf;
 		} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
 			my ($oid, $path) = ($1, $2);
+			$tree->{$path} = 1;
 			if ($purge->{$oid}) {
 				push @oids, $oid;
-				delete $tree->{$path};
+				my $cmd = "M 100644 inline $path\ndata 0\n\n";
+				push @buf, $cmd;
 			} else {
-				$tree->{$path} = 1;
 				push @buf, $_;
 			}
 		} elsif (/^D (\w+)/) {
-- 
2.17.1


^ permalink raw reply related	[relevance 7%]

Results 1-4 of 4 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2018-08-10  0:08  7% [PATCH] Import.pm: When purging replace a purged file with a zero length file Eric W. Biederman
2018-08-10 17:47  7% ` Eric Wong
2018-08-11  1:12  7%   ` Eric W. Biederman
2019-06-09  2:51     [PATCH 00/11] v2: implement message editing Eric Wong (Contractor, The Linux Foundation)
2019-06-09  2:51  2% ` [PATCH 03/11] import: switch to "replace_oids" interface for purge Eric Wong (Contractor, The Linux Foundation)

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).