From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net
X-Spam-Level:
X-Spam-ASN:
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00
	shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id 7B10A1FAE9;
	Thu, 29 Mar 2018 10:28:30 +0000 (UTC)
From: "Eric Wong (Contractor, The Linux Foundation)"
To: meta@public-inbox.org
Cc: "Eric Wong (Contractor, The Linux Foundation)"
Subject: [PATCH 06/14] public-inbox-convert: tool for converting old to new inboxes
Date: Thu, 29 Mar 2018 10:28:11 +0000
Message-Id: <20180329102819.15234-7-e@80x24.org>
In-Reply-To: <20180329102819.15234-1-e@80x24.org>
References: <20180329102819.15234-1-e@80x24.org>
List-Id:

This should make it easier to let users perform comparisons and
migrate to v2 if needed.
---
 Documentation/public-inbox-config.pod  |   2 +-
 Documentation/public-inbox-convert.pod |  45 +++++++++
 MANIFEST                               |   2 +
 script/public-inbox-convert            | 126 +++++++++++++++++++++++++
 4 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/public-inbox-convert.pod
 create mode 100755 script/public-inbox-convert

diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index 8250b45..22ee909 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -40,7 +40,7 @@ Default: none, required
 
 =item publicinbox..mainrepo
 
-The absolute path to the git repository which hosts the
+The absolute path to the directory which hosts the
 public-inbox. This must be specified once.
 
 Default: none, required
diff --git a/Documentation/public-inbox-convert.pod b/Documentation/public-inbox-convert.pod
new file mode 100644
index 0000000..1e16ea4
--- /dev/null
+++ b/Documentation/public-inbox-convert.pod
@@ -0,0 +1,45 @@
+=head1 NAME
+
+public-inbox-convert - convert v1 inboxes to v2
+
+=head1 SYNOPSIS
+
+	public-inbox-convert OLD_DIR NEW_DIR
+
+=head1 DESCRIPTION
+
+public-inbox-convert copies the contents of an old "v1" inbox
+into a new "v2" inbox. It makes no changes to the old inbox
+and users are expected to update the "mainrepo" path in
+L to point to the path of NEW_DIR
+once they are satisfied with the conversion.
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+The default config file, normally "~/.public-inbox/config".
+See L
+
+=back
+
+=head1 UPGRADING
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L
+
+The mail archives are hosted at L
+and L
+
+=head1 COPYRIGHT
+
+Copyright 2013-2018 all contributors L
+
+License: AGPL-3.0+ L
+
+=head1 SEE ALSO
+
+L, L
diff --git a/MANIFEST b/MANIFEST
index 8b2b10b..1e48d3a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -8,6 +8,7 @@ Documentation/design_www.txt
 Documentation/hosted.txt
 Documentation/include.mk
 Documentation/public-inbox-config.pod
+Documentation/public-inbox-convert.pod
 Documentation/public-inbox-daemon.pod
 Documentation/public-inbox-httpd.pod
 Documentation/public-inbox-index.pod
@@ -109,6 +110,7 @@ sa_config/Makefile
 sa_config/README
 sa_config/root/etc/spamassassin/public-inbox.pre
 sa_config/user/.spamassassin/user_prefs
+script/public-inbox-convert
 script/public-inbox-httpd
 script/public-inbox-index
 script/public-inbox-init
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
new file mode 100755
index 0000000..2b0a385
--- /dev/null
+++ b/script/public-inbox-convert
@@ -0,0 +1,126 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2018 all contributors
+# License: AGPL-3.0+
+#
+# Convert a v1 inbox (OLD) into a new v2 inbox (NEW): the output of
+# "git fast-export" on OLD is rewritten line-by-line and written to the
+# handle returned by the v2 importer's gfi_start.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::MIME;
+use PublicInbox::Inbox;
+use PublicInbox::Config;
+use PublicInbox::V2Writable;
+use PublicInbox::Spawn qw(spawn);
+use Cwd 'abs_path';
+my $usage = "Usage: public-inbox-convert OLD NEW\n";
+my $jobs;
+my $index = 1;
+my %opts = (
+	'--jobs|j=i' => \$jobs,
+	'--index!' => \$index,
+);
+# parse switches exactly once: a second pass would treat arguments
+# following a "--" terminator as switches
+GetOptions(%opts) or die "bad command-line args\n$usage";
+my $old_dir = shift or die $usage;
+my $new_dir = shift or die $usage;
+die "$new_dir exists\n" if -d $new_dir;
+die "$old_dir not a directory\n" unless -d $old_dir;
+my $config = PublicInbox::Config->new;
+$old_dir = abs_path($old_dir);
+my $old;
+# prefer the configured inbox whose mainrepo resolves to OLD
+$config->each_inbox(sub {
+	# abs_path may return undef for a path it cannot resolve
+	my $dir = abs_path($_[0]->{mainrepo});
+	$old = $_[0] if defined($dir) && $dir eq $old_dir;
+});
+unless ($old) {
+	warn "W: $old_dir not configured in " .
+		PublicInbox::Config::default_file() . "\n";
+	# fall back to a placeholder inbox description for OLD
+	$old = {
+		mainrepo => $old_dir,
+		name => 'ignored',
+		address => [ 'old@example.com' ],
+	};
+	$old = PublicInbox::Inbox->new($old);
+}
+if (($old->{version} || 1) >= 2) {
+	die "Only conversion from v1 inboxes is supported\n";
+}
+# NEW starts as a shallow copy of the settings of OLD
+my $new = { %$old };
+delete $new->{altid}; # TODO: support altid for v2
+$new->{mainrepo} = $new_dir;
+$new->{version} = 2;
+$new = PublicInbox::Inbox->new($new);
+my $v2w = PublicInbox::V2Writable->new($new, 1);
+$v2w->init_inbox($jobs);
+my $state = '';
+my ($prev, $from);
+my $head = $old->{ref_head} || 'HEAD';
+my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
+$v2w->idx_init;
+my $im = $v2w->importer;
+my ($r, $w) = $im->gfi_start;
+my $h = '[0-9a-f]';
+my %D; # v1 path => mark of the blob last written to that path
+while (<$rd>) {
+	if ($_ eq "blob\n") {
+		$state = 'blob';
+	} elsif (/^commit /) {
+		$state = 'commit';
+	} elsif (/^data (\d+)/) {
+		# copy the $len-byte payload through unchanged; read() may
+		# return less than requested, so loop until all is consumed
+		my $len = $1;
+		$w->print($_) or $im->wfail;
+		while ($len) {
+			my $n = read($rd, my $tmp, $len) or die "read: $!";
+			warn "$n != $len\n" if $n != $len;
+			$len -= $n;
+			$w->print($tmp) or $im->wfail;
+		}
+		next;
+	} elsif ($state eq 'commit') {
+		# a 2/38 hex path written by v1 is replaced by the path "m"
+		if (m{^M 100644 :(\d+) (${h}{2}/${h}{38})}o) {
+			my ($mark, $path) = ($1, $2);
+			$D{$path} = $mark;
+			$w->print("M 100644 :$mark m\n") or $im->wfail;
+			next;
+		}
+		# a v1 delete is emitted as a write of the deleted blob
+		# (looked up by path in %D) to the path "_/D"
+		if (m{^D (${h}{2}/${h}{38})}o) {
+			my $mark = delete $D{$1};
+			defined $mark or die "undeleted path: $1\n";
+			$w->print("M 100644 :$mark _/D\n") or $im->wfail;
+			next;
+		}
+		if (m{^from (:\d+)}) {
+			$prev = $from;
+			$from = $1;
+			# no next
+		}
+	} elsif ($_ eq "done\n") {
+		last;
+	}
+	$w->print($_) or $im->wfail;
+}
+$w = $r = undef;
+close $rd or die "close fast-export: $!\n";
+# waitpid returns -1 (a true value) on failure, so compare with $pid
+waitpid($pid, 0) == $pid or die "waitpid failed: $!\n";
+$? == 0 or die "fast-export failed: $?\n";
+# copy the SQLite database behind $old->mm, if any, into NEW
+my $mm = $old->mm;
+$mm->{dbh}->sqlite_backup_to_file("$new_dir/msgmap.sqlite3") if $mm;
+$v2w->done;
+if ($index) {
+	$v2w->reindex;
+	$v2w->done;
+}
-- 
EW