about summary refs log tree commit homepage
path: root/script
diff options
context:
space:
mode:
Diffstat (limited to 'script')
-rwxr-xr-xscript/public-inbox-compact97
-rwxr-xr-xscript/public-inbox-convert139
-rwxr-xr-xscript/public-inbox-index59
-rwxr-xr-xscript/public-inbox-init62
-rwxr-xr-xscript/public-inbox-learn2
-rwxr-xr-xscript/public-inbox-mda18
6 files changed, 346 insertions, 31 deletions
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
new file mode 100755
index 00000000..5f18497e
--- /dev/null
+++ b/script/public-inbox-compact
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::Search;
+use PublicInbox::Config;
+use PublicInbox::InboxWritable;
+use Cwd 'abs_path';
+use File::Temp qw(tempdir);
+use File::Path qw(remove_tree);
+use PublicInbox::Spawn qw(spawn);
+my $usage = "Usage: public-inbox-compact REPO_DIR\n";
+my $dir = shift or die $usage; # REPO_DIR is the only positional argument
+my $config = PublicInbox::Config->new;
+my $ibx; # set below when a configured inbox has $dir as its mainrepo
+$dir = abs_path($dir);
+$config->each_inbox(sub {
+        $ibx = $_[0] if abs_path($_[0]->{mainrepo}) eq $dir
+});
+unless ($ibx) { # not configured: warn, then build a placeholder inbox
+        warn "W: $dir not configured in ".
+                PublicInbox::Config::default_file() . "\n";
+        $ibx = {
+                mainrepo => $dir,
+                name => 'ignored',
+                address => [ 'old@example.com' ],
+        };
+        $ibx = PublicInbox::Inbox->new($ibx);
+}
+my $v = ($ibx->{version} || 1); # a missing or false version is treated as 1
+$ibx = PublicInbox::InboxWritable->new($ibx);
+$ibx->umask_prepare;
+# commit_changes($im, $old, $new): replace dir $old with compacted dir $new
+sub commit_changes ($$$) {
+        my ($im, $old, $new) = @_; # $im must provide lock_release
+        my @st = stat($old) or die "failed to stat($old): $!\n";
+        # back up over.sqlite3 (when $old has one) into $new before the swap
+        my $over = "$old/over.sqlite3";
+        if (-f $over) {
+                require PublicInbox::Over;
+                $over = PublicInbox::Over->new($over);
+                $over->connect->sqlite_backup_to_file("$new/over.sqlite3");
+        }
+        rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
+        chmod($st[2] & 07777, $new) or die "chmod $new: $!\n"; # $old's mode
+        rename($new, $old) or die "rename $new => $old: $!\n";
+        $im->lock_release; # only after $new has taken the place of $old
+        remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
+}
+my @compact = qw(xapian-compact --no-renumber); # shared by the v1 and v2 paths
+if ($v == 2) {
+        require PublicInbox::V2Writable;
+        my $v2w = PublicInbox::V2Writable->new($ibx);
+        my $xap_v = 'xap'.PublicInbox::Search::SCHEMA_VERSION;
+        my $old = "$dir/$xap_v";
+        opendir my $dh, $old or die "Failed to opendir $old: $!\n";
+        my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $dir);
+        $ibx->with_umask(sub {
+                $v2w->lock_acquire; # commit_changes calls lock_release
+                my %pids; # child pid => its command line (for error text)
+                while (defined(my $dn = readdir($dh))) {
+                        if ($dn =~ /\A\d+\z/) { # all-digit name: compact it
+                                my $cmd = [ @compact, "$old/$dn", "$new/$dn" ];
+                                $pids{spawn($cmd)} = join(' ', @$cmd);
+                        } elsif ($dn eq '.' || $dn eq '..') {
+                        } elsif ($dn =~ /\Aover\.sqlite3/) { # not compacted here
+                        } else {
+                                warn "W: skipping unknown Xapian DB: $old/$dn\n"
+                        }
+                }
+                close $dh; # NOTE(review): $dh is from opendir; closedir intended?
+                die "No Xapian parts found in $old\n" unless keys %pids;
+                while (scalar keys %pids) { # wait for every spawned child
+                        my $pid = waitpid(-1, 0);
+                        my $desc = delete $pids{$pid};
+                        die "$desc failed: $?\n" if $?;
+                }
+                commit_changes($v2w, $old, $new);
+        });
+} elsif ($v == 1) {
+        require PublicInbox::Import;
+        my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+        my $xap_v = 'xapian'.PublicInbox::Search::SCHEMA_VERSION;
+        my $v1_root = "$dir/public-inbox";
+        my $old = "$v1_root/$xap_v";
+        -d $old or die "$old does not exist\n";
+        my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $v1_root);
+        $ibx->with_umask(sub {
+                $im->lock_acquire; # commit_changes calls lock_release
+                PublicInbox::Import::run_die([@compact, $old, $new]);
+                commit_changes($im, $old, $new);
+        });
+} else {
+        die "Unsupported inbox version: $v\n";
+}
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
new file mode 100755
index 00000000..2742be79
--- /dev/null
+++ b/script/public-inbox-convert
@@ -0,0 +1,139 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::MIME;
+use PublicInbox::InboxWritable;
+use PublicInbox::Config;
+use PublicInbox::V2Writable;
+use PublicInbox::Import;
+use PublicInbox::Spawn qw(spawn);
+use Cwd 'abs_path';
+my $usage = "Usage: public-inbox-convert OLD NEW\n";
+my $jobs; # --jobs/-j value; stays undef unless given
+my $index = 1; # --index is on by default; --no-index clears it
+my %opts = (
+        '--jobs|j=i' => \$jobs,
+        '--index!' => \$index,
+);
+# parse @ARGV once only: a second pass would re-scan operands given after "--"
+GetOptions(%opts) or die "bad command-line args\n$usage";
+my $old_dir = shift or die $usage;
+my $new_dir = shift or die $usage;
+die "$new_dir exists\n" if -d $new_dir;
+die "$old_dir not a directory\n" unless -d $old_dir;
+my $config = PublicInbox::Config->new;
+$old_dir = abs_path($old_dir);
+my $old; # configured inbox whose mainrepo resolves to $old_dir, if any
+$config->each_inbox(sub {
+        $old = $_[0] if abs_path($_[0]->{mainrepo}) eq $old_dir;
+});
+unless ($old) { # not configured: warn, then build a placeholder inbox
+        warn "W: $old_dir not configured in " .
+                PublicInbox::Config::default_file() . "\n";
+        $old = {
+                mainrepo => $old_dir,
+                name => 'ignored',
+                address => [ 'old@example.com' ],
+        };
+        $old = PublicInbox::Inbox->new($old);
+}
+$old = PublicInbox::InboxWritable->new($old);
+if (($old->{version} || 1) >= 2) { # a missing or false version counts as 1
+        die "Only conversion from v1 inboxes is supported\n";
+}
+my $new = { %$old }; # shallow copy of the old inbox's fields
+$new->{mainrepo} = abs_path($new_dir);
+$new->{version} = 2;
+$new = PublicInbox::InboxWritable->new($new);
+my $v2w; # assigned inside the with_umask callback, used again after it
+$old->umask_prepare;
+$old->with_umask(sub {
+        local $ENV{GIT_CONFIG} = "$old->{mainrepo}/config";
+        $v2w = PublicInbox::V2Writable->new($new, 1);
+        $v2w->init_inbox($jobs);
+        chomp(my $sr = $old->git->qx('config', 'core.sharedRepository'));
+        if ($sr ne '') { # copy the old setting into the new all.git config
+                PublicInbox::Import::run_die(['git', 'config',
+                        "--file=$new->{mainrepo}/all.git/config",
+                        'core.sharedRepository', $sr]);
+        }
+        if (my $alt = $new->{altid}) { # one SQLite backup per altid entry
+                require PublicInbox::AltId;
+                foreach my $i (0..$#$alt) {
+                        my $src = PublicInbox::AltId->new($old, $alt->[$i], 0);
+                        $src->mm_alt or next; # nothing to copy for this entry
+                        my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1);
+                        $dst = $dst->{filename}; # only the path is needed
+                        $src->mm_alt->{dbh}->sqlite_backup_to_file($dst);
+                }
+        }
+});
+my $state = ''; # 'blob' or 'commit': the kind of record last started
+my ($prev, $from); # NOTE(review): assigned below but not read in this script
+my $head = $old->{ref_head} || 'HEAD';
+my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
+$v2w->idx_init;
+my $im = $v2w->importer;
+my ($r, $w) = $im->gfi_start;
+my $h = '[0-9a-f]'; # one lowercase hex digit, for the XX/38-hex path regexes
+my %D; # path => mark taken from that path's most recent "M" line
+my $last; # 'm' or 'd': the file name written by the previous rewrite
+while (<$rd>) {
+        if ($_ eq "blob\n") {
+                $state = 'blob';
+        } elsif (/^commit /) {
+                $state = 'commit';
+        } elsif (/^data (\d+)/) { # copy the header, then the payload as-is
+                my $len = $1;
+                $w->print($_) or $im->wfail;
+                while ($len) { # a read may return fewer bytes than requested
+                        my $n = read($rd, my $tmp, $len) or die "read: $!";
+                        warn "$n != $len\n" if $n != $len;
+                        $len -= $n;
+                        $w->print($tmp) or $im->wfail;
+                }
+                next; # header and payload were already written
+        } elsif ($state eq 'commit') {
+                if (m{^M 100644 :(\d+) (${h}{2}/${h}{38})}o) {
+                        my ($mark, $path) = ($1, $2); # blob mark and old path
+                        $D{$path} = $mark;
+                        if ($last && $last ne 'm') { # previous entry was 'd'
+                                $w->print("D $last\n") or $im->wfail;
+                        }
+                        $w->print("M 100644 :$mark m\n") or $im->wfail;
+                        $last = 'm';
+                        next; # the original M line is not forwarded
+                }
+                if (m{^D (${h}{2}/${h}{38})}o) {
+                        my $mark = delete $D{$1}; # mark saved by the M branch
+                        defined $mark or die "undeleted path: $1\n";
+                        if ($last && $last ne 'd') { # previous entry was 'm'
+                                $w->print("D $last\n") or $im->wfail;
+                        }
+                        $w->print("M 100644 :$mark d\n") or $im->wfail;
+                        $last = 'd';
+                        next; # the original D line is not forwarded
+                }
+                if (m{^from (:\d+)}) {
+                        $prev = $from;
+                        $from = $1;
+                        # no next: the "from" line is still written out below
+                }
+        }
+        last if $_ eq "done\n"; # stop here; the "done" line is not forwarded
+        $w->print($_) or $im->wfail;
+}
+$w = $r = undef; # drop both handles returned by gfi_start
+close $rd or die "close fast-export: $!\n";
+waitpid($pid, 0) or die "waitpid failed: $!\n";
+$? == 0 or die "fast-export failed: $?\n";
+my $mm = $old->mm;
+$mm->{dbh}->sqlite_backup_to_file("$new_dir/msgmap.sqlite3") if $mm;
+$v2w->done;
+if ($index) { # skipped when --no-index was given
+        $v2w->index_sync;
+        $v2w->done;
+}
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 594a3d9c..db7ebbab 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -4,13 +4,13 @@
 # Basic tool to create a Xapian search index for a git repository
 # configured for public-inbox.
 # Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
-# highly recommended: eatmydata public-inbox-index GIT_DIR
+# highly recommended: eatmydata public-inbox-index REPO_DIR
 
 use strict;
 use warnings;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 use Cwd 'abs_path';
-my $usage = "public-inbox-index GIT_DIR";
+my $usage = "public-inbox-index REPO_DIR";
 use PublicInbox::Config;
 my $config = eval { PublicInbox::Config->new } || eval {
         warn "public-inbox unconfigured for serving, indexing anyways...\n";
@@ -23,13 +23,25 @@ if ($@) {
 }
 
 my $reindex;
-my %opts = ( '--reindex' => \$reindex );
+my $prune;
+my $jobs = undef;
+my %opts = (
+        '--reindex' => \$reindex,
+        '--jobs|j=i' => \$jobs,
+        '--prune' => \$prune,
+);
 GetOptions(%opts) or die "bad command-line args\n$usage";
+die "--jobs must be positive\n" if defined $jobs && $jobs < 0;
 
 my @dirs;
 
-sub resolve_git_dir {
+sub resolve_repo_dir {
         my ($cd) = @_;
+        my $prefix = defined $cd ? $cd : './';
+        if (-d $prefix && -f "$prefix/inbox.lock") { # v2
+                return abs_path($prefix);
+        }
+
         my @cmd = qw(git rev-parse --git-dir);
         my $cmd = join(' ', @cmd);
         my $pid = open my $fh, '-|';
@@ -53,9 +65,9 @@ sub resolve_git_dir {
 }
 
 if (@ARGV) {
-        @dirs = map { resolve_git_dir($_) } @ARGV;
+        @dirs = map { resolve_repo_dir($_) } @ARGV;
 } else {
-        @dirs = (resolve_git_dir());
+        @dirs = (resolve_repo_dir());
 }
 
 sub usage { print STDERR "Usage: $usage\n"; exit 1 }
@@ -73,14 +85,39 @@ foreach my $k (keys %$config) {
 }
 
 foreach my $dir (@dirs) {
+        if (!ref($dir) && -f "$dir/inbox.lock") { # v2
+                my $ibx = { mainrepo => $dir, name => 'unnamed' };
+                $dir = PublicInbox::Inbox->new($ibx);
+        }
         index_dir($dir);
 }
 
 sub index_dir {
-        my ($git_dir) = @_;
-        if (!ref $git_dir && ! -d $git_dir) {
-                die "$git_dir does not appear to be a git repository\n";
+        my ($repo) = @_; # a directory path or a reference (see ref tests below)
+        if (!ref $repo && ! -d $repo) {
+                die "$repo does not appear to be an inbox repository\n";
+        }
+        if (ref($repo) && ($repo->{version} || 1) == 2) { # object with version 2
+                eval { require PublicInbox::V2Writable };
+                die "v2 requirements not met: $@\n" if $@;
+                my $v2w = eval { # NOTE(review): $@ is not checked after this eval
+                        local $ENV{NPROC} = $jobs if $jobs;
+                        PublicInbox::V2Writable->new($repo);
+                };
+                if (defined $jobs) {
+                        if ($jobs == 0) { # --jobs=0: set the parallel field to 0
+                                $v2w->{parallel} = 0;
+                        } else {
+                                my $n = $v2w->{partitions};
+                                if ($jobs != $n) { # warn only; $n is kept
+                                        warn
+"Unable to respect --jobs=$jobs, inbox was created with $n partitions\n";
+                                }
+                        }
+                }
+                $v2w->index_sync({ reindex => $reindex, prune => $prune });
+        } else { # everything else uses SearchIdx; prune is not passed here
+                my $s = PublicInbox::SearchIdx->new($repo, 1);
+                $s->index_sync({ reindex => $reindex });
         }
-        my $s = PublicInbox::SearchIdx->new($git_dir, 1);
-        $s->index_sync({ reindex => $reindex });
 }
diff --git a/script/public-inbox-init b/script/public-inbox-init
index 2f33c9ef..3ef6c3bd 100755
--- a/script/public-inbox-init
+++ b/script/public-inbox-init
@@ -5,7 +5,8 @@
 # Initializes a public-inbox, basically a wrapper for git-init(1)
 use strict;
 use warnings;
-my $usage = "public-inbox-init NAME GIT_DIR HTTP_URL ADDRESS [ADDRESS..]";
+my $usage = "public-inbox-init NAME REPO_DIR HTTP_URL ADDRESS [ADDRESS..]";
+use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
 use PublicInbox::Config;
 use File::Temp qw/tempfile/;
 use File::Basename qw/dirname/;
@@ -14,9 +15,11 @@ use Cwd qw/abs_path/;
 
 sub x { system(@_) and die join(' ', @_). " failed: $?\n" }
 sub usage { print STDERR "Usage: $usage\n"; exit 1 }
-
+my $version = undef;
+my %opts = ( 'V|version=i' => \$version );
+GetOptions(%opts) or usage();
 my $name = shift @ARGV or usage();
-my $git_dir = shift @ARGV or usage();
+my $mainrepo = shift @ARGV or usage();
 my $http_url = shift @ARGV or usage();
 my (@address) = @ARGV;
 @address or usage();
@@ -25,7 +28,7 @@ my %seen;
 my $pi_config = PublicInbox::Config->default_file;
 my $dir = dirname($pi_config);
 mkpath($dir); # will croak on fatal errors
-my ($fh, $filename) = tempfile('pi-init-XXXXXXXX', DIR => $dir);
+my ($fh, $pi_config_tmp) = tempfile('pi-init-XXXXXXXX', DIR => $dir);
 if (-e $pi_config) {
         open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n";
         my @st = stat($oh);
@@ -62,22 +65,55 @@ if (-e $pi_config) {
 
         exit(1) if $conflict;
 }
-close $fh or die "failed to close $filename: $!\n";
+close $fh or die "failed to close $pi_config_tmp: $!\n";
 
 my $pfx = "publicinbox.$name";
-my @x = (qw/git config/, "--file=$filename");
-$git_dir = abs_path($git_dir);
-x(qw(git init -q --bare), $git_dir);
+my @x = (qw/git config/, "--file=$pi_config_tmp");
+
+$mainrepo = abs_path($mainrepo);
+if (-f "$mainrepo/inbox.lock") {
+        if (!defined $version) {
+                $version = 2;
+        } elsif ($version != 2) {
+                die "$mainrepo is a -V2 repo, -V$version specified\n"
+        }
+} elsif (-d "$mainrepo/objects") {
+        if (!defined $version) {
+                $version = 1;
+        } elsif ($version != 1) {
+                die "$mainrepo is a -V1 repo, -V$version specified\n"
+        }
+}
+
+$version = 1 unless defined $version;
 
-# set a reasonable default:
-x(qw/git config/, "--file=$git_dir/config", 'repack.writeBitmaps', 'true');
+if ($version >= 2) {
+        require PublicInbox::V2Writable;
+        require PublicInbox::Inbox;
+        my $ibx = {
+                mainrepo => $mainrepo,
+                name => $name,
+                version => $version,
+                -primary_address => $address[0],
+        };
+        $ibx = PublicInbox::Inbox->new($ibx);
+        PublicInbox::V2Writable->new($ibx, 1)->init_inbox(0);
+} elsif ($version == 1) {
+        x(qw(git init -q --bare), $mainrepo);
+
+        # set a reasonable default:
+        x(qw/git config/, "--file=$mainrepo/config",
+                'repack.writeBitmaps', 'true');
+} else {
+        die "Unsupported -V/--version: $version\n";
+}
 
 foreach my $addr (@address) {
         next if $seen{lc($addr)};
         x(@x, "--add", "$pfx.address", $addr);
 }
 x(@x, "$pfx.url", $http_url);
-x(@x, "$pfx.mainrepo", $git_dir);
+x(@x, "$pfx.mainrepo", $mainrepo);
 
-rename $filename, $pi_config or
-        die "failed to rename `$filename' to `$pi_config': $!\n";
+rename $pi_config_tmp, $pi_config or
+        die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n";
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index bdc72e08..c51f9585 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -11,8 +11,6 @@ use PublicInbox::Config;
 use PublicInbox::Git;
 use PublicInbox::Import;
 use PublicInbox::MIME;
-use Email::MIME::ContentType;
-$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Address;
 use PublicInbox::Spamcheck::Spamc;
 my $train = shift or die "usage: $usage\n";
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 8cf44195..766d58a6 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -15,9 +15,7 @@ sub do_exit {
 }
 
 use Email::Simple;
-use Email::MIME;
-use Email::MIME::ContentType;
-$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
+use PublicInbox::MIME;
 use PublicInbox::MDA;
 use PublicInbox::Config;
 use PublicInbox::Import;
@@ -80,8 +78,18 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
 } # else { accept
 
 PublicInbox::MDA->set_list_headers($mime, $dst);
-my $git = PublicInbox::Git->new($main_repo);
-my $im = PublicInbox::Import->new($git, $dst->{name}, $recipient);
+my $v = $dst->{version} || 1;
+my $im;
+if ($v == 2) {
+        require PublicInbox::V2Writable;
+        $im = PublicInbox::V2Writable->new($dst);
+        $im->{parallel} = 0; # pointless to be parallel for a single message
+} elsif ($v == 1) {
+        my $git = $dst->git;
+        $im = PublicInbox::Import->new($git, $dst->{name}, $recipient, $dst);
+} else {
+        die "Unsupported inbox version: $v\n";
+}
 if (defined $im->add($mime)) {
         $emm = $emm->abort;
 } else {