diff options
Diffstat (limited to 'script')
-rwxr-xr-x | script/public-inbox-compact | 97 | ||||
-rwxr-xr-x | script/public-inbox-convert | 139 | ||||
-rwxr-xr-x | script/public-inbox-index | 59 | ||||
-rwxr-xr-x | script/public-inbox-init | 62 | ||||
-rwxr-xr-x | script/public-inbox-learn | 2 | ||||
-rwxr-xr-x | script/public-inbox-mda | 18 |
6 files changed, 346 insertions, 31 deletions
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
new file mode 100755
index 00000000..5f18497e
--- /dev/null
+++ b/script/public-inbox-compact
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Runs xapian-compact(1) on the Xapian index of a v1 or v2 inbox and
+# swaps the compacted copy into place while holding the inbox lock.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::Search;
+use PublicInbox::Config;
+use PublicInbox::InboxWritable;
+use Cwd 'abs_path';
+use File::Temp qw(tempdir);
+use File::Path qw(remove_tree);
+use PublicInbox::Spawn qw(spawn);
+my $usage = "Usage: public-inbox-compact REPO_DIR\n";
+my $dir = shift or die $usage;
+my $config = PublicInbox::Config->new;
+my $ibx;
+$dir = abs_path($dir);
+
+# prefer the configured inbox whose mainrepo resolves to REPO_DIR
+$config->each_inbox(sub {
+	$ibx = $_[0] if abs_path($_[0]->{mainrepo}) eq $dir
+});
+unless ($ibx) {
+	# unconfigured: fall back to a placeholder inbox description
+	warn "W: $dir not configured in ".
+		PublicInbox::Config::default_file() . "\n";
+	$ibx = {
+		mainrepo => $dir,
+		name => 'ignored',
+		address => [ 'old@example.com' ],
+	};
+	$ibx = PublicInbox::Inbox->new($ibx);
+}
+my $v = ($ibx->{version} || 1); # no {version} field means v1
+$ibx = PublicInbox::InboxWritable->new($ibx);
+$ibx->umask_prepare;
+
+# Replace the index directory $old with the compacted copy in $new.
+#   $im  - object holding the inbox lock; ->lock_release is called once
+#          $new has been renamed to $old
+#   $old - path of the existing index directory
+#   $new - path of the temporary directory holding the compacted index
+# Dies with a descriptive message if any filesystem step fails.
+sub commit_changes ($$$) {
+	my ($im, $old, $new) = @_;
+	my @st = stat($old) or die "failed to stat($old): $!\n";
+
+	# over.sqlite3 is not produced by xapian-compact, so copy it over
+	my $over = "$old/over.sqlite3";
+	if (-f $over) {
+		require PublicInbox::Over;
+		$over = PublicInbox::Over->new($over);
+		$over->connect->sqlite_backup_to_file("$new/over.sqlite3");
+	}
+	# park the old index inside the new directory so that it can be
+	# removed via its final path after the swap
+	rename($old, "$new/old") or die "rename $old => $new/old: $!\n";
+	# carry the permission bits of the old directory over to the new one
+	chmod($st[2] & 07777, $new) or die "chmod $new: $!\n";
+	rename($new, $old) or die "rename $new => $old: $!\n";
+	$im->lock_release;
+	remove_tree("$old/old") or die "failed to remove $old/old: $!\n";
+}
+my @compact = qw(xapian-compact --no-renumber);
+if ($v == 2) {
+	require PublicInbox::V2Writable;
+	my $v2w = PublicInbox::V2Writable->new($ibx);
+	my $xap_v = 'xap'.PublicInbox::Search::SCHEMA_VERSION;
+	my $old = "$dir/$xap_v";
+	opendir my $dh, $old or die "Failed to opendir $old: $!\n";
+	my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $dir);
+	$ibx->with_umask(sub {
+		$v2w->lock_acquire;
+		my %pids; # pid => command line, for error reporting
+		while (defined(my $dn = readdir($dh))) {
+			if ($dn =~ /\A\d+\z/) {
+				# one xapian-compact child per numbered
+				# directory, all running concurrently
+				my $cmd = [ @compact, "$old/$dn", "$new/$dn" ];
+				$pids{spawn($cmd)} = join(' ', @$cmd);
+			} elsif ($dn eq '.' || $dn eq '..') {
+			} elsif ($dn =~ /\Aover\.sqlite3/) {
+				# handled by commit_changes
+			} else {
+				warn "W: skipping unknown Xapian DB: $old/$dn\n"
+			}
+		}
+		# $dh is a directory handle: closedir, not close
+		closedir $dh;
+		die "No Xapian parts found in $old\n" unless keys %pids;
+		while (scalar keys %pids) {
+			my $pid = waitpid(-1, 0);
+			# -1 means there is nothing left to reap
+			die "waitpid failed: $!\n" if $pid < 0;
+			my $desc = delete $pids{$pid};
+			die "$desc failed: $?\n" if $?;
+		}
+		commit_changes($v2w, $old, $new);
+	});
+} elsif ($v == 1) {
+	require PublicInbox::Import;
+	my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
+	my $xap_v = 'xapian'.PublicInbox::Search::SCHEMA_VERSION;
+	my $v1_root = "$dir/public-inbox";
+	my $old = "$v1_root/$xap_v";
+	-d $old or die "$old does not exist\n";
+	my $new = tempdir('compact-XXXXXXXX', CLEANUP => 1, DIR => $v1_root);
+	$ibx->with_umask(sub {
+		$im->lock_acquire;
+		# v1 has a single Xapian DB: compact it synchronously
+		PublicInbox::Import::run_die([@compact, $old, $new]);
+		commit_changes($im, $old, $new);
+	});
+} else {
+	die "Unsupported inbox version: $v\n";
+}
diff --git a/script/public-inbox-convert b/script/public-inbox-convert new file mode 100755 index 00000000..2742be79 --- /dev/null +++ b/script/public-inbox-convert @@ -0,0 +1,139 @@ +#!/usr/bin/perl -w +# Copyright (C) 2018 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use PublicInbox::MIME; +use PublicInbox::InboxWritable; +use PublicInbox::Config; +use PublicInbox::V2Writable; +use PublicInbox::Import; +use PublicInbox::Spawn qw(spawn); +use Cwd 'abs_path'; +my 
$usage = "Usage: public-inbox-convert OLD NEW\n";
+my $jobs;
+my $index = 1;
+my %opts = (
+	'--jobs|j=i' => \$jobs,
+	'--index!' => \$index,
+);
+# Parse options exactly once: GetOptions consumes a "--" terminator, so
+# a second call would treat the positional arguments as options.
+GetOptions(%opts) or die "bad command-line args\n$usage";
+my $old_dir = shift or die $usage;
+my $new_dir = shift or die $usage;
+die "$new_dir exists\n" if -d $new_dir;
+die "$old_dir not a directory\n" unless -d $old_dir;
+my $config = PublicInbox::Config->new;
+$old_dir = abs_path($old_dir);
+my $old;
+
+# prefer the configured inbox whose mainrepo resolves to OLD
+$config->each_inbox(sub {
+	$old = $_[0] if abs_path($_[0]->{mainrepo}) eq $old_dir;
+});
+unless ($old) {
+	# unconfigured: fall back to a placeholder inbox description
+	warn "W: $old_dir not configured in " .
+		PublicInbox::Config::default_file() . "\n";
+	$old = {
+		mainrepo => $old_dir,
+		name => 'ignored',
+		address => [ 'old@example.com' ],
+	};
+	$old = PublicInbox::Inbox->new($old);
+}
+$old = PublicInbox::InboxWritable->new($old);
+if (($old->{version} || 1) >= 2) {
+	die "Only conversion from v1 inboxes is supported\n";
+}
+
+# the new inbox starts as a shallow copy of the old inbox's fields,
+# with only the location and version overridden
+my $new = { %$old };
+$new->{mainrepo} = abs_path($new_dir);
+$new->{version} = 2;
+$new = PublicInbox::InboxWritable->new($new);
+my $v2w;
+$old->umask_prepare;
+$old->with_umask(sub {
+	local $ENV{GIT_CONFIG} = "$old->{mainrepo}/config";
+	$v2w = PublicInbox::V2Writable->new($new, 1);
+	$v2w->init_inbox($jobs);
+
+	# carry core.sharedRepository over to the new all.git, if set
+	chomp(my $sr = $old->git->qx('config', 'core.sharedRepository'));
+	if ($sr ne '') {
+		PublicInbox::Import::run_die(['git', 'config',
+			"--file=$new->{mainrepo}/all.git/config",
+			'core.sharedRepository', $sr]);
+	}
+
+	# copy each altid SQLite database the old inbox has
+	if (my $alt = $new->{altid}) {
+		require PublicInbox::AltId;
+		foreach my $i (0..$#$alt) {
+			my $src = PublicInbox::AltId->new($old, $alt->[$i], 0);
+			$src->mm_alt or next;
+			my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1);
+			$dst = $dst->{filename};
+			$src->mm_alt->{dbh}->sqlite_backup_to_file($dst);
+		}
+	}
+});
+my $state = '';
+my ($prev, $from);
+my $head = $old->{ref_head} || 'HEAD';
+my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature),
+		$head);
+$v2w->idx_init;
+my $im = $v2w->importer;
+my ($r, $w) = $im->gfi_start;
+my $h = '[0-9a-f]';
+my %D; # old path => mark of the blob stored at that path
+my $last; # name written by the previous rewritten command: 'm' or 'd'
+
+# Filter the `git fast-export' stream of the old inbox into the stream
+# of the new importer ($w), rewriting file commands inside commits:
+#   "M 100644 :MARK xx/yyyy..."  becomes  "M 100644 :MARK m"
+#   "D xx/yyyy..."               becomes  "M 100644 :MARK d"
+# where MARK is the mark recorded for that path by the earlier "M".
+# Whenever the name switches between 'm' and 'd', the other name is
+# deleted first.  Everything else is passed through unchanged.
+while (<$rd>) {
+	if ($_ eq "blob\n") {
+		$state = 'blob';
+	} elsif (/^commit /) {
+		$state = 'commit';
+	} elsif (/^data (\d+)/) {
+		# copy the payload by byte count so that its contents are
+		# never interpreted as stream commands
+		my $len = $1;
+		$w->print($_) or $im->wfail;
+		while ($len) {
+			my $n = read($rd, my $tmp, $len) or die "read: $!";
+			warn "$n != $len\n" if $n != $len;
+			$len -= $n;
+			$w->print($tmp) or $im->wfail;
+		}
+		next;
+	} elsif ($state eq 'commit') {
+		if (m{^M 100644 :(\d+) (${h}{2}/${h}{38})}o) {
+			my ($mark, $path) = ($1, $2);
+			$D{$path} = $mark;
+			if ($last && $last ne 'm') {
+				$w->print("D $last\n") or $im->wfail;
+			}
+			$w->print("M 100644 :$mark m\n") or $im->wfail;
+			$last = 'm';
+			next;
+		}
+		if (m{^D (${h}{2}/${h}{38})}o) {
+			my $mark = delete $D{$1};
+			defined $mark or die "undeleted path: $1\n";
+			if ($last && $last ne 'd') {
+				$w->print("D $last\n") or $im->wfail;
+			}
+			$w->print("M 100644 :$mark d\n") or $im->wfail;
+			$last = 'd';
+			next;
+		}
+		if (m{^from (:\d+)}) {
+			# recorded only; this script does not read them back
+			$prev = $from;
+			$from = $1;
+			# no next
+		}
+	}
+	# stop at the end-of-stream marker without forwarding it
+	last if $_ eq "done\n";
+	$w->print($_) or $im->wfail;
+}
+$w = $r = undef;
+close $rd or die "close fast-export: $!\n";
+# waitpid returns -1 (a true value) on failure, so compare to the PID
+waitpid($pid, 0) == $pid or die "waitpid failed: $!\n";
+$? == 0 or die "fast-export failed: $?\n";
+my $mm = $old->mm;
+$mm->{dbh}->sqlite_backup_to_file("$new_dir/msgmap.sqlite3") if $mm;
+$v2w->done;
+if ($index) {
+	$v2w->index_sync;
+	$v2w->done;
+}
diff --git a/script/public-inbox-index b/script/public-inbox-index index 594a3d9c..db7ebbab 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -4,13 +4,13 @@ # Basic tool to create a Xapian search index for a git repository # configured for public-inbox. 
# Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/> -# highly recommended: eatmydata public-inbox-index GIT_DIR +# highly recommended: eatmydata public-inbox-index REPO_DIR use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use Cwd 'abs_path'; -my $usage = "public-inbox-index GIT_DIR"; +my $usage = "public-inbox-index REPO_DIR"; use PublicInbox::Config; my $config = eval { PublicInbox::Config->new } || eval { warn "public-inbox unconfigured for serving, indexing anyways...\n"; @@ -23,13 +23,25 @@ if ($@) { } my $reindex; -my %opts = ( '--reindex' => \$reindex ); +my $prune; +my $jobs = undef; +my %opts = ( + '--reindex' => \$reindex, + '--jobs|j=i' => \$jobs, + '--prune' => \$prune, +); GetOptions(%opts) or die "bad command-line args\n$usage"; +die "--jobs must be positive\n" if defined $jobs && $jobs < 0; my @dirs; -sub resolve_git_dir { +sub resolve_repo_dir { my ($cd) = @_; + my $prefix = defined $cd ? $cd : './'; + if (-d $prefix && -f "$prefix/inbox.lock") { # v2 + return abs_path($prefix); + } + my @cmd = qw(git rev-parse --git-dir); my $cmd = join(' ', @cmd); my $pid = open my $fh, '-|'; @@ -53,9 +65,9 @@ sub resolve_git_dir { } if (@ARGV) { - @dirs = map { resolve_git_dir($_) } @ARGV; + @dirs = map { resolve_repo_dir($_) } @ARGV; } else { - @dirs = (resolve_git_dir()); + @dirs = (resolve_repo_dir()); } sub usage { print STDERR "Usage: $usage\n"; exit 1 } @@ -73,14 +85,39 @@ foreach my $k (keys %$config) { } foreach my $dir (@dirs) { + if (!ref($dir) && -f "$dir/inbox.lock") { # v2 + my $ibx = { mainrepo => $dir, name => 'unnamed' }; + $dir = PublicInbox::Inbox->new($ibx); + } index_dir($dir); } sub index_dir { - my ($git_dir) = @_; - if (!ref $git_dir && ! -d $git_dir) { - die "$git_dir does not appear to be a git repository\n"; + my ($repo) = @_; + if (!ref $repo && ! 
-d $repo) { + die "$repo does not appear to be an inbox repository\n"; + } + if (ref($repo) && ($repo->{version} || 1) == 2) { + eval { require PublicInbox::V2Writable }; + die "v2 requirements not met: $@\n" if $@; + my $v2w = eval { + local $ENV{NPROC} = $jobs if $jobs; + PublicInbox::V2Writable->new($repo); + }; + if (defined $jobs) { + if ($jobs == 0) { + $v2w->{parallel} = 0; + } else { + my $n = $v2w->{partitions}; + if ($jobs != $n) { + warn +"Unable to respect --jobs=$jobs, inbox was created with $n partitions\n"; + } + } + } + $v2w->index_sync({ reindex => $reindex, prune => $prune }); + } else { + my $s = PublicInbox::SearchIdx->new($repo, 1); + $s->index_sync({ reindex => $reindex }); } - my $s = PublicInbox::SearchIdx->new($git_dir, 1); - $s->index_sync({ reindex => $reindex }); } diff --git a/script/public-inbox-init b/script/public-inbox-init index 2f33c9ef..3ef6c3bd 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -5,7 +5,8 @@ # Initializes a public-inbox, basically a wrapper for git-init(1) use strict; use warnings; -my $usage = "public-inbox-init NAME GIT_DIR HTTP_URL ADDRESS [ADDRESS..]"; +my $usage = "public-inbox-init NAME REPO_DIR HTTP_URL ADDRESS [ADDRESS..]"; +use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use PublicInbox::Config; use File::Temp qw/tempfile/; use File::Basename qw/dirname/; @@ -14,9 +15,11 @@ use Cwd qw/abs_path/; sub x { system(@_) and die join(' ', @_). 
" failed: $?\n" } sub usage { print STDERR "Usage: $usage\n"; exit 1 } - +my $version = undef; +my %opts = ( 'V|version=i' => \$version ); +GetOptions(%opts) or usage(); my $name = shift @ARGV or usage(); -my $git_dir = shift @ARGV or usage(); +my $mainrepo = shift @ARGV or usage(); my $http_url = shift @ARGV or usage(); my (@address) = @ARGV; @address or usage(); @@ -25,7 +28,7 @@ my %seen; my $pi_config = PublicInbox::Config->default_file; my $dir = dirname($pi_config); mkpath($dir); # will croak on fatal errors -my ($fh, $filename) = tempfile('pi-init-XXXXXXXX', DIR => $dir); +my ($fh, $pi_config_tmp) = tempfile('pi-init-XXXXXXXX', DIR => $dir); if (-e $pi_config) { open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n"; my @st = stat($oh); @@ -62,22 +65,55 @@ if (-e $pi_config) { exit(1) if $conflict; } -close $fh or die "failed to close $filename: $!\n"; +close $fh or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; -my @x = (qw/git config/, "--file=$filename"); -$git_dir = abs_path($git_dir); -x(qw(git init -q --bare), $git_dir); +my @x = (qw/git config/, "--file=$pi_config_tmp"); + +$mainrepo = abs_path($mainrepo); +if (-f "$mainrepo/inbox.lock") { + if (!defined $version) { + $version = 2; + } elsif ($version != 2) { + die "$mainrepo is a -V2 repo, -V$version specified\n" + } +} elsif (-d "$mainrepo/objects") { + if (!defined $version) { + $version = 1; + } elsif ($version != 1) { + die "$mainrepo is a -V1 repo, -V$version specified\n" + } +} + +$version = 1 unless defined $version; -# set a reasonable default: -x(qw/git config/, "--file=$git_dir/config", 'repack.writeBitmaps', 'true'); +if ($version >= 2) { + require PublicInbox::V2Writable; + require PublicInbox::Inbox; + my $ibx = { + mainrepo => $mainrepo, + name => $name, + version => $version, + -primary_address => $address[0], + }; + $ibx = PublicInbox::Inbox->new($ibx); + PublicInbox::V2Writable->new($ibx, 1)->init_inbox(0); +} elsif ($version == 1) { + 
x(qw(git init -q --bare), $mainrepo); + + # set a reasonable default: + x(qw/git config/, "--file=$mainrepo/config", + 'repack.writeBitmaps', 'true'); +} else { + die "Unsupported -V/--version: $version\n"; +} foreach my $addr (@address) { next if $seen{lc($addr)}; x(@x, "--add", "$pfx.address", $addr); } x(@x, "$pfx.url", $http_url); -x(@x, "$pfx.mainrepo", $git_dir); +x(@x, "$pfx.mainrepo", $mainrepo); -rename $filename, $pi_config or - die "failed to rename `$filename' to `$pi_config': $!\n"; +rename $pi_config_tmp, $pi_config or + die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n"; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index bdc72e08..c51f9585 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -11,8 +11,6 @@ use PublicInbox::Config; use PublicInbox::Git; use PublicInbox::Import; use PublicInbox::MIME; -use Email::MIME::ContentType; -$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect use PublicInbox::Address; use PublicInbox::Spamcheck::Spamc; my $train = shift or die "usage: $usage\n"; diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 8cf44195..766d58a6 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -15,9 +15,7 @@ sub do_exit { } use Email::Simple; -use Email::MIME; -use Email::MIME::ContentType; -$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect +use PublicInbox::MIME; use PublicInbox::MDA; use PublicInbox::Config; use PublicInbox::Import; @@ -80,8 +78,18 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message } # else { accept PublicInbox::MDA->set_list_headers($mime, $dst); -my $git = PublicInbox::Git->new($main_repo); -my $im = PublicInbox::Import->new($git, $dst->{name}, $recipient); +my $v = $dst->{version} || 1; +my $im; +if ($v == 2) { + require PublicInbox::V2Writable; + $im = PublicInbox::V2Writable->new($dst); + $im->{parallel} = 0; # pointless to be parallel for a single message +} elsif 
($v == 1) { + my $git = $dst->git; + $im = PublicInbox::Import->new($git, $dst->{name}, $recipient, $dst); +} else { + die "Unsupported inbox version: $v\n"; +} if (defined $im->add($mime)) { $emm = $emm->abort; } else { |