diff options
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | lib/PublicInbox/AltId.pm | 38 | ||||
-rw-r--r-- | lib/PublicInbox/Config.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Msgmap.pm | 20 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 16 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 24 | ||||
-rwxr-xr-x | script/public-inbox-index | 21 | ||||
-rwxr-xr-x | scripts/xhdr-num2mid | 27 | ||||
-rw-r--r-- | t/altid.t | 61 |
10 files changed, 206 insertions, 11 deletions
@@ -35,6 +35,7 @@ examples/unsubscribe.milter examples/unsubscribe.psgi examples/varnish-4.vcl lib/PublicInbox/Address.pm +lib/PublicInbox/AltId.pm lib/PublicInbox/Config.pm lib/PublicInbox/Daemon.pm lib/PublicInbox/Emergency.pm @@ -104,6 +105,7 @@ scripts/slrnspool2maildir scripts/ssoma-replay scripts/xhdr-num2mid t/address.t +t/altid.t t/cgi.t t/check-www-inbox.perl t/common.perl diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm new file mode 100644 index 00000000..6fdc3a2d --- /dev/null +++ b/lib/PublicInbox/AltId.pm @@ -0,0 +1,38 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +package PublicInbox::AltId; +use strict; +use warnings; +use URI::Escape qw(uri_unescape); + +# spec: TYPE:PREFIX:param1=value1&param2=value2&... +# Example: serial:gmane:file=/path/to/altmsgmap.sqlite3 +sub new { + my ($class, $inbox, $spec) = @_; + my ($type, $prefix, $query) = split(/:/, $spec, 3); + $type eq 'serial' or die "non-serial not supported, yet\n"; + + require PublicInbox::Msgmap; + + my %params = map { + my ($k, $v) = split(/=/, uri_unescape($_), 2); + $v = '' unless defined $v; + ($k, $v); + } split(/[&;]/, $query); + my $f = $params{file} or die "file: required for $type spec $spec\n"; + unless (index($f, '/') == 0) { + $f = "$inbox->{mainrepo}/public-inbox/$f"; + } + bless { + mm_alt => PublicInbox::Msgmap->new_file($f), + xprefix => 'X'.uc($prefix), + }, $class; +} + +sub mid2alt { + my ($self, $mid) = @_; + $self->{mm_alt}->num_for($mid); +} + +1; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 1256fb1e..cd885488 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -145,6 +145,12 @@ sub _fill { my $v = $self->{"$pfx.$k"}; $rv->{$k} = $v if defined $v; } + foreach my $k (qw(altid)) { # TODO: more arrays + if (defined(my $v = $self->{"$pfx.$k"})) { + $rv->{$k} = [ $v ]; + } + } + return unless $rv->{mainrepo}; my $name = $pfx; 
$name =~ s/\Apublicinbox\.//; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index e552cd4f..922ca9bb 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -87,7 +87,7 @@ sub search { my ($self) = @_; $self->{search} ||= eval { _weaken_later($self); - PublicInbox::Search->new($self->{mainrepo}); + PublicInbox::Search->new($self->{mainrepo}, $self->{altid}); }; } diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index 2583ff47..3fb3805f 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -20,7 +20,12 @@ sub new { my $err = $!; -d $d or die "$d not created: $err"; } - my $f = "$d/msgmap.sqlite3"; + new_file($class, "$d/msgmap.sqlite3", $writable); +} + +sub new_file { + my ($class, $f, $writable) = @_; + my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', { AutoCommit => 1, RaiseError => 1, @@ -40,6 +45,7 @@ sub new { $self; } +# n.b. invoked directly by scripts/xhdr-num2mid sub meta_accessor { my ($self, $key, $value) = @_; use constant { @@ -154,6 +160,7 @@ sub create_tables { 'val VARCHAR(255) NOT NULL)'); } +# used by NNTP.pm sub id_batch { my ($self, $num, $cb) = @_; my $dbh = $self->{dbh}; @@ -167,4 +174,15 @@ sub id_batch { $nr; } +# only used for mapping external serial numbers (e.g. 
articles from gmane) +# see scripts/xhdr-num2mid for usage +sub mid_set { + my ($self, $num, $mid) = @_; + my $sth = $self->{mid_set} ||= do { + my $sql = 'INSERT INTO msgmap (num, mid) VALUES (?,?)'; + $self->{dbh}->prepare($sql); + }; + $sth->execute($num, $mid); +} + 1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 3a908ac6..018fcb55 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -79,10 +79,10 @@ sub xdir { } sub new { - my ($class, $git_dir) = @_; + my ($class, $git_dir, $altid) = @_; my $dir = $class->xdir($git_dir); my $db = Search::Xapian::Database->new($dir); - bless { xdb => $db, git_dir => $git_dir }, $class; + bless { xdb => $db, git_dir => $git_dir, altid => $altid }, $class; } sub reopen { $_[0]->{xdb}->reopen } @@ -186,6 +186,18 @@ sub qp { $qp->add_boolean_prefix($name, $prefix); } + # we do not actually create AltId objects, + # just parse the spec to avoid the extra DB handles for now. + if (my $altid = $self->{altid}) { + for (@$altid) { + # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3' + /\Aserial:(\w+):/ or next; + my $pfx = $1; + # gmane => XGMANE + $qp->add_boolean_prefix($pfx, 'X'.uc($pfx)); + } + } + while (my ($name, $prefix) = each %prob_prefix) { $qp->add_prefix($name, $prefix); } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index a18a2148..0eb07a1c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -30,9 +30,21 @@ use constant { }; sub new { - my ($class, $git_dir, $creat) = @_; + my ($class, $inbox, $creat) = @_; + my $git_dir = $inbox; + my $altid; + if (ref $inbox) { + $git_dir = $inbox->{mainrepo}; + $altid = $inbox->{altid}; + if ($altid) { + require PublicInbox::AltId; + $altid = [ map { + PublicInbox::AltId->new($inbox, $_); + } @$altid ]; + } + } require Search::Xapian::WritableDatabase; - my $self = bless { git_dir => $git_dir }, $class; + my $self = bless { git_dir => $git_dir, -altid => $altid }, $class; my $perm 
= $self->_git_config_perm; my $umask = _umask_for($perm); $self->{umask} = $umask; @@ -171,6 +183,14 @@ sub add_message { link_message($self, $smsg, $old_tid); $tg->index_text($mid, 1); $doc->set_data($smsg->to_doc_data($blob)); + + if (my $altid = $self->{-altid}) { + foreach my $alt (@$altid) { + my $id = $alt->mid2alt($mid); + next unless defined $id; + $doc->add_term($alt->{xprefix} . $id); + } + } if (defined $doc_id) { $db->replace_document($doc_id, $doc); } else { diff --git a/script/public-inbox-index b/script/public-inbox-index index 61f21d70..1431b99e 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -9,8 +9,10 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use Cwd 'abs_path'; my $usage = "public-inbox-index GIT_DIR"; use PublicInbox::Config; +my $config = PublicInbox::Config->new; eval { require PublicInbox::SearchIdx }; if ($@) { print STDERR "Search::Xapian required for $0\n"; @@ -42,8 +44,8 @@ sub resolve_git_dir { }; close $fh or die "error in $cmd: $!\n"; chomp $dir; - return $cd if ($dir eq '.' && defined $cd); - $dir; + return abs_path($cd) if ($dir eq '.' && defined $cd); + abs_path($dir); } } @@ -56,13 +58,26 @@ if (@ARGV) { sub usage { print STDERR "Usage: $usage\n"; exit 1 } usage() unless @dirs; +foreach my $k (keys %$config) { + $k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next; + my $name = $1; + my $v = $config->{$k}; + for my $i (0..$#dirs) { + next if $dirs[$i] ne $v; + my $ibx = $config->lookup_name($name); + $dirs[$i] = $ibx if $ibx; + } +} + foreach my $dir (@dirs) { index_dir($dir); } sub index_dir { my ($git_dir) = @_; - -d $git_dir or die "$git_dir does not appear to be a git repository\n"; + if (!ref $git_dir && ! 
-d $git_dir) { + die "$git_dir does not appear to be a git repository\n"; + } my $s = PublicInbox::SearchIdx->new($git_dir, 1); $s->index_sync({ reindex => $reindex }); } diff --git a/scripts/xhdr-num2mid b/scripts/xhdr-num2mid index f1e7ea34..bc3ede60 100755 --- a/scripts/xhdr-num2mid +++ b/scripts/xhdr-num2mid @@ -5,8 +5,18 @@ use strict; use warnings; use Net::NNTP; -use Data::Dumper; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $usage = "usage: NNTPSERVER=news.example.org $0 GROUP [FIRST_NUM]\n"; +my ($msgmap, $mm); +my %opts = ( '--msgmap=s' => \$msgmap ); +GetOptions(%opts) or die "bad command-line args\n$usage"; + +if ($msgmap) { + require PublicInbox::Msgmap; + require PublicInbox::MID; # mid_clean + $mm = PublicInbox::Msgmap->new_file($msgmap, 1); +} + my $group = shift or die $usage; my $nntp = Net::NNTP->new($ENV{NNTPSERVER} || '127.0.0.1'); my ($num, $first, $last) = $nntp->group($group); @@ -15,16 +25,29 @@ my $arg_first = shift; if (defined $arg_first) { $arg_first =~ /\A\d+\z/ or die $usage; $first = $arg_first; +} elsif ($mm) { + my $last_article = $mm->meta_accessor('last_article'); + $first = $last_article + 1 if defined $last_article; } my $batch = 1000; my $i; for ($i = $first; $i < $last; $i += $batch) { - my $j = $i + $batch; + my $j = $i + $batch - 1; $j = $last if $j > $last; my $num2mid = $nntp->xhdr('Message-ID', "$i-$j"); + + $mm->{dbh}->begin_work if $mm; for my $n ($i..$j) { defined(my $mid = $num2mid->{$n}) or next; print "$n $mid\n"; + if ($mm) { + $mid = PublicInbox::MID::mid_clean($mid); + $mm->mid_set($n, $mid); + } + } + if ($mm) { + $mm->meta_accessor('last_article', $j); + $mm->{dbh}->commit; } } diff --git a/t/altid.t b/t/altid.t new file mode 100644 index 00000000..887d548f --- /dev/null +++ b/t/altid.t @@ -0,0 +1,61 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use warnings; +use Test::More; +use 
File::Temp qw/tempdir/; +foreach my $mod (qw(DBD::SQLite Search::Xapian)) { + eval "require $mod"; + plan skip_all => "$mod missing for altid.t" if $@; +} + +use_ok 'PublicInbox::Msgmap'; +use_ok 'PublicInbox::SearchIdx'; +use_ok 'PublicInbox::Import'; +use_ok 'PublicInbox::Inbox'; +my $tmpdir = tempdir('pi-altid-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $git_dir = "$tmpdir/a.git"; +my $alt_file = "$tmpdir/another-nntp.sqlite3"; +my $altid = [ "serial:gmane:file=$alt_file" ]; + +{ + my $mm = PublicInbox::Msgmap->new_file($alt_file, 1); + $mm->mid_set(1234, 'a@example.com'); +} + +{ + is(system(qw(git init -q --bare), $git_dir), 0, 'git init ok'); + my $git = PublicInbox::Git->new($git_dir); + my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); + $im->add(Email::MIME->create( + header => [ + From => 'a@example.com', + To => 'b@example.com', + 'Content-Type' => 'text/plain', + Subject => 'boo!', + 'Message-ID' => '<a@example.com>', + ], + body => "hello world gmane:666\n", + )); + $im->done; +} +{ + my $inbox = PublicInbox::Inbox->new({mainrepo=>$git_dir}); + $inbox->{altid} = $altid; + my $rw = PublicInbox::SearchIdx->new($inbox, 1); + $rw->index_sync; +} + +{ + my $ro = PublicInbox::Search->new($git_dir, $altid); + my $res = $ro->query("gmane:1234"); + is($res->{total}, 1, 'got one match'); + is($res->{msgs}->[0]->mid, 'a@example.com'); + + $res = $ro->query("gmane:666"); + is($res->{total}, 0, 'body did NOT match'); +}; + +done_testing(); + +1; |