about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--MANIFEST2
-rw-r--r--lib/PublicInbox/AltId.pm38
-rw-r--r--lib/PublicInbox/Config.pm6
-rw-r--r--lib/PublicInbox/Inbox.pm2
-rw-r--r--lib/PublicInbox/Msgmap.pm20
-rw-r--r--lib/PublicInbox/Search.pm16
-rw-r--r--lib/PublicInbox/SearchIdx.pm24
-rwxr-xr-xscript/public-inbox-index21
-rwxr-xr-xscripts/xhdr-num2mid27
-rw-r--r--t/altid.t61
10 files changed, 206 insertions, 11 deletions
diff --git a/MANIFEST b/MANIFEST
index 308da062..f5ea455e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -35,6 +35,7 @@ examples/unsubscribe.milter
 examples/unsubscribe.psgi
 examples/varnish-4.vcl
 lib/PublicInbox/Address.pm
+lib/PublicInbox/AltId.pm
 lib/PublicInbox/Config.pm
 lib/PublicInbox/Daemon.pm
 lib/PublicInbox/Emergency.pm
@@ -104,6 +105,7 @@ scripts/slrnspool2maildir
 scripts/ssoma-replay
 scripts/xhdr-num2mid
 t/address.t
+t/altid.t
 t/cgi.t
 t/check-www-inbox.perl
 t/common.perl
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
new file mode 100644
index 00000000..6fdc3a2d
--- /dev/null
+++ b/lib/PublicInbox/AltId.pm
@@ -0,0 +1,38 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::AltId;
+use strict;
+use warnings;
+use URI::Escape qw(uri_unescape);
+
+# spec: TYPE:PREFIX:param1=value1&param2=value2&...
+# Example: serial:gmane:file=/path/to/altmsgmap.sqlite3
+sub new {
+        my ($class, $inbox, $spec) = @_;
+        my ($type, $prefix, $query) = split(/:/, $spec, 3);
+        $type eq 'serial' or die "non-serial not supported, yet\n";
+
+        require PublicInbox::Msgmap;
+
+        my %params = map {
+                my ($k, $v) = split(/=/, uri_unescape($_), 2);
+                $v = '' unless defined $v;
+                ($k, $v);
+        } split(/[&;]/, $query);
+        my $f = $params{file} or die "file: required for $type spec $spec\n";
+        unless (index($f, '/') == 0) {
+                $f = "$inbox->{mainrepo}/public-inbox/$f";
+        }
+        bless {
+                mm_alt => PublicInbox::Msgmap->new_file($f),
+                xprefix => 'X'.uc($prefix),
+        }, $class;
+}
+
+sub mid2alt {
+        my ($self, $mid) = @_;
+        $self->{mm_alt}->num_for($mid);
+}
+
+1;
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 1256fb1e..cd885488 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -145,6 +145,12 @@ sub _fill {
                 my $v = $self->{"$pfx.$k"};
                 $rv->{$k} = $v if defined $v;
         }
+        foreach my $k (qw(altid)) { # TODO: more arrays
+                if (defined(my $v = $self->{"$pfx.$k"})) {
+                        $rv->{$k} = [ $v ];
+                }
+        }
+
         return unless $rv->{mainrepo};
         my $name = $pfx;
         $name =~ s/\Apublicinbox\.//;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index e552cd4f..922ca9bb 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -87,7 +87,7 @@ sub search {
         my ($self) = @_;
         $self->{search} ||= eval {
                 _weaken_later($self);
-                PublicInbox::Search->new($self->{mainrepo});
+                PublicInbox::Search->new($self->{mainrepo}, $self->{altid});
         };
 }
 
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 2583ff47..3fb3805f 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -20,7 +20,12 @@ sub new {
                 my $err = $!;
                 -d $d or die "$d not created: $err";
         }
-        my $f = "$d/msgmap.sqlite3";
+        new_file($class, "$d/msgmap.sqlite3", $writable);
+}
+
+sub new_file {
+        my ($class, $f, $writable) = @_;
+
         my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
                 AutoCommit => 1,
                 RaiseError => 1,
@@ -40,6 +45,7 @@ sub new {
         $self;
 }
 
+# n.b. invoked directly by scripts/xhdr-num2mid
 sub meta_accessor {
         my ($self, $key, $value) = @_;
         use constant {
@@ -154,6 +160,7 @@ sub create_tables {
                         'val VARCHAR(255) NOT NULL)');
 }
 
+# used by NNTP.pm
 sub id_batch {
         my ($self, $num, $cb) = @_;
         my $dbh = $self->{dbh};
@@ -167,4 +174,15 @@ sub id_batch {
         $nr;
 }
 
+# only used for mapping external serial numbers (e.g. articles from gmane)
+# see scripts/xhdr-num2mid for usage
+sub mid_set {
+        my ($self, $num, $mid) = @_;
+        my $sth = $self->{mid_set} ||= do {
+                my $sql = 'INSERT INTO msgmap (num, mid) VALUES (?,?)';
+                $self->{dbh}->prepare($sql);
+        };
+        $sth->execute($num, $mid);
+}
+
 1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 3a908ac6..018fcb55 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -79,10 +79,10 @@ sub xdir {
 }
 
 sub new {
-        my ($class, $git_dir) = @_;
+        my ($class, $git_dir, $altid) = @_;
         my $dir = $class->xdir($git_dir);
         my $db = Search::Xapian::Database->new($dir);
-        bless { xdb => $db, git_dir => $git_dir }, $class;
+        bless { xdb => $db, git_dir => $git_dir, altid => $altid }, $class;
 }
 
 sub reopen { $_[0]->{xdb}->reopen }
@@ -186,6 +186,18 @@ sub qp {
                 $qp->add_boolean_prefix($name, $prefix);
         }
 
+        # we do not actually create AltId objects,
+        # just parse the spec to avoid the extra DB handles for now.
+        if (my $altid = $self->{altid}) {
+                for (@$altid) {
+                        # $_ = 'serial:gmane:file=/path/to/gmane.msgmap.sqlite3'
+                        /\Aserial:(\w+):/ or next;
+                        my $pfx = $1;
+                        # gmane => XGMANE
+                        $qp->add_boolean_prefix($pfx, 'X'.uc($pfx));
+                }
+        }
+
         while (my ($name, $prefix) = each %prob_prefix) {
                 $qp->add_prefix($name, $prefix);
         }
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index a18a2148..0eb07a1c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -30,9 +30,21 @@ use constant {
 };
 
 sub new {
-        my ($class, $git_dir, $creat) = @_;
+        my ($class, $inbox, $creat) = @_;
+        my $git_dir = $inbox;
+        my $altid;
+        if (ref $inbox) {
+                $git_dir = $inbox->{mainrepo};
+                $altid = $inbox->{altid};
+                if ($altid) {
+                        require PublicInbox::AltId;
+                        $altid = [ map {
+                                PublicInbox::AltId->new($inbox, $_);
+                        } @$altid ];
+                }
+        }
         require Search::Xapian::WritableDatabase;
-        my $self = bless { git_dir => $git_dir }, $class;
+        my $self = bless { git_dir => $git_dir, -altid => $altid }, $class;
         my $perm = $self->_git_config_perm;
         my $umask = _umask_for($perm);
         $self->{umask} = $umask;
@@ -171,6 +183,14 @@ sub add_message {
                 link_message($self, $smsg, $old_tid);
                 $tg->index_text($mid, 1);
                 $doc->set_data($smsg->to_doc_data($blob));
+
+                if (my $altid = $self->{-altid}) {
+                        foreach my $alt (@$altid) {
+                                my $id = $alt->mid2alt($mid);
+                                next unless defined $id;
+                                $doc->add_term($alt->{xprefix} . $id);
+                        }
+                }
                 if (defined $doc_id) {
                         $db->replace_document($doc_id, $doc);
                 } else {
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 61f21d70..1431b99e 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -9,8 +9,10 @@
 use strict;
 use warnings;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use Cwd 'abs_path';
 my $usage = "public-inbox-index GIT_DIR";
 use PublicInbox::Config;
+my $config = PublicInbox::Config->new;
 eval { require PublicInbox::SearchIdx };
 if ($@) {
         print STDERR "Search::Xapian required for $0\n";
@@ -42,8 +44,8 @@ sub resolve_git_dir {
                 };
                 close $fh or die "error in $cmd: $!\n";
                 chomp $dir;
-                return $cd if ($dir eq '.' && defined $cd);
-                $dir;
+                return abs_path($cd) if ($dir eq '.' && defined $cd);
+                abs_path($dir);
         }
 }
 
@@ -56,13 +58,26 @@ if (@ARGV) {
 sub usage { print STDERR "Usage: $usage\n"; exit 1 }
 usage() unless @dirs;
 
+foreach my $k (keys %$config) {
+        $k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next;
+        my $name = $1;
+        my $v = $config->{$k};
+        for my $i (0..$#dirs) {
+                next if $dirs[$i] ne $v;
+                my $ibx = $config->lookup_name($name);
+                $dirs[$i] = $ibx if $ibx;
+        }
+}
+
 foreach my $dir (@dirs) {
         index_dir($dir);
 }
 
 sub index_dir {
         my ($git_dir) = @_;
-        -d $git_dir or die "$git_dir does not appear to be a git repository\n";
+        if (!ref $git_dir && ! -d $git_dir) {
+                die "$git_dir does not appear to be a git repository\n";
+        }
         my $s = PublicInbox::SearchIdx->new($git_dir, 1);
         $s->index_sync({ reindex => $reindex });
 }
diff --git a/scripts/xhdr-num2mid b/scripts/xhdr-num2mid
index f1e7ea34..bc3ede60 100755
--- a/scripts/xhdr-num2mid
+++ b/scripts/xhdr-num2mid
@@ -5,8 +5,18 @@
 use strict;
 use warnings;
 use Net::NNTP;
-use Data::Dumper;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 my $usage = "usage: NNTPSERVER=news.example.org $0 GROUP [FIRST_NUM]\n";
+my ($msgmap, $mm);
+my %opts = ( '--msgmap=s' => \$msgmap );
+GetOptions(%opts) or die "bad command-line args\n$usage";
+
+if ($msgmap) {
+        require PublicInbox::Msgmap;
+        require PublicInbox::MID; # mid_clean
+        $mm = PublicInbox::Msgmap->new_file($msgmap, 1);
+}
+
 my $group = shift or die $usage;
 my $nntp = Net::NNTP->new($ENV{NNTPSERVER} || '127.0.0.1');
 my ($num, $first, $last) = $nntp->group($group);
@@ -15,16 +25,29 @@ my $arg_first = shift;
 if (defined $arg_first) {
         $arg_first =~ /\A\d+\z/ or die $usage;
         $first = $arg_first;
+} elsif ($mm) {
+        my $last_article = $mm->meta_accessor('last_article');
+        $first = $last_article + 1 if defined $last_article;
 }
 
 my $batch = 1000;
 my $i;
 for ($i = $first; $i < $last; $i += $batch) {
-        my $j = $i + $batch;
+        my $j = $i + $batch - 1;
         $j = $last if $j > $last;
         my $num2mid = $nntp->xhdr('Message-ID', "$i-$j");
+
+        $mm->{dbh}->begin_work if $mm;
         for my $n ($i..$j) {
                 defined(my $mid = $num2mid->{$n}) or next;
                 print "$n $mid\n";
+                if ($mm) {
+                        $mid = PublicInbox::MID::mid_clean($mid);
+                        $mm->mid_set($n, $mid);
+                }
+        }
+        if ($mm) {
+                $mm->meta_accessor('last_article', $j);
+                $mm->{dbh}->commit;
         }
 }
diff --git a/t/altid.t b/t/altid.t
new file mode 100644
index 00000000..887d548f
--- /dev/null
+++ b/t/altid.t
@@ -0,0 +1,61 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
+        eval "require $mod";
+        plan skip_all => "$mod missing for altid.t" if $@;
+}
+
+use_ok 'PublicInbox::Msgmap';
+use_ok 'PublicInbox::SearchIdx';
+use_ok 'PublicInbox::Import';
+use_ok 'PublicInbox::Inbox';
+my $tmpdir = tempdir('pi-altid-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $git_dir = "$tmpdir/a.git";
+my $alt_file = "$tmpdir/another-nntp.sqlite3";
+my $altid = [ "serial:gmane:file=$alt_file" ];
+
+{
+        my $mm = PublicInbox::Msgmap->new_file($alt_file, 1);
+        $mm->mid_set(1234, 'a@example.com');
+}
+
+{
+        is(system(qw(git init -q --bare), $git_dir), 0, 'git init ok');
+        my $git = PublicInbox::Git->new($git_dir);
+        my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
+        $im->add(Email::MIME->create(
+                header => [
+                        From => 'a@example.com',
+                        To => 'b@example.com',
+                        'Content-Type' => 'text/plain',
+                        Subject => 'boo!',
+                        'Message-ID' => '<a@example.com>',
+                ],
+                body => "hello world gmane:666\n",
+        ));
+        $im->done;
+}
+{
+        my $inbox = PublicInbox::Inbox->new({mainrepo=>$git_dir});
+        $inbox->{altid} = $altid;
+        my $rw = PublicInbox::SearchIdx->new($inbox, 1);
+        $rw->index_sync;
+}
+
+{
+        my $ro = PublicInbox::Search->new($git_dir, $altid);
+        my $res = $ro->query("gmane:1234");
+        is($res->{total}, 1, 'got one match');
+        is($res->{msgs}->[0]->mid, 'a@example.com');
+
+        $res = $ro->query("gmane:666");
+        is($res->{total}, 0, 'body did NOT match');
+};
+
+done_testing();
+
+1;