about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-08-11 00:23:48 +0000
committer Eric Wong <e@80x24.org> 2016-08-11 02:28:40 +0000
commit 58a5bb3e18901237b1ca34ef8f03f696be27d305 (patch)
tree 1725220ed46271629d71b3c5af3b8b2b328755a1 /lib/PublicInbox
parent c357e8699d99e20e1033e13bd1e2faa9856fb456 (diff)
download public-inbox-58a5bb3e18901237b1ca34ef8f03f696be27d305.tar.gz
For some existing mailing list archives, messages are identified
by serial number (such as NNTP article numbers in gmane).  Those
links may become inaccessible (as is the current case for
gmane), so ensure users can still search based on old serial
numbers.

Now, I run the following periodically to get article numbers
from gmane (while news.gmane.org remains):

	NNTPSERVER=news.gmane.org
	export NNTPSERVER
	GROUP=gmane.comp.version-control.git
	perl -I lib scripts/xhdr-num2mid $GROUP --msgmap=/path/to/gmane.sqlite3

(I might integrate this further with public-inbox-* scripts one day).

My ~/.public-inbox/config has an added "altid" snippet which now
looks like this:

[publicinbox "git"]
	address = git@vger.kernel.org
	mainrepo = /path/to/git.vger.git
	newsgroup = inbox.comp.version-control.git

	; relative pathnames expand to $mainrepo/public-inbox/$file
	altid = serial:gmane:file=gmane.sqlite3

And run "public-inbox-index --reindex /path/to/git.vger.git"
periodically.

This ought to allow searching for "gmane:12345" to work for
Xapian-enabled instances.

Disclaimer: while public-inbox supports NNTP and stable article
serial numbers, use of those for public links is discouraged
since it encourages centralization.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/AltId.pm 38
-rw-r--r-- lib/PublicInbox/Config.pm 6
-rw-r--r-- lib/PublicInbox/Inbox.pm 2
-rw-r--r-- lib/PublicInbox/Msgmap.pm 20
-rw-r--r-- lib/PublicInbox/Search.pm 16
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 24
6 files changed, 100 insertions, 6 deletions
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
new file mode 100644
index 00000000..6fdc3a2d
--- /dev/null
+++ b/lib/PublicInbox/AltId.pm
@@ -0,0 +1,38 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::AltId;
+use strict;
+use warnings;
+use URI::Escape qw(uri_unescape);
+
+# spec: TYPE:PREFIX:param1=value1&param2=value2&...
+# Example: serial:gmane:file=/path/to/altmsgmap.sqlite3
+sub new { # constructor: parse an altid $spec for $inbox and open the Msgmap file it names
+        my ($class, $inbox, $spec) = @_;
+        my ($type, $prefix, $query) = split(/:/, $spec, 3); # limit of 3 keeps any ':' inside the query part
+        $type eq 'serial' or die "non-serial not supported, yet\n"; # 'serial' is the only accepted type
+
+        require PublicInbox::Msgmap; # loaded at call time, not at compile time
+
+        my %params = map { # build key => value pairs from the query part
+                my ($k, $v) = split(/=/, uri_unescape($_), 2); # unescape first, then split at the first '='
+                $v = '' unless defined $v; # a key without '=value' maps to the empty string
+                ($k, $v);
+        } split(/[&;]/, $query); # params are separated by '&' or ';'
+        my $f = $params{file} or die "file: required for $type spec $spec\n"; # dies on a missing or false 'file' value
+        unless (index($f, '/') == 0) { # $f does not start with '/', so it is treated as relative
+                $f = "$inbox->{mainrepo}/public-inbox/$f"; # relative names resolve under $mainrepo/public-inbox/
+        }
+        bless {
+                mm_alt => PublicInbox::Msgmap->new_file($f), # called with the path only (no $writable argument)
+                xprefix => 'X'.uc($prefix), # term prefix: 'X' plus upper-cased spec prefix, e.g. gmane => XGMANE
+        }, $class;
+}
+
+sub mid2alt { # map $mid to its number in the alternate Msgmap held in {mm_alt}
+        my ($self, $mid) = @_;
+        $self->{mm_alt}->num_for($mid); # num_for's result is returned as-is; presumably undef when $mid is unknown (add_message tests it with defined) - TODO confirm
+}
+
+1;
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 1256fb1e..cd885488 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -145,6 +145,12 @@ sub _fill {
                 my $v = $self->{"$pfx.$k"};
                 $rv->{$k} = $v if defined $v;
         }
+        foreach my $k (qw(altid)) { # TODO: more arrays
+                if (defined(my $v = $self->{"$pfx.$k"})) {
+                        $rv->{$k} = [ $v ];
+                }
+        }
+
         return unless $rv->{mainrepo};
         my $name = $pfx;
         $name =~ s/\Apublicinbox\.//;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index e552cd4f..922ca9bb 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -87,7 +87,7 @@ sub search {
         my ($self) = @_;
         $self->{search} ||= eval {
                 _weaken_later($self);
-                PublicInbox::Search->new($self->{mainrepo});
+                PublicInbox::Search->new($self->{mainrepo}, $self->{altid});
         };
 }
 
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 2583ff47..3fb3805f 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -20,7 +20,12 @@ sub new {
                 my $err = $!;
                 -d $d or die "$d not created: $err";
         }
-        my $f = "$d/msgmap.sqlite3";
+        new_file($class, "$d/msgmap.sqlite3", $writable);
+}
+
+sub new_file {
+        my ($class, $f, $writable) = @_;
+
         my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
                 AutoCommit => 1,
                 RaiseError => 1,
@@ -40,6 +45,7 @@ sub new {
         $self;
 }
 
+# n.b. invoked directly by scripts/xhdr-num2mid
 sub meta_accessor {
         my ($self, $key, $value) = @_;
         use constant {
@@ -154,6 +160,7 @@ sub create_tables {
                         'val VARCHAR(255) NOT NULL)');
 }
 
+# used by NNTP.pm
 sub id_batch {
         my ($self, $num, $cb) = @_;
         my $dbh = $self->{dbh};
@@ -167,4 +174,15 @@ sub id_batch {
         $nr;
 }
 
+# only used for mapping external serial numbers (e.g. articles from gmane)
+# see scripts/xhdr-num2mid for usage
+sub mid_set { # insert one ($num, $mid) row into the msgmap table
+        my ($self, $num, $mid) = @_;
+        my $sth = $self->{mid_set} ||= do { # prepare on first call, then reuse the handle cached in $self->{mid_set}
+                my $sql = 'INSERT INTO msgmap (num, mid) VALUES (?,?)'; # plain INSERT with placeholders, no conflict clause
+                $self->{dbh}->prepare($sql);
+        };
+        $sth->execute($num, $mid); # the result of execute is this sub's return value
+}
+
 1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 3a908ac6..018fcb55 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -79,10 +79,10 @@ sub xdir {
 }
 
 sub new {
-        my ($class, $git_dir) = @_;
+        my ($class, $git_dir, $altid) = @_;
         my $dir = $class->xdir($git_dir);
         my $db = Search::Xapian::Database->new($dir);
-        bless { xdb => $db, git_dir => $git_dir }, $class;
+        bless { xdb => $db, git_dir => $git_dir, altid => $altid }, $class;
 }
 
 sub reopen { $_[0]->{xdb}->reopen }
@@ -186,6 +186,18 @@ sub qp {
                 $qp->add_boolean_prefix($name, $prefix);
         }
 
+        # we do not actually create AltId objects,
+        # just parse the spec to avoid the extra DB handles for now.
+        if (my $altid = $self->{altid}) {
+                for (@$altid) {
+                        # $_ = 'serial:gmane:file=/path/to/gmane.msgmap.sqlite3'
+                        /\Aserial:(\w+):/ or next;
+                        my $pfx = $1;
+                        # gmane => XGMANE
+                        $qp->add_boolean_prefix($pfx, 'X'.uc($pfx));
+                }
+        }
+
         while (my ($name, $prefix) = each %prob_prefix) {
                 $qp->add_prefix($name, $prefix);
         }
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index a18a2148..0eb07a1c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -30,9 +30,21 @@ use constant {
 };
 
 sub new {
-        my ($class, $git_dir, $creat) = @_;
+        my ($class, $inbox, $creat) = @_;
+        my $git_dir = $inbox;
+        my $altid;
+        if (ref $inbox) {
+                $git_dir = $inbox->{mainrepo};
+                $altid = $inbox->{altid};
+                if ($altid) {
+                        require PublicInbox::AltId;
+                        $altid = [ map {
+                                PublicInbox::AltId->new($inbox, $_);
+                        } @$altid ];
+                }
+        }
         require Search::Xapian::WritableDatabase;
-        my $self = bless { git_dir => $git_dir }, $class;
+        my $self = bless { git_dir => $git_dir, -altid => $altid }, $class;
         my $perm = $self->_git_config_perm;
         my $umask = _umask_for($perm);
         $self->{umask} = $umask;
@@ -171,6 +183,14 @@ sub add_message {
                 link_message($self, $smsg, $old_tid);
                 $tg->index_text($mid, 1);
                 $doc->set_data($smsg->to_doc_data($blob));
+
+                if (my $altid = $self->{-altid}) {
+                        foreach my $alt (@$altid) {
+                                my $id = $alt->mid2alt($mid);
+                                next unless defined $id;
+                                $doc->add_term($alt->{xprefix} . $id);
+                        }
+                }
                 if (defined $doc_id) {
                         $db->replace_document($doc_id, $doc);
                 } else {