public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob d43c23e64adfd60d968ba3512d359d22d929c896 3516 bytes (raw)
$ git show HEAD:lib/PublicInbox/ExtSearch.pm	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
 
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# Read-only external (detached) index for cross inbox search.
# This is a read-only counterpart to PublicInbox::ExtSearchIdx
# and behaves like PublicInbox::Inbox AND PublicInbox::Search
package PublicInbox::ExtSearch;
use strict;
use v5.10.1;
use PublicInbox::Over;
use PublicInbox::Inbox;
use PublicInbox::MiscSearch;
use DBI qw(:sql_types); # SQL_BLOB

# for ->reopen, ->mset, ->mset_to_artnums
use parent qw(PublicInbox::Search);

# Constructor: $topdir is the top-level directory of the external index.
# Only builds and blesses a hash; no other calls are made here.
sub new {
	my ($class, $topdir) = @_;
	my $schema = PublicInbox::Search::SCHEMA_VERSION;
	my %self = (
		topdir => $topdir,
		-primary_address => 'unknown@example.com',
		xpfx => "$topdir/ei$schema", # e.g. "$topdir/ei15"
	);
	return bless \%self, $class;
}

# Lazily creates (and caches in {misc}) the PublicInbox::MiscSearch
# object rooted at "{xpfx}/misc".
sub misc {
	my $self = shift;
	return $self->{misc} if defined $self->{misc};
	$self->{misc} = PublicInbox::MiscSearch->new($self->{xpfx} . '/misc');
}

# same as per-inbox ->over, for now...
# Builds a PublicInbox::Over for "{xpfx}/over.sqlite3", forces ->dbh
# and caches the object in {over}.  Any exception is swallowed by eval,
# in which case nothing is cached and nothing true is returned.
sub over {
	my $self = shift;
	return $self->{over} if defined $self->{over};
	eval {
		PublicInbox::Inbox::_cleanup_later($self);
		my $path = $self->{xpfx} . '/over.sqlite3';
		my $handle = PublicInbox::Over->new($path);
		$handle->dbh; # may die
		$self->{over} = $handle;
	};
}

# Lazily creates (and caches in {git}) the PublicInbox::Git object for
# "{topdir}/ALL.git", registering $self for later cleanup first.
sub git {
	my $self = shift;
	unless (defined $self->{git}) {
		PublicInbox::Inbox::_cleanup_later($self);
		my $git_dir = $self->{topdir} . '/ALL.git';
		$self->{git} = PublicInbox::Git->new($git_dir);
	}
	$self->{git};
}

# returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table
#
# $xibx  - inbox-like object; only its {newsgroup} field is read here
# $xsmsg - message object providing ->oidbin, {num} and {-orig_num}
#
# Warns and returns nothing (bare return) when either $xibx->{newsgroup}
# or the article itself cannot be found in this external index.
sub nntp_xref_for { # NNTP only
	my ($self, $xibx, $xsmsg) = @_;
	my $dbh = over($self)->dbh;

	# the trailing `1' is DBI's $if_active: a still-active cached
	# statement handle is finish()ed silently before being reused
	my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1

	$sth->execute($xibx->{newsgroup});
	my $xibx_id = $sth->fetchrow_array // do {
		warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n";
		return;
	};

	$sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1

	$sth->bind_param(1, $xsmsg->oidbin, SQL_BLOB);

	# NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2
	my $art_num = $xsmsg->{num} || $xsmsg->{-orig_num};
	$sth->bind_param(2, $art_num);
	$sth->bind_param(3, $xibx_id);
	$sth->execute;
	my $docid = $sth->fetchrow_array // do {
		# report the number actually searched for, not a zeroed {num}
		my $shown = $art_num // $xsmsg->{num};
		warn <<EOF;
W: `$xibx->{newsgroup}:$shown' not found in $self->{topdir}
EOF
		return;
	};

	# no explicit LIMIT here; the row count is presumably bounded by
	# the number of newsgroups on the server
	$sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ?

	$sth->execute($docid, $xibx_id);
	my $rows = $sth->fetchall_arrayref;

	my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1

	my %xref = map {
		my ($ibx_id, $xnum) = @$_;

		$eidx_key_sth->execute($ibx_id);
		my $eidx_key = $eidx_key_sth->fetchrow_array;

		# only include if there's a newsgroup name: skip rows whose
		# inbox has no (or an empty) eidx_key, and keys containing
		# a '/' (presumably a path rather than a newsgroup)
		defined($eidx_key) && length($eidx_key) &&
				index($eidx_key, '/') < 0 ?
			($eidx_key => $xnum) : ()
	} @$rows;

	# NOTE(review): {num} may have been zeroed by the caller (see
	# above); confirm callers expect that value for the current group
	$xref{$xibx->{newsgroup}} = $xsmsg->{num};
	\%xref;
}

# Always returns undef (a single undef element in list context).
sub mm {
	return undef;
}

# Returns a new, empty hashref on every call.
sub altid_map {
	return {};
}

# Returns the description text, read once via PublicInbox::Git::cat_desc
# from "{topdir}/description" and cached in {description}.  Falls back
# to a fixed placeholder string when no description is available.
sub description {
	my $self = shift;
	my $desc = $self->{description};
	unless (defined $desc) {
		my $path = $self->{topdir} . '/description';
		$desc = PublicInbox::Git::cat_desc($path);
		$self->{description} = $desc;
	}
	return defined($desc) ? $desc : '$EXTINDEX_DIR/description missing';
}

# Hands the object to PublicInbox::Inbox::_cleanup_later, then returns
# that same object.
sub search {
	my ($self) = @_;
	PublicInbox::Inbox::_cleanup_later($self);
	return $self;
}

# Fixed label describing what kind of object this is.
sub thing_type {
	return 'external index';
}

# The typeglob names assigned below appear only once in this file;
# silence the "used only once" warnings that would otherwise trigger.
no warnings 'once';
# Reuse the PublicInbox::Inbox implementations of these methods by
# installing them under the same names in this package.
*base_url = \&PublicInbox::Inbox::base_url;
*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
*smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
*msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
*modified = \&PublicInbox::Inbox::modified;

# All three names resolve to mm() and therefore return undef.  The
# assignment is chained right-to-left: only the rightmost glob receives
# the code ref directly, the other two are glob-to-glob assignments.
*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
# isrch is a second name for search()
*isrch = \&search;

1;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git