user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
blob 8ba4d396a444473134916d5f137d899945ec4f6a 3338 bytes (raw)
name: lib/PublicInbox/ExtSearch.pm 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
 
# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# Read-only external (detached) index for cross inbox search.
# This is a read-only counterpart to PublicInbox::ExtSearchIdx
# and behaves like PublicInbox::Inbox AND PublicInbox::Search
package PublicInbox::ExtSearch;
use strict;
use v5.10.1;
use PublicInbox::Over;
use PublicInbox::Inbox;
use PublicInbox::MiscSearch;
use DBI qw(:sql_types); # SQL_BLOB

# for ->reopen, ->mset, ->mset_to_artnums
use parent qw(PublicInbox::Search);

# Constructor: $topdir is the top-level directory of the external index.
# The Xapian/SQLite prefix lives under "$topdir/ei<SCHEMA_VERSION>".
sub new {
	my ($class, $topdir) = @_;
	# xpfx => 'ei15'
	my $xpfx = "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION;
	my $self = { topdir => $topdir, xpfx => $xpfx };
	bless $self, $class;
}

# Lazily creates and memoizes the PublicInbox::MiscSearch handle
# rooted at "$self->{xpfx}/misc".
sub misc {
	my $self = shift;
	return $self->{misc} if defined $self->{misc};
	$self->{misc} = PublicInbox::MiscSearch->new("$self->{xpfx}/misc");
}

# same as per-inbox ->over, for now...
# Lazily creates and memoizes the PublicInbox::Over handle for
# "$self->{xpfx}/over.sqlite3".
sub over {
	my $self = shift;
	return $self->{over} if defined $self->{over};
	$self->{over} = PublicInbox::Over->new("$self->{xpfx}/over.sqlite3");
}

# Lazily creates and memoizes the PublicInbox::Git handle for
# "$self->{topdir}/ALL.git".
sub git {
	my $self = shift;
	return $self->{git} if defined $self->{git};
	$self->{git} = PublicInbox::Git->new("$self->{topdir}/ALL.git");
}

# Returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table.
#
# $xibx  - inbox-like hashref; {newsgroup} is looked up as inboxes.eidx_key
# $xsmsg - message hashref; {blob} (hex OID) and {num} (or {-orig_num}
#          when {num} is zero) identify the article within $xibx
#
# Returns nothing (undef / empty list) after emitting a warning when
# either the newsgroup or the article cannot be found in this index.
sub nntp_xref_for { # NNTP only
	my ($self, $xibx, $xsmsg) = @_;
	my $dbh = over($self)->dbh;

	my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1

	$sth->execute($xibx->{newsgroup});
	my $xibx_id = $sth->fetchrow_array // do {
		warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n";
		return;
	};

	$sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1

	$sth->bind_param(1, pack('H*', $xsmsg->{blob}), SQL_BLOB);

	# NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2,
	# so fall back to {-orig_num} for the actual lookup (and diagnostics)
	my $art_num = $xsmsg->{num} || $xsmsg->{-orig_num};
	$sth->bind_param(2, $art_num);
	$sth->bind_param(3, $xibx_id);
	$sth->execute;
	my $docid = $sth->fetchrow_array // do {
		warn "W: `$xibx->{newsgroup}:${art_num}' ".
			"not found in $self->{topdir}\n";
		return;
	};

	# no LIMIT here: we want every other inbox this message belongs to
	$sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ?

	$sth->execute($docid, $xibx_id);
	my $rows = $sth->fetchall_arrayref;

	my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1

	my %xref;
	for my $row (@$rows) {
		my ($ibx_id, $xnum) = @$row;

		$eidx_key_sth->execute($ibx_id);
		my $eidx_key = $eidx_key_sth->fetchrow_array;

		# only include if there's a newsgroup name: skip rows with
		# no matching inbox, and keys containing '/' (treated as
		# non-newsgroup keys, same as before)
		next if !defined($eidx_key) || $eidx_key eq '';
		next if index($eidx_key, '/') >= 0;
		$xref{$eidx_key} = $xnum;
	}
	# NOTE(review): this stores {num} as-is, which may be zero per the
	# cmd_over note above -- confirm callers expect that
	$xref{$xibx->{newsgroup}} = $xsmsg->{num};
	\%xref;
}

# no msgmap here; always undef (also aliased by other stub methods below)
sub mm { return undef }

# always a fresh, empty hashref
sub altid_map { return {} }

# Returns the memoized description, loading it via
# PublicInbox::Inbox::cat_desc from "$self->{topdir}/description" on
# first use.  Falls back to a placeholder string if nothing is defined.
sub description {
	my ($self) = @_;
	my $desc = $self->{description};
	if (!defined $desc) {
		my $path = "$self->{topdir}/description";
		$desc = $self->{description} =
			PublicInbox::Inbox::cat_desc($path);
	}
	defined($desc) ? $desc : '$EXTINDEX_DIR/description missing';
}

# TODO: no clone URLs yet; always a fresh, empty arrayref
sub cloneurl { return [] }

# placeholder URL until a real one is implemented
sub base_url { return 'https://example.com/TODO/' }
# always a fresh, empty arrayref
sub nntp_url { return [] }

# The glob assignments below may be the only mention of each name in this
# file, so silence "used only once" warnings from here on.
no warnings 'once';

# Borrow these method implementations from PublicInbox::Inbox so this
# class can be called like an inbox for message retrieval.
*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
*smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
*msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
*modified = \&PublicInbox::Inbox::modified;
*recent = \&PublicInbox::Inbox::recent;

# These three methods share the mm() stub and therefore return undef.
*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
# ->isrch and ->search both resolve to PublicInbox::Search::reopen.
*isrch = *search = \&PublicInbox::Search::reopen;

1;

debug log:

solving 8ba4d396 ...
found 8ba4d396 in https://80x24.org/public-inbox.git

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of their contents.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).