user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
038e1cc2bd49c6316aa05457df1113c8e96ab557 blob 2393 bytes (raw)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
 
# Copyright (C) 2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# read-only counterpart to LeiXkwIdx
package PublicInbox::LeiXkw;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Search);
use PublicInbox::ContentHash qw(content_hash);
use PublicInbox::Eml;
use PublicInbox::Search qw(xap_terms);
use PublicInbox::MID qw(mids_for_index);

# Constructor: $dir is stored as {xpfx} and $opt->{git} as {git}.
sub new {
	my ($cls, $dir, $opt) = @_;
	my $self = {
		xpfx => $dir,
		git => $opt->{git},
	};
	bless $self, $cls;
}

# git->cat_async callback
# Without blob data ($bref false) the $oid is appended to $cmp->{gone}.
# Otherwise the blob is parsed and its content_hash compared against
# $cmp->{expect}; only a matching $oid is appended to $cmp->{hits}.
sub _content_cmp {
	my ($bref, $oid, $type, $size, $cmp) = @_;
	unless ($bref) {
		return push(@{$cmp->{gone}}, $oid);
	}
	my $got = content_hash(PublicInbox::Eml->new($bref));
	return if $got ne $cmp->{expect};
	push @{$cmp->{hits}}, $oid;
}

# Fallback lookup used when no document carries the exact blob OID.
# For every mid of $eml (cached in $smsg->{mids4idx}), walk the postings
# of the term 'Q'.$mid, collect the 'U'-prefixed terms of each docid
# (treated as blob OIDs) and ask git for each distinct OID once.
# _content_cmp sorts the results into $cmp->{hits} (same content_hash
# as $eml) and $cmp->{gone} (git returned no data).
#
# Returns the docids of all hits; dies with a "BUG" message if a hit
# OID was never mapped to a docid.
#
# Blobs reported as gone are deliberately left alone here: this class
# is the read-only counterpart to LeiXkwIdx and opens its DB via
# $X{Database} (see xdb_shards_flat).  delete_document is a
# WritableDatabase method in Xapian, so calling it on that handle would
# die as soon as a stale blob was seen.  Stale entries are never
# returned to the caller since they are not in $cmp->{hits}.
sub _docids_by_mids ($$$) {
	my ($self, $eml, $smsg) = @_;
	my $cmp = { expect => content_hash($eml), hits => [] };
	my $mids = $smsg->{mids4idx} //= mids_for_index($eml);
	my $xdb = $self->{xdb};
	my $git = $self->{git};
	my %oid2docid;
	for my $mid (@$mids) { # typically 1
		my $head = $xdb->postlist_begin('Q'.$mid);
		my $tail = $xdb->postlist_end('Q'.$mid);
		for (; $head != $tail; $head++) {
			my $docid = $head->get_docid;
			my $oids = xap_terms('U', $xdb, $docid);
			for my $oid (keys %$oids) {
				# first docid seen for an OID wins; each
				# OID is only requested from git once
				next if exists $oid2docid{$oid};
				$oid2docid{$oid} = $docid;
				$git->cat_async($oid, \&_content_cmp, $cmp);
			}
		}
	}
	# all callbacks must have fired before $cmp is inspected
	$git->cat_async_wait;
	map { $oid2docid{$_} // die "BUG $_ miss (@$mids)" } @{$cmp->{hits}};
}

# Returns the docid(s) associated with $smsg.  A document carrying the
# term 'U'.$smsg->{blob} is preferred (first posting only); when there
# is none, the lookup falls back to _docids_by_mids.
# Dies if $smsg->{blob} is undefined.
sub docids_for ($$$) {
	my ($self, $eml, $smsg) = @_;
	my $blob = $smsg->{blob} // die 'BUG: no blob';
	my $term = 'U'.$blob;
	my $xdb = $self->{xdb};
	my $cur = $xdb->postlist_begin($term);
	if ($cur != $xdb->postlist_end($term)) {
		return ($cur->get_docid);
	}
	_docids_by_mids($self, $eml, $smsg);
}

# for ->xdb
# Loads Xapian, then returns a one-element list holding a
# $X{Database} object opened on $self->{xpfx}.
sub xdb_shards_flat {
	my ($self) = @_;
	PublicInbox::Search::load_xapian();
	# looked up after load_xapian(), matching the original ordering
	my $db_class = $PublicInbox::Search::X{Database};
	($db_class->new($self->{xpfx}));
}

# Returns the sorted, de-duplicated keys of the 'K'-prefixed terms
# found on every docid that docids_for yields for ($eml, $smsg).
sub get_xkw {
	my ($self, $eml, $smsg) = @_;
	# presumably populates $self->{xdb}, which is read below and
	# by docids_for -- confirm against PublicInbox::Search::xdb
	$self->xdb;
	my %seen;
	# more than one docid is unusual, but dedupe can change
	for my $docid (docids_for($self, $eml, $smsg)) {
		my %kw = %{ xap_terms('K', $self->{xdb}, $docid) };
		@seen{keys %kw} = values %kw;
	}
	sort keys %seen
}

1;
debug log:

solving 038e1cc2 ...
found 038e1cc2 in https://public-inbox.org/meta/20210303140139.7637-1-e@80x24.org/

applying [1/1] https://public-inbox.org/meta/20210303140139.7637-1-e@80x24.org/
diff --git a/lib/PublicInbox/LeiXkw.pm b/lib/PublicInbox/LeiXkw.pm
new file mode 100644
index 00000000..038e1cc2

Checking patch lib/PublicInbox/LeiXkw.pm...
Applied patch lib/PublicInbox/LeiXkw.pm cleanly.

index at:
100644 038e1cc2bd49c6316aa05457df1113c8e96ab557	lib/PublicInbox/LeiXkw.pm

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).