From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A7EC91FFB1 for ; Tue, 27 Oct 2020 07:54:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 14/52] overidx: introduce changes for external index Date: Tue, 27 Oct 2020 07:54:15 +0000 Message-Id: <20201027075453.19163-15-e@80x24.org> In-Reply-To: <20201027075453.19163-1-e@80x24.org> References: <20201027075453.19163-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since external indices won't have msgmap.sqlite3, we'll need to store last_commit-* metadata in over.sqlite3 instead. The new eidx_meta table has longer limits to account for path names or newsgroup names stored in keys. We'll also rely on built-in counters for Xapian document IDs, since msgmap.sqlite3 no longer provides an AUTOINCREMENT column. 
--- lib/PublicInbox/OverIdx.pm | 76 ++++++++++++++++++++++++++++++++++++++ t/over.t | 11 ++++++ 2 files changed, 87 insertions(+) diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index db4b7738..09bca790 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -512,4 +512,80 @@ EOM $pr->("I: rethread culled $total ghosts\n") if $pr && $total; } +# used for cross-inbox search +sub eidx_prep ($) { + my ($self) = @_; + $self->{-eidx_prep} //= do { + my $dbh = $self->dbh; + $dbh->do(<<''); +INSERT OR IGNORE INTO counter (key) VALUES ('oidmap_num') + + $dbh->do(<<''); +INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid') + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS oidmap ( + num INTEGER NOT NULL, /* NNTP article number == IMAP UID */ + oidbin VARBINARY, /* 20-byte SHA-1 or 32-byte SHA-256 */ + UNIQUE (num), + UNIQUE (oidbin) +) + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS eidx_meta ( + key VARCHAR(255) PRIMARY KEY, + val VARCHAR(255) NOT NULL +) + + $dbh; + }; +} + +sub eidx_meta { # requires transaction + my ($self, $key, $val) = @_; + + my $sql = 'SELECT val FROM eidx_meta WHERE key = ? LIMIT 1'; + my $dbh = $self->{dbh}; + defined($val) or return $dbh->selectrow_array($sql, undef, $key); + + my $prev = $dbh->selectrow_array($sql, undef, $key); + if (defined $prev) { + $sql = 'UPDATE eidx_meta SET val = ? WHERE key = ?'; + $dbh->do($sql, undef, $val, $key); + } else { + $sql = 'INSERT INTO eidx_meta (key,val) VALUES (?,?)'; + $dbh->do($sql, undef, $key, $val); + } + $prev; +} + +sub eidx_max { + my ($self) = @_; + get_counter($self->{dbh}, 'eidx_docid'); +} + +sub oid2num { + my ($self, $oidhex) = @_; + my $dbh = eidx_prep($self); + my $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT num FROM oidmap WHERE oidbin = ? 
+ + $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); + $sth->execute; + $sth->fetchrow_array; +} + +sub oid_add { + my ($self, $oidhex) = @_; + my $dbh = eidx_prep($self); + my $num = adj_counter($self, 'oidmap_num', '+'); + my $sth = $dbh->prepare_cached(<<''); +INSERT INTO oidmap (num, oidbin) VALUES (?,?) + + $sth->bind_param(1, $num); + $sth->bind_param(2, pack('H*', $oidhex), SQL_BLOB); + $sth->execute; + $num; +} + 1; diff --git a/t/over.t b/t/over.t index 4c8f8098..3e2860f8 100644 --- a/t/over.t +++ b/t/over.t @@ -74,4 +74,15 @@ SKIP: { 'WAL journal_mode not clobbered if manually set'); } +# ext index additions +{ + my $hex = 'deadbeefcafe'; + my $n = $over->oid_add($hex); + ok($n > 0, 'oid_add returned number'); + is($over->oid2num($hex), $n, 'oid2num works'); + my $n2 = $over->oid_add($hex.$hex); + ok($n2 > $n, 'oid_add increments'); + is($over->oid2num($hex.$hex), $n2, 'oid2num works again'); +} + done_testing();