From 090238c3353438526dec707dad0718cb21efd07f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 27 Oct 2020 07:54:43 +0000 Subject: over: store xref3 data in over.sqlite3 We may not end up storing xref3 data in Xapian, actually. This will make indexlevel=basic possible, along with --sequential-shard indexing support for slow storage. Making oidmap a separate table seems unnecessary, too, so fold it into the xref3 table since it's unlikely a git blob will be responsible for multiple xref3 rows. --- lib/PublicInbox/Over.pm | 19 ++++++++++++++ lib/PublicInbox/OverIdx.pm | 64 +++++++++++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 26 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 08112386..f34e7fc1 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -260,6 +260,25 @@ SELECT num,tid,ds,ts,ddd FROM over WHERE num = ? LIMIT 1 $smsg ? load_from_row($smsg) : undef; } +sub get_xref3 { + my ($self, $num) = @_; + my $dbh = dbh($self); + my $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT ibx_id,xnum,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id ASC + + $sth->execute($num); + my $rows = $sth->fetchall_arrayref; + my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT eidx_key FROM inboxes WHERE ibx_id = ? 
+ + [ map { + my $r = $_; + $eidx_key_sth->execute($r->[0]); + my $eidx_key = $eidx_key_sth->fetchrow_array; + "$eidx_key:$r->[1]:".unpack('H*', $r->[2]); + } @$rows ]; +} + sub next_by_mid { my ($self, $mid, $id, $prev) = @_; my $dbh = dbh($self); diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 09bca790..dff2780d 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -517,20 +517,27 @@ sub eidx_prep ($) { my ($self) = @_; $self->{-eidx_prep} //= do { my $dbh = $self->dbh; - $dbh->do(<<''); -INSERT OR IGNORE INTO counter (key) VALUES ('oidmap_num') + $dbh->do(<<""); +INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid') $dbh->do(<<''); -INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid') +CREATE TABLE IF NOT EXISTS inboxes ( + ibx_id INTEGER PRIMARY KEY AUTOINCREMENT, + eidx_key VARCHAR(255) NOT NULL, /* {newsgroup} // {inboxdir} */ + UNIQUE (eidx_key) +) $dbh->do(<<''); -CREATE TABLE IF NOT EXISTS oidmap ( - num INTEGER NOT NULL, /* NNTP article number == IMAP UID */ - oidbin VARBINARY, /* 20-byte SHA-1 or 32-byte SHA-256 */ - UNIQUE (num), - UNIQUE (oidbin) +CREATE TABLE IF NOT EXISTS xref3 ( + docid INTEGER NOT NULL, /* <=> over.num */ + ibx_id INTEGER NOT NULL, /* <=> inboxes.ibx_id */ + xnum INTEGER NOT NULL, /* NNTP article number in ibx */ + oidbin VARBINARY NOT NULL, /* 20-byte SHA-1 or 32-byte SHA-256 */ + UNIQUE (docid, ibx_id, xnum, oidbin) ) + $dbh->do('CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)'); + $dbh->do(<<''); CREATE TABLE IF NOT EXISTS eidx_meta ( key VARCHAR(255) PRIMARY KEY, @@ -564,28 +571,33 @@ sub eidx_max { get_counter($self->{dbh}, 'eidx_docid'); } -sub oid2num { - my ($self, $oidhex) = @_; - my $dbh = eidx_prep($self); - my $sth = $dbh->prepare_cached(<<'', undef, 1); -SELECT num FROM oidmap WHERE oidbin = ? 
- - $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); +sub add_xref3 { + my ($self, $docid, $xnum, $oidhex, $eidx_key) = @_; + begin_lazy($self); + my $ibx_id = id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key); + my $oidbin = pack('H*', $oidhex); + my $sth = $self->{dbh}->prepare_cached(<<''); +INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?) + + $sth->bind_param(1, $docid); + $sth->bind_param(2, $ibx_id); + $sth->bind_param(3, $xnum); + $sth->bind_param(4, $oidbin, SQL_BLOB); $sth->execute; - $sth->fetchrow_array; } -sub oid_add { - my ($self, $oidhex) = @_; - my $dbh = eidx_prep($self); - my $num = adj_counter($self, 'oidmap_num', '+'); - my $sth = $dbh->prepare_cached(<<''); -INSERT INTO oidmap (num, oidbin) VALUES (?,?) - - $sth->bind_param(1, $num); - $sth->bind_param(2, pack('H*', $oidhex), SQL_BLOB); +sub remove_xref3 { + my ($self, $docid, $oidhex, $eidx_key) = @_; + begin_lazy($self); + my $ibx_id = id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key); + my $oidbin = pack('H*', $oidhex); + my $sth = $self->{dbh}->prepare_cached(<<''); +DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ? + + $sth->bind_param(1, $docid); + $sth->bind_param(2, $ibx_id); + $sth->bind_param(3, $oidbin, SQL_BLOB); $sth->execute; - $num; } 1; -- cgit v1.2.3-24-ge0c7