From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id AA39C2009D for ; Tue, 27 Oct 2020 07:55:00 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 42/52] over: store xref3 data in over.sqlite3 Date: Tue, 27 Oct 2020 07:54:43 +0000 Message-Id: <20201027075453.19163-43-e@80x24.org> In-Reply-To: <20201027075453.19163-1-e@80x24.org> References: <20201027075453.19163-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We may not end up storing xref3 data in Xapian, actually. This will make indexlevel=basic possible, along with --sequential-shard indexing support for slow storage. Making oidmap a separate table seems unnecessary, too, so fold it into the xref3 table since it's unlikely a git blob will be responsible for multiple xref3 rows. --- lib/PublicInbox/Over.pm | 19 +++++++++++ lib/PublicInbox/OverIdx.pm | 64 ++++++++++++++++++++++---------------- t/over.t | 27 +++++++++++----- 3 files changed, 77 insertions(+), 33 deletions(-) diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 08112386..f34e7fc1 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -260,6 +260,25 @@ SELECT num,tid,ds,ts,ddd FROM over WHERE num = ? LIMIT 1 $smsg ? load_from_row($smsg) : undef; } +sub get_xref3 { + my ($self, $num) = @_; + my $dbh = dbh($self); + my $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT ibx_id,xnum,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id ASC + + $sth->execute($num); + my $rows = $sth->fetchall_arrayref; + my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT eidx_key FROM inboxes WHERE ibx_id = ? 
+ + [ map { + my $r = $_; + $eidx_key_sth->execute($r->[0]); + my $eidx_key = $eidx_key_sth->fetchrow_array; + "$eidx_key:$r->[1]:".unpack('H*', $r->[2]); + } @$rows ]; +} + sub next_by_mid { my ($self, $mid, $id, $prev) = @_; my $dbh = dbh($self); diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 09bca790..dff2780d 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -517,20 +517,27 @@ sub eidx_prep ($) { my ($self) = @_; $self->{-eidx_prep} //= do { my $dbh = $self->dbh; - $dbh->do(<<''); -INSERT OR IGNORE INTO counter (key) VALUES ('oidmap_num') + $dbh->do(<<""); +INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid') $dbh->do(<<''); -INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid') +CREATE TABLE IF NOT EXISTS inboxes ( + ibx_id INTEGER PRIMARY KEY AUTOINCREMENT, + eidx_key VARCHAR(255) NOT NULL, /* {newsgroup} // {inboxdir} */ + UNIQUE (eidx_key) +) $dbh->do(<<''); -CREATE TABLE IF NOT EXISTS oidmap ( - num INTEGER NOT NULL, /* NNTP article number == IMAP UID */ - oidbin VARBINARY, /* 20-byte SHA-1 or 32-byte SHA-256 */ - UNIQUE (num), - UNIQUE (oidbin) +CREATE TABLE IF NOT EXISTS xref3 ( + docid INTEGER NOT NULL, /* <=> over.num */ + ibx_id INTEGER NOT NULL, /* <=> inboxes.ibx_id */ + xnum INTEGER NOT NULL, /* NNTP article number in ibx */ + oidbin VARBINARY NOT NULL, /* 20-byte SHA-1 or 32-byte SHA-256 */ + UNIQUE (docid, ibx_id, xnum, oidbin) ) + $dbh->do('CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)'); + $dbh->do(<<''); CREATE TABLE IF NOT EXISTS eidx_meta ( key VARCHAR(255) PRIMARY KEY, @@ -564,28 +571,33 @@ sub eidx_max { get_counter($self->{dbh}, 'eidx_docid'); } -sub oid2num { - my ($self, $oidhex) = @_; - my $dbh = eidx_prep($self); - my $sth = $dbh->prepare_cached(<<'', undef, 1); -SELECT num FROM oidmap WHERE oidbin = ? 
- - $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); +sub add_xref3 { + my ($self, $docid, $xnum, $oidhex, $eidx_key) = @_; + begin_lazy($self); + my $ibx_id = id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key); + my $oidbin = pack('H*', $oidhex); + my $sth = $self->{dbh}->prepare_cached(<<''); +INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?) + + $sth->bind_param(1, $docid); + $sth->bind_param(2, $ibx_id); + $sth->bind_param(3, $xnum); + $sth->bind_param(4, $oidbin, SQL_BLOB); $sth->execute; - $sth->fetchrow_array; } -sub oid_add { - my ($self, $oidhex) = @_; - my $dbh = eidx_prep($self); - my $num = adj_counter($self, 'oidmap_num', '+'); - my $sth = $dbh->prepare_cached(<<''); -INSERT INTO oidmap (num, oidbin) VALUES (?,?) - - $sth->bind_param(1, $num); - $sth->bind_param(2, pack('H*', $oidhex), SQL_BLOB); +sub remove_xref3 { + my ($self, $docid, $oidhex, $eidx_key) = @_; + begin_lazy($self); + my $ibx_id = id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key); + my $oidbin = pack('H*', $oidhex); + my $sth = $self->{dbh}->prepare_cached(<<''); +DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ? 
+ + $sth->bind_param(1, $docid); + $sth->bind_param(2, $ibx_id); + $sth->bind_param(3, $oidbin, SQL_BLOB); $sth->execute; - $num; } 1; diff --git a/t/over.t b/t/over.t index 3e2860f8..56c20d01 100644 --- a/t/over.t +++ b/t/over.t @@ -75,14 +75,27 @@ SKIP: { } # ext index additions +$over->eidx_prep; { - my $hex = 'deadbeefcafe'; - my $n = $over->oid_add($hex); - ok($n > 0, 'oid_add returned number'); - is($over->oid2num($hex), $n, 'oid2num works'); - my $n2 = $over->oid_add($hex.$hex); - ok($n2 > $n, 'oid_add increments'); - is($over->oid2num($hex.$hex), $n2, 'oid2num works again'); + my @arg = qw(1349 2019 adeadba7cafe example.key); + ok($over->add_xref3(@arg), 'first add'); + ok($over->add_xref3(@arg), 'add idempotent'); + my $xref3 = $over->get_xref3(1349); + is_deeply($xref3, [ 'example.key:2019:adeadba7cafe' ], 'xref3 works'); + + @arg = qw(1349 2018 deadbeefcafe example.kee); + ok($over->add_xref3(@arg), 'add another xref3'); + $xref3 = $over->get_xref3(1349); + is_deeply($xref3, [ 'example.key:2019:adeadba7cafe', + 'example.kee:2018:deadbeefcafe' ], + 'xref3 works forw two'); + + @arg = qw(1349 adeadba7cafe example.key); + ok($over->remove_xref3(@arg), 'remove first'); + $xref3 = $over->get_xref3(1349); + is_deeply($xref3, [ 'example.kee:2018:deadbeefcafe' ], + 'confirm removal successful'); + $over->rollback_lazy; } done_testing();