As noted in commit 87dca6d8d5988c5eb54019cca342450b0b7dd6b7 ("www: rework query responses to avoid COUNT in SQLite"), COUNT on many rows is expensive on big SQLite DBs. We've already stopped using that code path long ago in WWW while -imapd and -nntpd never used it. So we'll adjust our remaining test cases to not need it, either. --- lib/PublicInbox/Over.pm | 8 +------- t/indexlevels-mirror.t | 29 +++++++++++++++-------------- t/v2writable.t | 4 +++- t/watch_maildir_v2.t | 30 +++++++++++++++--------------- 4 files changed, 34 insertions(+), 37 deletions(-) diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index a2cf9f21..6b7d5216 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -244,15 +244,9 @@ sub recent { $s = '+num > 0 ORDER BY ts DESC'; } } - my $msgs = do_get($self, <<"", $opts, @v); + do_get($self, <<"", $opts, @v); SELECT ts,ds,ddd FROM over WHERE $s - return $msgs unless wantarray; - - my $nr = $self->{dbh}->selectrow_array(<<''); -SELECT COUNT(num) FROM over WHERE num > 0 - - ($nr, $msgs); } sub get_art { diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 859c2c17..27533546 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -49,8 +49,8 @@ my $import_index_incremental = sub { inboxdir => $ibx->{inboxdir}, indexlevel => $level }); - my ($nr, $msgs) = $ro_master->recent; - is($nr, 1, 'only one message in master, so far'); + my $msgs = $ro_master->recent; + is(scalar(@$msgs), 1, 'only one message in master, so far'); is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed'); # clone @@ -79,8 +79,8 @@ my $import_index_incremental = sub { inboxdir => $mirror, indexlevel => $level, }); - ($nr, $msgs) = $ro_mirror->recent; - is($nr, 1, 'only one message, so far'); + $msgs = $ro_mirror->recent; + is(scalar(@$msgs), 1, 'only one message, so far'); is($msgs->[0]->{mid}, 'm@1', 'read first message'); # update master @@ -91,16 +91,16 @@ my $import_index_incremental = sub { # mirror updates 
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); - ($nr, $msgs) = $ro_mirror->recent; - is($nr, 2, '2nd message seen in mirror'); + $msgs = $ro_mirror->recent; + is(scalar(@$msgs), 2, '2nd message seen in mirror'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in mirror'); # incremental index master (required for v1) ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]), 'index master OK'); - ($nr, $msgs) = $ro_master->recent; - is($nr, 2, '2nd message seen in master'); + $msgs = $ro_master->recent; + is(scalar(@$msgs), 2, '2nd message seen in master'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in master'); @@ -121,15 +121,15 @@ my $import_index_incremental = sub { is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin after xcpdb v' .$v.$level); delete $ro_mirror->{$_} for (qw(over search)); - ($nr, $msgs) = $ro_mirror->search->query('m:m@2'); - is($nr, 1, "v$v found m\@2 via Xapian on $level"); + $msgs = $ro_mirror->search->query('m:m@2'); + is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level"); } # sync the mirror is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); - ($nr, $msgs) = $ro_mirror->recent; - is($nr, 1, '2nd message gone from mirror'); + $msgs = $ro_mirror->recent; + is(scalar(@$msgs), 1, '2nd message gone from mirror'); is_deeply([map { $_->{mid} } @$msgs], ['m@1'], 'message unavailable in mirror'); @@ -138,8 +138,9 @@ my $import_index_incremental = sub { 'no Xapian shard directories for v2 basic'); } if ($level ne 'basic') { - ($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2'); - is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level"); + $msgs = $ro_mirror->search->reopen->query('m:m@2'); + is(scalar(@$msgs), 0, + 
"v$v m\@2 gone from Xapian in mirror on $level"); } # add another message to master and have the mirror diff --git a/t/v2writable.t b/t/v2writable.t index 2bd7a400..9e4547ba 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -120,7 +120,9 @@ if ('ensure git configs are correct') { $mime->header_set('References', '<zz-mid@b>'); ok($im->add($mime), 'message with multiple Message-ID'); $im->done; - my ($total, undef) = $ibx->over->recent; + my $total = $ibx->over->dbh->selectrow_array(<<''); +SELECT COUNT(*) FROM over WHERE num > 0 + is($ibx->mm->num_highwater, $total, 'got expected highwater value'); my $srch = $ibx->search; my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 }); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index f5b8e932..59ec247e 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -50,7 +50,7 @@ ok($ibx, 'found inbox by name'); my $srch = $ibx->search; PublicInbox::WatchMaildir->new($config)->scan('full'); -my ($total, undef) = $srch->reopen->query(''); +my $total = scalar @{$srch->reopen->query('')}; is($total, 1, 'got one revision'); # my $git = PublicInbox::Git->new("$inboxdir/git/0.git"); @@ -70,7 +70,7 @@ my $write_spam = sub { $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::WatchMaildir->new($config)->scan('full'); -is(($srch->reopen->query(''))[0], 0, 'deleted file'); +is_deeply($srch->reopen->query(''), [], 'deleted file'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing @@ -81,16 +81,16 @@ the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 1, 'got one file back'); + my $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 1, 'got one file back'); my $mref = $ibx->msg_by_smsg($msgs->[0]); 
like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::WatchMaildir->new($config)->scan('full'); - ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 0, 'inbox is empty again'); + $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 0, 'inbox is empty again'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } @@ -105,8 +105,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::WatchMaildir->new($config)->scan('full'); } - my ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 0, 'inbox is still empty'); + my $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 0, 'inbox is still empty'); is(unlink(glob("$maildir/new/*")), 1); } @@ -118,8 +118,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 1, 'inbox has one mail after spamc OK-ed a message'); + my $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); delete $config->{'publicinboxwatch.spamcheck'}; @@ -131,11 +131,11 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); $msg = do { local $/; <$fh> }; PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7'); - is($nr, 1, 'diff postimage found'); + my $msgs = $srch->reopen->query('dfpost:6e006fd7'); + is(scalar(@$msgs), 1, 'diff postimage found'); my $post = $msgs->[0]; - ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c'); - is($nr, 1, 
'diff preimage found'); + $msgs = $srch->query('dfpre:090d998b6c2c'); + is(scalar(@$msgs), 1, 'diff preimage found'); is($post->{blob}, $msgs->[0]->{blob}, 'same message'); } @@ -162,7 +162,7 @@ both EOF PublicInbox::Emergency->new($maildir)->prepare(\$both); PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($total, $msgs) = $srch->reopen->query('m:both@b.com'); + my $msgs = $srch->reopen->query('m:both@b.com'); my $v1 = $config->lookup_name('v1'); my $msg = $v1->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v1');
The v1.2.0 is a work-in-progress, while the others are copied out of our mail archives. Eventually, a NEWS file will be generated from these emails and distributed in the release tarball. There'll also be an Atom feed for the website reusing our feed generation code. --- .gitattributes | 2 + Documentation/RelNotes/v1.0.0.eml | 21 ++ Documentation/RelNotes/v1.1.0-pre1.eml | 295 +++++++++++++++++++++++++ Documentation/RelNotes/v1.2.0.wip | 40 ++++ MANIFEST | 4 + 5 files changed, 362 insertions(+) create mode 100644 .gitattributes create mode 100644 Documentation/RelNotes/v1.0.0.eml create mode 100644 Documentation/RelNotes/v1.1.0-pre1.eml create mode 100644 Documentation/RelNotes/v1.2.0.wip diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..bb53518 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Email signatures start with "-- \n" +*.eml whitespace=-blank-at-eol diff --git a/Documentation/RelNotes/v1.0.0.eml b/Documentation/RelNotes/v1.0.0.eml new file mode 100644 index 0000000..ae6ea4e --- /dev/null +++ b/Documentation/RelNotes/v1.0.0.eml @@ -0,0 +1,21 @@ +From e@80x24.org Thu Feb 8 02:33:57 2018 +Date: Thu, 8 Feb 2018 02:33:57 +0000 +From: Eric Wong <e@80x24.org> +To: meta@public-inbox.org +Subject: [ANNOUNCE] public-inbox 1.0.0 +Message-ID: <20180208023357.GA32591@80x24.org> + +After some 3.5 odd years of working on this, I suppose now is +as good a time as any to tar this up and call it 1.0.0. 
+ +The TODO list is still very long and there'll be some new +development in coming weeks :> + +So, here you have a release: + + https://public-inbox.org/releases/public-inbox-1.0.0.tar.gz + +Checksums, mainly as a safeguard against accidental file corruption: + +SHA-256 4a08569f3d99310f713bb32bec0aa4819d6b41871e0421ec4eec0657a5582216 + (in other words, don't trust me; instead read the code :>) diff --git a/Documentation/RelNotes/v1.1.0-pre1.eml b/Documentation/RelNotes/v1.1.0-pre1.eml new file mode 100644 index 0000000..ee1ecc3 --- /dev/null +++ b/Documentation/RelNotes/v1.1.0-pre1.eml @@ -0,0 +1,295 @@ +From e@80x24.org Wed May 9 20:23:03 2018 +Date: Wed, 9 May 2018 20:23:03 +0000 +From: Eric Wong <e@80x24.org> +To: meta@public-inbox.org +Cc: Konstantin Ryabitsev <konstantin@linuxfoundation.org> +Subject: [ANNOUNCE] public-inbox 1.1.0-pre1 +Message-ID: <20180509202303.GA15156@dcvr> + +Pre-release for v2 repository support. +Thanks to The Linux Foundation for supporting this work! + +https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz + +SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009 +(only intended to guard against accidental file corruption) + +shortlog below: + +Eric Wong (27): + nntp: improve fairness during XOVER and similar commands + nntp: do not drain rbuf if there is a command pending + extmsg: use news.gmane.org for Message-ID lookups + searchview: fix non-numeric comparison + mbox: do not barf on queries which return no results + nntp: allow and ignore empty commands + ensure SQLite and Xapian files respect core.sharedRepository + TODO: a few more updates + filter/rubylang: do not set altid on spam training + import: cleanup git cat-file processes when ->done + disallow "\t" and "\n" in OVER headers + searchidx: release lock again during v1 batch callback + searchidx: remove leftover debugging code + convert: copy description and git config from v1 repo + view: untangle loop when showing message headers + view: 
wrap To: and Cc: headers in HTML display + view: drop redundant References: display code + TODO: add EPOLLEXCLUSIVE item + searchview: do not blindly append "l" parameter to URL + search: avoid repeated mbox results from search + msgmap: add limit to response for NNTP + thread: prevent hidden threads in /$INBOX/ landing page + thread: sort incoming messages by Date + searchidx: preserve umask when starting/committing transactions + scripts/import_slrnspool: support v2 repos + scripts/import_slrnspool: cleanup progress messages + public-inbox 1.1.0-pre1 + +Eric Wong (Contractor, The Linux Foundation) (239): + AUTHORS: add The Linux Foundation + watch_maildir: allow '-' in mail filename + scripts/import_vger_from_mbox: relax From_ line match slightly + import: stop writing legacy ssoma.index by default + import: begin supporting this without ssoma.lock + import: initial handling for v2 + t/import: test for last_object_id insertion + content_id: add test case + searchmsg: add mid_mime import for _extract_mid + scripts/import_vger_from_mbox: support --dry-run option + import: APIs to support v2 use + search: free up 'Q' prefix for a real unique identifier + searchidx: fix comment around next_thread_id + address: extract more characters from email addresses + import: pass "raw" dates to git-fast-import(1) + scripts/import_vger_from_mbox: use v2 layout for import + import: quiet down warnings from bogus From: lines + import: allow the epoch (0s) as a valid time + extmsg: fix broken Xapian MID lookup + search: stop assuming Message-ID is unique + www: stop assuming mainrepo == git_dir + v2writable: initial cut for repo-rotation + git: reload alternates file on missing blob + v2: support Xapian + SQLite indexing + import_vger_from_inbox: allow "-V" option + import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering + v2: parallelize Xapian indexing + v2writable: round-robin to partitions based on article number + searchidxpart: increase pipe size for partitions + 
v2writable: warn on duplicate Message-IDs + searchidx: do not modify Xapian DB while iterating + v2/ui: some hacky things to get the PSGI UI to show up + v2/ui: retry DB reopens in a few more places + v2writable: cleanup unused pipes in partitions + searchidxpart: binmode + use PublicInbox::MIME consistently + searchidxpart: chomp line before splitting + searchidx*: name child subprocesses + searchidx: get rid of pointless index_blob wrapper + view: remove X-PI-TS reference + searchidxthread: load doc data for references + searchidxpart: force integers into add_message + search: reopen skeleton DB as well + searchidx: index values in the threader + search: use different Enquire object for skeleton queries + rename SearchIdxThread to SearchIdxSkeleton + v2writable: commit to skeleton via remote partitions + searchidxskeleton: extra error checking + searchidx: do not modify Xapian DB while iterating + search: query_xover uses skeleton DB iff available + v2/ui: get nntpd and init tests running on v2 + v2writable: delete ::Import obj when ->done + search: remove informational "warning" message + searchidx: add PID to error message when die-ing + content_id: special treatment for Message-Id headers + evcleanup: disable outside of daemon + v2writable: deduplicate detection on add + evcleanup: do not create event loop if nothing was registered + mid: add `mids' and `references' methods for extraction + content_id: use `mids' and `references' for MID extraction + searchidx: use new `references' method for parsing References + content_id: no need to be human-friendly + v2writable: inject new Message-IDs on true duplicates + search: revert to using 'Q' as a uniQue id per-Xapian conventions + searchidx: support indexing multiple MIDs + mid: be strict with References, but loose on Message-Id + searchidx: avoid excessive XNQ indexing with diffs + searchidxskeleton: add a note about locking + v2writable: generated Message-ID goes first + searchidx: use add_boolean_term for 
internal terms + searchidx: add NNTP article number as a searchable term + mid: truncate excessively long MIDs early + nntp: use NNTP article numbers for lookups + nntp: fix NEWNEWS command + searchidx: store the primary MID in doc data for NNTP + import: consolidate object info for v2 imports + v2: avoid redundant/repeated configs for git partition repos + INSTALL: document more optional dependencies + search: favor skeleton DB for lookup_mail + search: each_smsg_by_mid uses skeleton if available + v2writable: remove unnecessary skeleton commit + favor Received: date over Date: header globally + import: fall back to Sender for extracting name and email + scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping + v2writable: detect and use previous partition count + extmsg: rework partial MID matching to favor current inbox + extmsg: rework partial MID matching to favor current inbox + content_id: use Sender header if From is not available + v2writable: support "barrier" operation to avoid reforking + use string ref for Email::Simple->new + v2writable: remove unnecessary idx_init call + searchidx: do not delete documents while iterating + search: allow ->reopen to be chainable + v2writable: implement remove correctly + skeleton: barrier init requires a lock + import: (v2) delete writes the blob into history in subdir + import: (v2): write deletes to a separate '_' subdirectory + import: implement barrier operation for v1 repos + mid: mid_mime uses v2-compatible mids function + watchmaildir: use content_digest to generate Message-Id + import: force Message-ID generation for v1 here + import: switch to URL-safe Base64 for Message-IDs + v2writable: test for idempotent removals + import: enable locking under v2 + index: s/GIT_DIR/REPO_DIR/ + Lock: new base class for writable lockers + t/watch_maildir: note the reason for FIFO creation + v2writable: ensure ->done is idempotent + watchmaildir: support v2 repositories + searchidxpart: s/barrier/remote_barrier/ + 
v2writable: allow disabling parallelization + scripts/import_vger_from_mbox: filter out same headers as MDA + v2writable: add DEBUG_DIFF env support + v2writable: remove "resent" message for duplicate Message-IDs + content_id: do not take Message-Id into account + introduce InboxWritable class + import: discard all the same headers as MDA + InboxWritable: add mbox/maildir parsing + import logic + use both Date: and Received: times + msgmap: add tmp_clone to create an anonymous copy + fix syntax warnings + v2writable: support reindexing Xapian + t/altid.t: extra tests for mid_set + v2writable: add NNTP article number regeneration support + v2writable: clarify header cleanups + v2writable: DEBUG_DIFF respects $TMPDIR + feed: $INBOX/new.atom endpoint supports v2 inboxes + import: consolidate mid prepend logic, here + www: $MESSAGE_ID/raw endpoint supports "duplicates" + search: reopen DB if each_smsg_by_mid fails + t/psgi_v2: minimal test for Atom feed and t.mbox.gz + feed: fix new.html for v2 + view: permalink (per-message) view shows multiple messages + searchidx: warn about vivifying multiple ghosts + v2writable: warn on unseen deleted files + www: get rid of unnecessary 'inbox' name reference + searchview: remove unnecessary imports from MID module + view: depend on SearchMsg for Message-ID + http: fix modification of read-only value + githttpbackend: avoid infinite loop on generic PSGI servers + www: support cloning individual v2 git partitions + http: fix modification of read-only value + githttpbackend: avoid infinite loop on generic PSGI servers + www: remove unnecessary ghost checks + v2writable: append, instead of prepending generated Message-ID + lookup by Message-ID favors the "primary" one + www: fix attachment downloads for conflicted Message-IDs + searchmsg: document why we store To: and Cc: for NNTP + public-inbox-convert: tool for converting old to new inboxes + v2writable: support purging messages from git entirely + search: cleanup uniqueness 
checking + search: get rid of most lookup_* subroutines + search: move find_doc_ids to searchidx + v2writable: cleanup: get rid of unused fields + mbox: avoid extracting Message-ID for linkification + www: cleanup expensive fallback for legacy URLs + view: get rid of some unnecessary imports + search: retry_reopen on first_smsg_by_mid + import: run_die supports redirects as spawn does + v2writable: initializing an existing inbox is idempotent + public-inbox-compact: new tool for driving xapian-compact + mda: support v2 inboxes + search: warn on reopens and die on total failure + v2writable: allow gaps in git partitions + v2writable: convert some fatal reindex errors to warnings + wwwstream: flesh out clone instructions for v2 + v2writable: go backwards through alternate Message-IDs + view: speed up homepage loading time with date clamp + view: drop load_results + feed: optimize query for feeds, too + msgtime: parse 3-digit years properly + convert: avoid redundant "done\n" statement for fast-import + search: move permissions handling to InboxWritable + t/v2writable: use simplify permissions reading + v2: respect core.sharedRepository in git configs + searchidx: correct warning for over-vivification + v2: one file, really + v2writable: fix parallel termination + truncate Message-IDs and References consistently + scripts/import_vger_from_mbox: set address properly + search: reduce columns stored in Xapian + replace Xapian skeleton with SQLite overview DB + v2writable: simplify barrier vs checkpoints + t/over: test empty Subject: line matching + www: rework query responses to avoid COUNT in SQLite + over: speedup get_thread by avoiding JOIN + nntp: fix NEWNEWS command + t/thread-all.t: modernize test to support modern inboxes + rename+rewrite test using Benchmark module + nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster + view: avoid offset during pagination + mbox: remove remaining OFFSET usage in SQLite + msgmap: replace id_batch with ids_after + nntp: simplify 
the long_response API + searchidx: ensure duplicated Message-IDs can be linked together + init: s/GIT_DIR/REPO_DIR/ in usage + import: rewrite less history during purge + v2: support incremental indexing + purge + v2writable: do not modify DBs while iterating for ->remove + v2writable: recount partitions after acquiring lock + searchmsg: remove unused `tid' and `path' methods + search: remove unnecessary OP_AND of query + mbox: do not sort search results + searchview: minor cleanup + support altid mechanism for v2 + compact: better handling of over.sqlite3* files + v2writable: remove redundant remove from Over DB + v2writable: allow tracking parallel versions + v2writable: refer to git each repository as "epoch" + over: use only supported and safe SQLite APIs + search: index and allow searching by date-time + altid: fix miscopied field name + nntp: set Xref across multiple inboxes + www: favor reading more from SQLite, and less from Xapian + ensure Xapian and SQLite are still optional for v1 tests + psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological + over: avoid excessive SELECT + over: remove forked subprocess + v2writable: reduce barriers + index: allow specifying --jobs=0 to disable multiprocess + convert: support converting with altid defined + store less data in the Xapian document + msgmap: speed up minmax with separate queries + feed: respect feedmax, again + v1: remove articles from overview DB + compact: do not merge v2 repos by default + v2writable: reduce partititions by one + search: preserve References in Xapian smsg for x=t view + v2: generate better Message-IDs for duplicates + v2: improve deduplication checks + import: cat_blob drops leading 'From ' lines like Inbox + searchidx: regenerate and avoid article number gaps on full index + extmsg: remove expensive git path checks + use %H consistently to disable abbreviations + searchidx: increase term positions for all text terms + searchidx: revert default BATCH_BYTES to 1_000_000 + Merge 
remote-tracking branch 'origin/master' into v2 + fix tests to run without Xapian installed + extmsg: use Xapian only for partial matches + +Jonathan Corbet (3): + Don't use LIMIT in UPDATE statements + Update the installation instructions with Fedora package names + Allow specification of the number of search results to return +-- +git clone https://public-inbox.org/ public-inbox +(working on a homepage... sorta :) diff --git a/Documentation/RelNotes/v1.2.0.wip b/Documentation/RelNotes/v1.2.0.wip new file mode 100644 index 0000000..41236a0 --- /dev/null +++ b/Documentation/RelNotes/v1.2.0.wip @@ -0,0 +1,40 @@ +To: meta@public-inbox.org +Subject: [WIP] public-inbox 1.2.0 + +* first non-pre/rc release with v2 format support for scalability. + See public-inbox-v2-format(5) manpage for more details. + +* new admin tools for v2 repos: + - public-inbox-convert - converts v1 to v2 repo formats + - public-inbox-compact - v2 convenience wrapper for xapian-compact(1) + - public-inbox-purge - purges entire messages out of v2 history + - public-inbox-edit - edits sensitive data out of messages from v2 history + - public-inbox-xcpdb - copydatabase(1) wrapper to upgrade Xapian formats + (e.g. from "chert" to "glass") and resharding + of v2 repos + +* SQLite3 support decoupled from Xapian support, and Xapian DBs may be + configured without phrase support to save space. See "indexlevel" in + public-inbox-config(5) manpage for more info.
+ +* public-inbox-nntpd + - support STARTTLS and NNTPS + - support COMPRESS extension + - fix several RFC3977 compliance bugs + - improved interoperability with picky clients such as leafnode + +* public-inbox-watch + - support multiple spam training directories + - support mapping multiple inboxes per Maildir + +* PublicInbox::WWW + - grokmirror-compatible manifest.js.gz endpoint generation + - user-configurable color support in $INBOX_URL/_/text/color/ + - BOFHs may set default colors via "publicinbox.css" + (see public-inbox-config(5)) + +* Danga::Socket is no longer a runtime dependency of daemons. + +* improved FreeBSD support + +See archives at https://public-inbox.org/meta/ for all history. diff --git a/MANIFEST b/MANIFEST index f5290b4..ecf239f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,7 +1,11 @@ +.gitattributes .gitignore AUTHORS COPYING Documentation/.gitignore +Documentation/RelNotes/v1.0.0.eml +Documentation/RelNotes/v1.1.0-pre1.eml +Documentation/RelNotes/v1.2.0.wip Documentation/dc-dlvr-spam-flow.txt Documentation/design_notes.txt Documentation/design_www.txt -- EW
Pre-release for v2 repository support. Thanks to The Linux Foundation for supporting this work! https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009 (only intended to guard against accidental file corruption) shortlog below: Eric Wong (27): nntp: improve fairness during XOVER and similar commands nntp: do not drain rbuf if there is a command pending extmsg: use news.gmane.org for Message-ID lookups searchview: fix non-numeric comparison mbox: do not barf on queries which return no results nntp: allow and ignore empty commands ensure SQLite and Xapian files respect core.sharedRepository TODO: a few more updates filter/rubylang: do not set altid on spam training import: cleanup git cat-file processes when ->done disallow "\t" and "\n" in OVER headers searchidx: release lock again during v1 batch callback searchidx: remove leftover debugging code convert: copy description and git config from v1 repo view: untangle loop when showing message headers view: wrap To: and Cc: headers in HTML display view: drop redundant References: display code TODO: add EPOLLEXCLUSIVE item searchview: do not blindly append "l" parameter to URL search: avoid repeated mbox results from search msgmap: add limit to response for NNTP thread: prevent hidden threads in /$INBOX/ landing page thread: sort incoming messages by Date searchidx: preserve umask when starting/committing transactions scripts/import_slrnspool: support v2 repos scripts/import_slrnspool: cleanup progress messages public-inbox 1.1.0-pre1 Eric Wong (Contractor, The Linux Foundation) (239): AUTHORS: add The Linux Foundation watch_maildir: allow '-' in mail filename scripts/import_vger_from_mbox: relax From_ line match slightly import: stop writing legacy ssoma.index by default import: begin supporting this without ssoma.lock import: initial handling for v2 t/import: test for last_object_id insertion content_id: add test case searchmsg: add 
mid_mime import for _extract_mid scripts/import_vger_from_mbox: support --dry-run option import: APIs to support v2 use search: free up 'Q' prefix for a real unique identifier searchidx: fix comment around next_thread_id address: extract more characters from email addresses import: pass "raw" dates to git-fast-import(1) scripts/import_vger_from_mbox: use v2 layout for import import: quiet down warnings from bogus From: lines import: allow the epoch (0s) as a valid time extmsg: fix broken Xapian MID lookup search: stop assuming Message-ID is unique www: stop assuming mainrepo == git_dir v2writable: initial cut for repo-rotation git: reload alternates file on missing blob v2: support Xapian + SQLite indexing import_vger_from_inbox: allow "-V" option import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering v2: parallelize Xapian indexing v2writable: round-robin to partitions based on article number searchidxpart: increase pipe size for partitions v2writable: warn on duplicate Message-IDs searchidx: do not modify Xapian DB while iterating v2/ui: some hacky things to get the PSGI UI to show up v2/ui: retry DB reopens in a few more places v2writable: cleanup unused pipes in partitions searchidxpart: binmode use PublicInbox::MIME consistently searchidxpart: chomp line before splitting searchidx*: name child subprocesses searchidx: get rid of pointless index_blob wrapper view: remove X-PI-TS reference searchidxthread: load doc data for references searchidxpart: force integers into add_message search: reopen skeleton DB as well searchidx: index values in the threader search: use different Enquire object for skeleton queries rename SearchIdxThread to SearchIdxSkeleton v2writable: commit to skeleton via remote partitions searchidxskeleton: extra error checking searchidx: do not modify Xapian DB while iterating search: query_xover uses skeleton DB iff available v2/ui: get nntpd and init tests running on v2 v2writable: delete ::Import obj when ->done search: remove 
informational "warning" message searchidx: add PID to error message when die-ing content_id: special treatment for Message-Id headers evcleanup: disable outside of daemon v2writable: deduplicate detection on add evcleanup: do not create event loop if nothing was registered mid: add `mids' and `references' methods for extraction content_id: use `mids' and `references' for MID extraction searchidx: use new `references' method for parsing References content_id: no need to be human-friendly v2writable: inject new Message-IDs on true duplicates search: revert to using 'Q' as a uniQue id per-Xapian conventions searchidx: support indexing multiple MIDs mid: be strict with References, but loose on Message-Id searchidx: avoid excessive XNQ indexing with diffs searchidxskeleton: add a note about locking v2writable: generated Message-ID goes first searchidx: use add_boolean_term for internal terms searchidx: add NNTP article number as a searchable term mid: truncate excessively long MIDs early nntp: use NNTP article numbers for lookups nntp: fix NEWNEWS command searchidx: store the primary MID in doc data for NNTP import: consolidate object info for v2 imports v2: avoid redundant/repeated configs for git partition repos INSTALL: document more optional dependencies search: favor skeleton DB for lookup_mail search: each_smsg_by_mid uses skeleton if available v2writable: remove unnecessary skeleton commit favor Received: date over Date: header globally import: fall back to Sender for extracting name and email scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping v2writable: detect and use previous partition count extmsg: rework partial MID matching to favor current inbox extmsg: rework partial MID matching to favor current inbox content_id: use Sender header if From is not available v2writable: support "barrier" operation to avoid reforking use string ref for Email::Simple->new v2writable: remove unnecessary idx_init call searchidx: do not delete documents while 
iterating search: allow ->reopen to be chainable v2writable: implement remove correctly skeleton: barrier init requires a lock import: (v2) delete writes the blob into history in subdir import: (v2): write deletes to a separate '_' subdirectory import: implement barrier operation for v1 repos mid: mid_mime uses v2-compatible mids function watchmaildir: use content_digest to generate Message-Id import: force Message-ID generation for v1 here import: switch to URL-safe Base64 for Message-IDs v2writable: test for idempotent removals import: enable locking under v2 index: s/GIT_DIR/REPO_DIR/ Lock: new base class for writable lockers t/watch_maildir: note the reason for FIFO creation v2writable: ensure ->done is idempotent watchmaildir: support v2 repositories searchidxpart: s/barrier/remote_barrier/ v2writable: allow disabling parallelization scripts/import_vger_from_mbox: filter out same headers as MDA v2writable: add DEBUG_DIFF env support v2writable: remove "resent" message for duplicate Message-IDs content_id: do not take Message-Id into account introduce InboxWritable class import: discard all the same headers as MDA InboxWritable: add mbox/maildir parsing + import logic use both Date: and Received: times msgmap: add tmp_clone to create an anonymous copy fix syntax warnings v2writable: support reindexing Xapian t/altid.t: extra tests for mid_set v2writable: add NNTP article number regeneration support v2writable: clarify header cleanups v2writable: DEBUG_DIFF respects $TMPDIR feed: $INBOX/new.atom endpoint supports v2 inboxes import: consolidate mid prepend logic, here www: $MESSAGE_ID/raw endpoint supports "duplicates" search: reopen DB if each_smsg_by_mid fails t/psgi_v2: minimal test for Atom feed and t.mbox.gz feed: fix new.html for v2 view: permalink (per-message) view shows multiple messages searchidx: warn about vivifying multiple ghosts v2writable: warn on unseen deleted files www: get rid of unnecessary 'inbox' name reference searchview: remove 
unnecessary imports from MID module view: depend on SearchMsg for Message-ID http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: support cloning individual v2 git partitions http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: remove unnecessary ghost checks v2writable: append, instead of prepending generated Message-ID lookup by Message-ID favors the "primary" one www: fix attachment downloads for conflicted Message-IDs searchmsg: document why we store To: and Cc: for NNTP public-inbox-convert: tool for converting old to new inboxes v2writable: support purging messages from git entirely search: cleanup uniqueness checking search: get rid of most lookup_* subroutines search: move find_doc_ids to searchidx v2writable: cleanup: get rid of unused fields mbox: avoid extracting Message-ID for linkification www: cleanup expensive fallback for legacy URLs view: get rid of some unnecessary imports search: retry_reopen on first_smsg_by_mid import: run_die supports redirects as spawn does v2writable: initializing an existing inbox is idempotent public-inbox-compact: new tool for driving xapian-compact mda: support v2 inboxes search: warn on reopens and die on total failure v2writable: allow gaps in git partitions v2writable: convert some fatal reindex errors to warnings wwwstream: flesh out clone instructions for v2 v2writable: go backwards through alternate Message-IDs view: speed up homepage loading time with date clamp view: drop load_results feed: optimize query for feeds, too msgtime: parse 3-digit years properly convert: avoid redundant "done\n" statement for fast-import search: move permissions handling to InboxWritable t/v2writable: use simplify permissions reading v2: respect core.sharedRepository in git configs searchidx: correct warning for over-vivification v2: one file, really v2writable: fix parallel termination truncate Message-IDs and References 
consistently scripts/import_vger_from_mbox: set address properly search: reduce columns stored in Xapian replace Xapian skeleton with SQLite overview DB v2writable: simplify barrier vs checkpoints t/over: test empty Subject: line matching www: rework query responses to avoid COUNT in SQLite over: speedup get_thread by avoiding JOIN nntp: fix NEWNEWS command t/thread-all.t: modernize test to support modern inboxes rename+rewrite test using Benchmark module nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster view: avoid offset during pagination mbox: remove remaining OFFSET usage in SQLite msgmap: replace id_batch with ids_after nntp: simplify the long_response API searchidx: ensure duplicated Message-IDs can be linked together init: s/GIT_DIR/REPO_DIR/ in usage import: rewrite less history during purge v2: support incremental indexing + purge v2writable: do not modify DBs while iterating for ->remove v2writable: recount partitions after acquiring lock searchmsg: remove unused `tid' and `path' methods search: remove unnecessary OP_AND of query mbox: do not sort search results searchview: minor cleanup support altid mechanism for v2 compact: better handling of over.sqlite3* files v2writable: remove redundant remove from Over DB v2writable: allow tracking parallel versions v2writable: refer to git each repository as "epoch" over: use only supported and safe SQLite APIs search: index and allow searching by date-time altid: fix miscopied field name nntp: set Xref across multiple inboxes www: favor reading more from SQLite, and less from Xapian ensure Xapian and SQLite are still optional for v1 tests psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological over: avoid excessive SELECT over: remove forked subprocess v2writable: reduce barriers index: allow specifying --jobs=0 to disable multiprocess convert: support converting with altid defined store less data in the Xapian document msgmap: speed up minmax with separate queries feed: respect feedmax, again v1: 
remove articles from overview DB compact: do not merge v2 repos by default v2writable: reduce partititions by one search: preserve References in Xapian smsg for x=t view v2: generate better Message-IDs for duplicates v2: improve deduplication checks import: cat_blob drops leading 'From ' lines like Inbox searchidx: regenerate and avoid article number gaps on full index extmsg: remove expensive git path checks use %H consistently to disable abbreviations searchidx: increase term positions for all text terms searchidx: revert default BATCH_BYTES to 1_000_000 Merge remote-tracking branch 'origin/master' into v2 fix tests to run without Xapian installed extmsg: use Xapian only for partial matches Jonathan Corbet (3): Don't use LIMIT in UPDATE statements Update the installation instructions with Fedora package names Allow specification of the number of search results to return -- git clone https://public-inbox.org/ public-inbox (working on a homepage... sorta :)
I actually merged master into v2, so it's a bit backwards :P commit cfb8d16578e7f2f2e300f9f436205e4a8fc7f322 Merge: 1dc0f0c 119463b Author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> Date: Wed Apr 18 20:58:35 2018 +0000 Merge remote-tracking branch 'origin/master' into v2 I screwed up the indexing on http://hjrcffqmbrq6wope.onion/git/ so that's still going, but I think I was able to update the rest of them (including the heavily trafficked non-.onion) w/o downtime. The mirror at http://czquwvybam4bgbro.onion/git/ has been running the v2 code for over a week, now. Thanks to the Linux Foundation for funding this work. Will still need to make some documentation updates and such. Eric Wong (Contractor, The Linux Foundation) (237): AUTHORS: add The Linux Foundation watch_maildir: allow '-' in mail filename scripts/import_vger_from_mbox: relax From_ line match slightly import: stop writing legacy ssoma.index by default import: begin supporting this without ssoma.lock import: initial handling for v2 t/import: test for last_object_id insertion content_id: add test case searchmsg: add mid_mime import for _extract_mid scripts/import_vger_from_mbox: support --dry-run option import: APIs to support v2 use search: free up 'Q' prefix for a real unique identifier searchidx: fix comment around next_thread_id address: extract more characters from email addresses import: pass "raw" dates to git-fast-import(1) scripts/import_vger_from_mbox: use v2 layout for import import: quiet down warnings from bogus From: lines import: allow the epoch (0s) as a valid time extmsg: fix broken Xapian MID lookup search: stop assuming Message-ID is unique www: stop assuming mainrepo == git_dir v2writable: initial cut for repo-rotation git: reload alternates file on missing blob v2: support Xapian + SQLite indexing import_vger_from_inbox: allow "-V" option import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering v2: parallelize Xapian indexing v2writable: round-robin to 
partitions based on article number searchidxpart: increase pipe size for partitions v2writable: warn on duplicate Message-IDs searchidx: do not modify Xapian DB while iterating v2/ui: some hacky things to get the PSGI UI to show up v2/ui: retry DB reopens in a few more places v2writable: cleanup unused pipes in partitions searchidxpart: binmode use PublicInbox::MIME consistently searchidxpart: chomp line before splitting searchidx*: name child subprocesses searchidx: get rid of pointless index_blob wrapper view: remove X-PI-TS reference searchidxthread: load doc data for references searchidxpart: force integers into add_message search: reopen skeleton DB as well searchidx: index values in the threader search: use different Enquire object for skeleton queries rename SearchIdxThread to SearchIdxSkeleton v2writable: commit to skeleton via remote partitions searchidxskeleton: extra error checking searchidx: do not modify Xapian DB while iterating search: query_xover uses skeleton DB iff available v2/ui: get nntpd and init tests running on v2 v2writable: delete ::Import obj when ->done search: remove informational "warning" message searchidx: add PID to error message when die-ing content_id: special treatment for Message-Id headers evcleanup: disable outside of daemon v2writable: deduplicate detection on add evcleanup: do not create event loop if nothing was registered mid: add `mids' and `references' methods for extraction content_id: use `mids' and `references' for MID extraction searchidx: use new `references' method for parsing References content_id: no need to be human-friendly v2writable: inject new Message-IDs on true duplicates search: revert to using 'Q' as a uniQue id per-Xapian conventions searchidx: support indexing multiple MIDs mid: be strict with References, but loose on Message-Id searchidx: avoid excessive XNQ indexing with diffs searchidxskeleton: add a note about locking v2writable: generated Message-ID goes first searchidx: use add_boolean_term for 
internal terms searchidx: add NNTP article number as a searchable term mid: truncate excessively long MIDs early nntp: use NNTP article numbers for lookups nntp: fix NEWNEWS command searchidx: store the primary MID in doc data for NNTP import: consolidate object info for v2 imports v2: avoid redundant/repeated configs for git partition repos INSTALL: document more optional dependencies search: favor skeleton DB for lookup_mail search: each_smsg_by_mid uses skeleton if available v2writable: remove unnecessary skeleton commit favor Received: date over Date: header globally import: fall back to Sender for extracting name and email scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping v2writable: detect and use previous partition count extmsg: rework partial MID matching to favor current inbox extmsg: rework partial MID matching to favor current inbox content_id: use Sender header if From is not available v2writable: support "barrier" operation to avoid reforking use string ref for Email::Simple->new v2writable: remove unnecessary idx_init call searchidx: do not delete documents while iterating search: allow ->reopen to be chainable v2writable: implement remove correctly skeleton: barrier init requires a lock import: (v2) delete writes the blob into history in subdir import: (v2): write deletes to a separate '_' subdirectory import: implement barrier operation for v1 repos mid: mid_mime uses v2-compatible mids function watchmaildir: use content_digest to generate Message-Id import: force Message-ID generation for v1 here import: switch to URL-safe Base64 for Message-IDs v2writable: test for idempotent removals import: enable locking under v2 index: s/GIT_DIR/REPO_DIR/ Lock: new base class for writable lockers t/watch_maildir: note the reason for FIFO creation v2writable: ensure ->done is idempotent watchmaildir: support v2 repositories searchidxpart: s/barrier/remote_barrier/ v2writable: allow disabling parallelization scripts/import_vger_from_mbox: filter 
out same headers as MDA v2writable: add DEBUG_DIFF env support v2writable: remove "resent" message for duplicate Message-IDs content_id: do not take Message-Id into account introduce InboxWritable class import: discard all the same headers as MDA InboxWritable: add mbox/maildir parsing + import logic use both Date: and Received: times msgmap: add tmp_clone to create an anonymous copy fix syntax warnings v2writable: support reindexing Xapian t/altid.t: extra tests for mid_set v2writable: add NNTP article number regeneration support v2writable: clarify header cleanups v2writable: DEBUG_DIFF respects $TMPDIR feed: $INBOX/new.atom endpoint supports v2 inboxes import: consolidate mid prepend logic, here www: $MESSAGE_ID/raw endpoint supports "duplicates" search: reopen DB if each_smsg_by_mid fails t/psgi_v2: minimal test for Atom feed and t.mbox.gz feed: fix new.html for v2 view: permalink (per-message) view shows multiple messages searchidx: warn about vivifying multiple ghosts v2writable: warn on unseen deleted files www: get rid of unnecessary 'inbox' name reference searchview: remove unnecessary imports from MID module view: depend on SearchMsg for Message-ID http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: support cloning individual v2 git partitions http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: remove unnecessary ghost checks v2writable: append, instead of prepending generated Message-ID lookup by Message-ID favors the "primary" one www: fix attachment downloads for conflicted Message-IDs searchmsg: document why we store To: and Cc: for NNTP public-inbox-convert: tool for converting old to new inboxes v2writable: support purging messages from git entirely search: cleanup uniqueness checking search: get rid of most lookup_* subroutines search: move find_doc_ids to searchidx v2writable: cleanup: get rid of unused fields mbox: avoid extracting 
Message-ID for linkification www: cleanup expensive fallback for legacy URLs view: get rid of some unnecessary imports search: retry_reopen on first_smsg_by_mid import: run_die supports redirects as spawn does v2writable: initializing an existing inbox is idempotent public-inbox-compact: new tool for driving xapian-compact mda: support v2 inboxes search: warn on reopens and die on total failure v2writable: allow gaps in git partitions v2writable: convert some fatal reindex errors to warnings wwwstream: flesh out clone instructions for v2 v2writable: go backwards through alternate Message-IDs view: speed up homepage loading time with date clamp view: drop load_results feed: optimize query for feeds, too msgtime: parse 3-digit years properly convert: avoid redundant "done\n" statement for fast-import search: move permissions handling to InboxWritable t/v2writable: use simplify permissions reading v2: respect core.sharedRepository in git configs searchidx: correct warning for over-vivification v2: one file, really v2writable: fix parallel termination truncate Message-IDs and References consistently scripts/import_vger_from_mbox: set address properly search: reduce columns stored in Xapian replace Xapian skeleton with SQLite overview DB v2writable: simplify barrier vs checkpoints t/over: test empty Subject: line matching www: rework query responses to avoid COUNT in SQLite over: speedup get_thread by avoiding JOIN nntp: fix NEWNEWS command t/thread-all.t: modernize test to support modern inboxes rename+rewrite test using Benchmark module nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster view: avoid offset during pagination mbox: remove remaining OFFSET usage in SQLite msgmap: replace id_batch with ids_after nntp: simplify the long_response API searchidx: ensure duplicated Message-IDs can be linked together init: s/GIT_DIR/REPO_DIR/ in usage import: rewrite less history during purge v2: support incremental indexing + purge v2writable: do not modify DBs while 
iterating for ->remove v2writable: recount partitions after acquiring lock searchmsg: remove unused `tid' and `path' methods search: remove unnecessary OP_AND of query mbox: do not sort search results searchview: minor cleanup support altid mechanism for v2 compact: better handling of over.sqlite3* files v2writable: remove redundant remove from Over DB v2writable: allow tracking parallel versions v2writable: refer to git each repository as "epoch" over: use only supported and safe SQLite APIs search: index and allow searching by date-time altid: fix miscopied field name nntp: set Xref across multiple inboxes www: favor reading more from SQLite, and less from Xapian ensure Xapian and SQLite are still optional for v1 tests psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological over: avoid excessive SELECT over: remove forked subprocess v2writable: reduce barriers index: allow specifying --jobs=0 to disable multiprocess convert: support converting with altid defined store less data in the Xapian document msgmap: speed up minmax with separate queries feed: respect feedmax, again v1: remove articles from overview DB compact: do not merge v2 repos by default v2writable: reduce partititions by one search: preserve References in Xapian smsg for x=t view v2: generate better Message-IDs for duplicates v2: improve deduplication checks import: cat_blob drops leading 'From ' lines like Inbox searchidx: regenerate and avoid article number gaps on full index extmsg: remove expensive git path checks use %H consistently to disable abbreviations searchidx: increase term positions for all text terms searchidx: revert default BATCH_BYTES to 1_000_000 Merge remote-tracking branch 'origin/master' into v2
In many cases, we do not care about the total number of messages. Counting them is a rather expensive operation in SQLite (Xapian only provides an estimate). For LKML, this brings top-level /$INBOX/ loading time from ~375ms to around 60ms on my system. Days ago, this operation was taking 800-900ms(!) for me before introducing the SQLite overview DB. --- lib/PublicInbox/Feed.pm | 11 +++---- lib/PublicInbox/Mbox.pm | 12 +++---- lib/PublicInbox/Over.pm | 8 +++-- lib/PublicInbox/Search.pm | 15 ++++++--- lib/PublicInbox/View.pm | 17 ++++------ t/altid.t | 9 +++--- t/convert-compact.t | 6 ++-- t/over.t | 6 ++-- t/search-thr-index.t | 4 +-- t/search.t | 80 +++++++++++++++++++++++------------------------ t/v2-add-remove-add.t | 6 ++-- t/v2mda.t | 4 +-- t/v2writable.t | 2 +- t/watch_maildir_v2.t | 26 +++++++-------- 14 files changed, 104 insertions(+), 102 deletions(-) diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 2f59f8c..ff20d7f 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -27,14 +27,13 @@ sub generate { sub generate_thread_atom { my ($ctx) = @_; my $mid = $ctx->{mid}; - my $res = $ctx->{srch}->get_thread($mid); - return _no_thread() unless $res->{total}; + my $msgs = $ctx->{srch}->get_thread($mid); + return _no_thread() unless @$msgs; my $ibx = $ctx->{-inbox}; my $html_url = $ibx->base_url($ctx->{env}); $html_url .= PublicInbox::Hval->new_msgid($mid)->{href}; $ctx->{-html_url} = $html_url; - my $msgs = $res->{msgs}; PublicInbox::WwwAtomStream->response($ctx, 200, sub { while (my $smsg = shift @$msgs) { $ibx->smsg_mime($smsg) and return $smsg; @@ -114,10 +113,10 @@ sub recent_msgs { my $o = $qp ? 
$qp->{o} : 0; $o += 0; $o = 0 if $o < 0; - my $res = $ibx->recent({ limit => $max, offset => $o }); + my $msgs = $ibx->recent({ limit => $max, offset => $o }); my $next = $o + $max; - $ctx->{next_page} = "o=$next" if $res->{total} >= $next; - return $res->{msgs}; + $ctx->{next_page} = "o=$next" if scalar(@$msgs) == $max; + return $msgs; } my $hex = '[a-f0-9]'; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 1b68f02..05de6be 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -217,12 +217,12 @@ sub set_filename ($$) { sub getline { my ($self) = @_; my $ctx = $self->{ctx} or return; - my $res; my $ibx = $ctx->{-inbox}; my $gz = $self->{gz}; + my $msgs = $self->{msgs}; do { # work on existing result set - while (defined(my $smsg = shift @{$self->{msgs}})) { + while (defined(my $smsg = shift @$msgs)) { my $msg = eval { $ibx->msg_by_smsg($smsg) } or next; $msg = Email::Simple->new($msg); $gz->write(PublicInbox::Mbox::msg_str($ctx, $msg, @@ -247,11 +247,9 @@ sub getline { } # refill result set - $res = $self->{cb}->($self->{opts}); - $self->{msgs} = $res->{msgs}; - $res = scalar @{$self->{msgs}}; - $self->{opts}->{offset} += $res; - } while ($res); + $msgs = $self->{msgs} = $self->{cb}->($self->{opts}); + $self->{opts}->{offset} += scalar @$msgs; + } while (@$msgs); $gz->close; delete $self->{ctx}; ${delete $self->{buf}}; diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index cf7a884..c74072a 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -88,10 +88,12 @@ LIMIT 1 SELECT * $cond ORDER BY ts ASC + return $msgs unless wantarray; + my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid); SELECT COUNT(num) $cond - { total => $nr, msgs => $msgs }; + ($nr, $msgs); } sub recent { @@ -100,10 +102,12 @@ sub recent { SELECT * FROM over WHERE num > 0 ORDER BY ts DESC + return $msgs unless wantarray; + my $nr = $self->{dbh}->selectrow_array(<<''); SELECT COUNT(num) FROM over WHERE num > 0 - { total => $nr, 
msgs => $msgs }; + ($nr, $msgs); } sub get_art { diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 9125124..84c0a22 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -185,10 +185,16 @@ sub get_thread { sub retry_reopen { my ($self, $cb) = @_; - my $ret; for my $i (1..10) { - eval { $ret = $cb->() }; - return $ret unless $@; + if (wantarray) { + my @ret; + eval { @ret = $cb->() }; + return @ret unless $@; + } else { + my $ret; + eval { $ret = $cb->() }; + return $ret unless $@; + } # Exception: The revision being read has been discarded - # you should call Xapian::Database::reopen() if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') { @@ -226,8 +232,9 @@ sub _enquire_once { my @msgs = map { PublicInbox::SearchMsg->load_doc($_->get_document); } $mset->items; + return \@msgs unless wantarray; - { total => $mset->get_matches_estimated, msgs => \@msgs } + ($mset->get_matches_estimated, \@msgs) } # read-write diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 8ac405f..cad90a7 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -408,9 +408,7 @@ sub thread_html { my ($ctx) = @_; my $mid = $ctx->{mid}; my $srch = $ctx->{srch}; - my $sres = $srch->get_thread($mid); - my $msgs = $sres->{msgs}; - my $nr = $sres->{total}; + my ($nr, $msgs) = $srch->get_thread($mid); return missing_thread($ctx) if $nr == 0; my $skel = '<hr><pre>'; $skel .= $nr == 1 ? 'only message in thread' : 'end of thread'; @@ -649,8 +647,7 @@ sub thread_skel { my ($dst, $ctx, $hdr, $tpfx) = @_; my $srch = $ctx->{srch}; my $mid = mids($hdr)->[0]; - my $sres = $srch->get_thread($mid); - my $nr = $sres->{total}; + my ($nr, $msgs) = $srch->get_thread($mid); my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) . qq(|<a\nhref="${tpfx}t/#u">nested</a>] ) . qq(<a\nhref="${tpfx}t.mbox.gz">mbox.gz</a> ) . 
@@ -680,12 +677,11 @@ sub thread_skel { $ctx->{prev_attr} = ''; $ctx->{prev_level} = 0; $ctx->{dst} = $dst; - $sres = $sres->{msgs}; # reduce hash lookups in skel_dump my $ibx = $ctx->{-inbox}; $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; - walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump); + walk_thread(thread_results($ctx, $msgs), $ctx, *skel_dump); $ctx->{parent_msg} = $parent; } @@ -1066,11 +1062,10 @@ sub index_topics { $ctx->{order} = []; my $srch = $ctx->{srch}; - my $sres = $ctx->{-inbox}->recent({offset => $off, limit => 200 }); - $sres = $sres->{msgs}; - my $nr = scalar @$sres; + my $msgs = $ctx->{-inbox}->recent({offset => $off, limit => 200 }); + my $nr = scalar @$msgs; if ($nr) { - walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic); + walk_thread(thread_results($ctx, $msgs), $ctx, *acc_topic); } $ctx->{-next_o} = $off + $nr; $ctx->{-cur_o} = $off; diff --git a/t/altid.t b/t/altid.t index 0f3b86c..d4f6152 100644 --- a/t/altid.t +++ b/t/altid.t @@ -50,12 +50,11 @@ my $altid = [ "serial:gmane:file=$alt_file" ]; { my $ro = PublicInbox::Search->new($git_dir, $altid); - my $res = $ro->query("gmane:1234"); - is($res->{total}, 1, 'got one match'); - is($res->{msgs}->[0]->mid, 'a@example.com'); + my $msgs = $ro->query("gmane:1234"); + is_deeply([map { $_->mid } @$msgs], ['a@example.com'], 'got one match'); - $res = $ro->query("gmane:666"); - is($res->{total}, 0, 'body did NOT match'); + $msgs = $ro->query("gmane:666"); + is_deeply([], $msgs, 'body did NOT match'); }; { diff --git a/t/convert-compact.t b/t/convert-compact.t index 92a6a9c..e2ba40a 100644 --- a/t/convert-compact.t +++ b/t/convert-compact.t @@ -99,8 +99,8 @@ foreach (@xdir) { is($st[2] & 07777, -f _ ? 
0444 : 0755, 'sharedRepository respected after v2 compact'); } -my $res = $ibx->recent({limit => 1000}); -is($res->{msgs}->[0]->{mid}, 'a-mid@b', 'message exists in history'); -is(scalar @{$res->{msgs}}, 1, 'only one message in history'); +my $msgs = $ibx->recent({limit => 1000}); +is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); +is(scalar @$msgs, 1, 'only one message in history'); done_testing(); diff --git a/t/over.t b/t/over.t index bfe331e..2a7e8d1 100644 --- a/t/over.t +++ b/t/over.t @@ -40,7 +40,7 @@ my $ddd = compress(''); foreach my $s ('', undef) { $over->add_over([0, 98, [ 'a' ], [], $s, $ddd]); $over->add_over([0, 99, [ 'b' ], [], $s, $ddd]); - my $msgs = [ map { $_->{num} } @{$over->get_thread('a')->{msgs}} ]; + my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98], $msgs, 'messages not linked by empty subject'); } @@ -48,13 +48,13 @@ foreach my $s ('', undef) { $over->add_over([0, 98, [ 'a' ], [], 's', $ddd]); $over->add_over([0, 99, [ 'b' ], [], 's', $ddd]); foreach my $mid (qw(a b)) { - my $msgs = [ map { $_->{num} } @{$over->get_thread('a')->{msgs}} ]; + my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98, 99], $msgs, 'linked messages by subject'); } $over->add_over([0, 98, [ 'a' ], [], 's', $ddd]); $over->add_over([0, 99, [ 'b' ], ['a'], 'diff', $ddd]); foreach my $mid (qw(a b)) { - my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)->{msgs}} ]; + my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)} ]; is_deeply([98, 99], $msgs, "linked messages by Message-ID: <$mid>"); } diff --git a/t/search-thr-index.t b/t/search-thr-index.t index 3ddef80..2aa97bf 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -49,8 +49,8 @@ foreach (reverse split(/\n\n/, $data)) { my $prev; foreach my $mid (@mids) { - my $res = $rw->{over}->get_thread($mid); - is(3, $res->{total}, "got all messages from $mid"); + my $msgs = $rw->{over}->get_thread($mid); + is(3, scalar(@$msgs), "got all 
messages from $mid"); } $rw->commit_txn_lazy; diff --git a/t/search.t b/t/search.t index 51adb9f..c9bef71 100644 --- a/t/search.t +++ b/t/search.t @@ -82,8 +82,8 @@ my $rw_commit = sub { } sub filter_mids { - my ($res) = @_; - sort(map { $_->mid } @{$res->{msgs}}); + my ($msgs) = @_; + sort(map { $_->mid } @$msgs); } { @@ -106,12 +106,12 @@ sub filter_mids { is_deeply(\@res, \@exp, 'got expected results for s:"" match'); $res = $ro->query('s:"Hello world"', {limit => 1}); - is(scalar @{$res->{msgs}}, 1, "limit works"); - my $first = $res->{msgs}->[0]; + is(scalar @$res, 1, "limit works"); + my $first = $res->[0]; $res = $ro->query('s:"Hello world"', {offset => 1}); - is(scalar @{$res->{msgs}}, 1, "offset works"); - my $second = $res->{msgs}->[0]; + is(scalar @$res, 1, "offset works"); + my $second = $res->[0]; isnt($first, $second, "offset returned different result from limit"); } @@ -147,7 +147,7 @@ sub filter_mids { my $ghost_id = $rw->add_message($was_ghost); is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id"); - my $msgs = $rw->{over}->get_thread('ghost-message@s')->{msgs}; + my $msgs = $rw->{over}->get_thread('ghost-message@s'); is(scalar(@$msgs), 2, 'got both messages in ghost thread'); foreach (qw(sid tid)) { is($msgs->[0]->{$_}, $msgs->[1]->{$_}, "{$_} match"); @@ -169,7 +169,7 @@ sub filter_mids { # body $res = $ro->query('goodbye'); - is($res->{msgs}->[0]->mid, 'last@s', 'got goodbye message body'); + is($res->[0]->mid, 'last@s', 'got goodbye message body'); } # long message-id @@ -215,7 +215,7 @@ sub filter_mids { $rw_commit->(); $ro->reopen; my $t = $ro->get_thread('root@s'); - is($t->{total}, 4, "got all 4 mesages in thread"); + is(scalar(@$t), 4, "got all 4 mesages in thread"); my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid); @res = filter_mids($t); is_deeply(\@res, \@exp, "get_thread works"); @@ -244,13 +244,13 @@ sub filter_mids { ], body => "theatre\nfade\n")); my $res = $rw->query("theatre"); - is($res->{total}, 2, 
"got both matches"); - is($res->{msgs}->[0]->mid, 'nquote@a', "non-quoted scores higher"); - is($res->{msgs}->[1]->mid, 'quote@a', "quoted result still returned"); + is(scalar(@$res), 2, "got both matches"); + is($res->[0]->mid, 'nquote@a', "non-quoted scores higher"); + is($res->[1]->mid, 'quote@a', "quoted result still returned"); $res = $rw->query("illusions"); - is($res->{total}, 1, "got a match for quoted text"); - is($res->{msgs}->[0]->mid, 'quote@a', + is(scalar(@$res), 1, "got a match for quoted text"); + is($res->[0]->mid, 'quote@a', "quoted result returned if nothing else"); } @@ -293,34 +293,34 @@ sub filter_mids { } { - my $res = $ro->query('d:19931002..20101002'); - ok(scalar @{$res->{msgs}} > 0, 'got results within range'); - $res = $ro->query('d:20101003..'); - is(scalar @{$res->{msgs}}, 0, 'nothing after 20101003'); - $res = $ro->query('d:..19931001'); - is(scalar @{$res->{msgs}}, 0, 'nothing before 19931001'); + my $msgs = $ro->query('d:19931002..20101002'); + ok(scalar(@$msgs) > 0, 'got results within range'); + $msgs = $ro->query('d:20101003..'); + is(scalar(@$msgs), 0, 'nothing after 20101003'); + $msgs = $ro->query('d:..19931001'); + is(scalar(@$msgs), 0, 'nothing before 19931001'); } # names and addresses { my $res = $ro->query('t:list@example.com'); - is(scalar @{$res->{msgs}}, 6, 'searched To: successfully'); - foreach my $smsg (@{$res->{msgs}}) { + is(scalar @$res, 6, 'searched To: successfully'); + foreach my $smsg (@$res) { like($smsg->to, qr/\blist\@example\.com\b/, 'to appears'); } $res = $ro->query('tc:list@example.com'); - is(scalar @{$res->{msgs}}, 6, 'searched To+Cc: successfully'); - foreach my $smsg (@{$res->{msgs}}) { + is(scalar @$res, 6, 'searched To+Cc: successfully'); + foreach my $smsg (@$res) { my $tocc = join("\n", $smsg->to, $smsg->cc); like($tocc, qr/\blist\@example\.com\b/, 'tocc appears'); } foreach my $pfx ('tcf:', 'c:') { $res = $ro->query($pfx . 
'foo@example.com'); - is(scalar @{$res->{msgs}}, 1, + is(scalar @$res, 1, "searched $pfx successfully for Cc:"); - foreach my $smsg (@{$res->{msgs}}) { + foreach my $smsg (@$res) { like($smsg->cc, qr/\bfoo\@example\.com\b/, 'cc appears'); } @@ -328,9 +328,9 @@ sub filter_mids { foreach my $pfx ('', 'tcf:', 'f:') { $res = $ro->query($pfx . 'Laggy'); - is(scalar @{$res->{msgs}}, 1, + is(scalar(@$res), 1, "searched $pfx successfully for From:"); - foreach my $smsg (@{$res->{msgs}}) { + foreach my $smsg (@$res) { like($smsg->from, qr/Laggy Sender/, "From appears with $pfx"); } @@ -341,23 +341,23 @@ sub filter_mids { $rw_commit->(); $ro->reopen; my $res = $ro->query('b:hello'); - is(scalar @{$res->{msgs}}, 0, 'no match on body search only'); + is(scalar(@$res), 0, 'no match on body search only'); $res = $ro->query('bs:smith'); - is(scalar @{$res->{msgs}}, 0, + is(scalar(@$res), 0, 'no match on body+subject search for From'); $res = $ro->query('q:theatre'); - is(scalar @{$res->{msgs}}, 1, 'only one quoted body'); - like($res->{msgs}->[0]->from, qr/\AQuoter/, 'got quoted body'); + is(scalar(@$res), 1, 'only one quoted body'); + like($res->[0]->from, qr/\AQuoter/, 'got quoted body'); $res = $ro->query('nq:theatre'); - is(scalar @{$res->{msgs}}, 1, 'only one non-quoted body'); - like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body'); + is(scalar @$res, 1, 'only one non-quoted body'); + like($res->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body'); foreach my $pfx (qw(b: bs:)) { $res = $ro->query($pfx . 
'theatre'); - is(scalar @{$res->{msgs}}, 2, "searched both bodies for $pfx"); - like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, + is(scalar @$res, 2, "searched both bodies for $pfx"); + like($res->[0]->from, qr/\ANon-Quoter/, "non-quoter first for $pfx"); } } @@ -396,13 +396,13 @@ sub filter_mids { $rw_commit->(); $ro->reopen; my $n = $ro->query('n:attached_fart.txt'); - is(scalar @{$n->{msgs}}, 1, 'got result for n:'); + is(scalar @$n, 1, 'got result for n:'); my $res = $ro->query('part_deux.txt'); - is(scalar @{$res->{msgs}}, 1, 'got result without n:'); - is($n->{msgs}->[0]->mid, $res->{msgs}->[0]->mid, + is(scalar @$res, 1, 'got result without n:'); + is($n->[0]->mid, $res->[0]->mid, 'same result with and without'); my $txt = $ro->query('"inside another"'); - is($txt->{msgs}->[0]->mid, $res->{msgs}->[0]->mid, + is($txt->[0]->mid, $res->[0]->mid, 'search inside text attachments works'); } $rw->commit_txn_lazy; diff --git a/t/v2-add-remove-add.t b/t/v2-add-remove-add.t index b6c5887..c8d12d3 100644 --- a/t/v2-add-remove-add.t +++ b/t/v2-add-remove-add.t @@ -35,8 +35,8 @@ ok($im->add($mime), 'message added'); ok($im->remove($mime), 'message added'); ok($im->add($mime), 'message added again'); $im->done; -my $res = $ibx->recent({limit => 1000}); -is($res->{msgs}->[0]->{mid}, 'a-mid@b', 'message exists in history'); -is(scalar @{$res->{msgs}}, 1, 'only one message in history'); +my $msgs = $ibx->recent({limit => 1000}); +is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); +is(scalar @$msgs, 1, 'only one message in history'); done_testing(); diff --git a/t/v2mda.t b/t/v2mda.t index be27ca0..ca1bb09 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -52,8 +52,8 @@ ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr), 'mda delivered a message'); $ibx = PublicInbox::Inbox->new($ibx); -my $res = $ibx->search->query(''); -my $saved = $ibx->smsg_mime($res->{msgs}->[0]); +my $msgs = $ibx->search->query(''); +my $saved = $ibx->smsg_mime($msgs->[0]); 
is($saved->{mime}->as_string, $mime->as_string, 'injected message'); done_testing(); diff --git a/t/v2writable.t b/t/v2writable.t index 7e29ef7..1e8e404 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -249,7 +249,7 @@ EOF ok($im->add($mime), 'add excessively long References'); $im->barrier; - my $msgs = $ibx->search->reopen->get_thread('x'x244)->{msgs}; + my $msgs = $ibx->search->reopen->get_thread('x'x244); is(2, scalar(@$msgs), 'got both messages'); is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid'); is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref'); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index 85130e3..a76e413 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -50,8 +50,8 @@ ok($ibx, 'found inbox by name'); my $srch = $ibx->search; PublicInbox::WatchMaildir->new($config)->scan('full'); -my $res = $srch->reopen->query(''); -is($res->{total}, 1, 'got one revision'); +my ($total, undef) = $srch->reopen->query(''); +is($total, 1, 'got one revision'); # my $git = PublicInbox::Git->new("$mainrepo/git/0.git"); # my @list = $git->qx(qw(rev-list refs/heads/master)); @@ -70,7 +70,7 @@ my $write_spam = sub { $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::WatchMaildir->new($config)->scan('full'); -is($srch->reopen->query('')->{total}, 0, 'deleted file'); +is(($srch->reopen->query(''))[0], 0, 'deleted file'); # check with scrubbing { @@ -80,16 +80,16 @@ the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); - $res = $srch->reopen->query(''); - is($res->{total}, 1, 'got one file back'); - my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]); + my ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 1, 'got one file back'); + my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message 
scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::WatchMaildir->new($config)->scan('full'); - $res = $srch->reopen->query(''); - is($res->{total}, 0, 'inbox is empty again'); + ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 0, 'inbox is empty again'); } { @@ -103,8 +103,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::WatchMaildir->new($config)->scan('full'); } - $res = $srch->reopen->query(''); - is($res->{total}, 0, 'inbox is still empty'); + ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 0, 'inbox is still empty'); is(unlink(glob("$maildir/new/*")), 1); } @@ -116,9 +116,9 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; PublicInbox::WatchMaildir->new($config)->scan('full'); - $res = $srch->reopen->query(''); - is($res->{total}, 1, 'inbox has one mail after spamc OK-ed a message'); - my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]); + ($nr, $msgs) = $srch->reopen->query(''); + is($nr, 1, 'inbox has one mail after spamc OK-ed a message'); + my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); } -- EW
400ms to load the top-level /$INBOX/ for /git/ is pretty bad on my systems as-is. 800ms for /lkml/ was unacceptable (though probably 400ms on decent servers). It's now around 60ms across the board with the new overview DB. This also affects OVER/XOVER/NEWNEWS NNTP commands, as well as Atom feeds. Eric Wong (Contractor, The Linux Foundation) (5): replace Xapian skeleton with SQLite overview DB v2writable: simplify barrier vs checkpoints t/over: test empty Subject: line matching www: rework query responses to avoid COUNT in SQLite over: speedup get_thread by avoiding JOIN MANIFEST | 5 +- lib/PublicInbox/Feed.pm | 11 +- lib/PublicInbox/Inbox.pm | 15 +- lib/PublicInbox/Mbox.pm | 12 +- lib/PublicInbox/Msgmap.pm | 1 + lib/PublicInbox/NNTP.pm | 29 +- lib/PublicInbox/Over.pm | 134 ++++++++ lib/PublicInbox/OverIdx.pm | 370 +++++++++++++++++++++ .../{SearchIdxSkeleton.pm => OverIdxFork.pm} | 131 +++++--- lib/PublicInbox/Search.pm | 124 ++----- lib/PublicInbox/SearchIdx.pm | 214 +++--------- lib/PublicInbox/SearchIdxPart.pm | 16 +- lib/PublicInbox/SearchMsg.pm | 26 +- lib/PublicInbox/V2Writable.pm | 114 +++---- lib/PublicInbox/View.pm | 17 +- script/public-inbox-compact | 32 +- t/altid.t | 9 +- t/convert-compact.t | 6 +- t/over.t | 63 ++++ t/psgi_search.t | 6 +- t/search-thr-index.t | 9 +- t/search.t | 92 ++--- t/v2-add-remove-add.t | 6 +- t/v2mda.t | 4 +- t/v2writable.t | 2 +- t/watch_maildir_v2.t | 26 +- 26 files changed, 907 insertions(+), 567 deletions(-) create mode 100644 lib/PublicInbox/Over.pm create mode 100644 lib/PublicInbox/OverIdx.pm rename lib/PublicInbox/{SearchIdxSkeleton.pm => OverIdxFork.pm} (54%) create mode 100644 t/over.t -- EW