* [PATCH 3/5] over: recent: remove expensive COUNT query
@ 2020-08-26 8:17 4% ` Eric Wong
0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2020-08-26 8:17 UTC (permalink / raw)
To: meta
As noted in commit 87dca6d8d5988c5eb54019cca342450b0b7dd6b7
("www: rework query responses to avoid COUNT in SQLite"),
COUNT on many rows is expensive on big SQLite DBs.
We've already stopped using that code path long ago in WWW
while -imapd and -nntpd never used it. So we'll adjust our
remaining test cases to not need it, either.
---
lib/PublicInbox/Over.pm | 8 +-------
t/indexlevels-mirror.t | 29 +++++++++++++++--------------
t/v2writable.t | 4 +++-
t/watch_maildir_v2.t | 30 +++++++++++++++---------------
4 files changed, 34 insertions(+), 37 deletions(-)
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index a2cf9f21..6b7d5216 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -244,15 +244,9 @@ sub recent {
$s = '+num > 0 ORDER BY ts DESC';
}
}
- my $msgs = do_get($self, <<"", $opts, @v);
+ do_get($self, <<"", $opts, @v);
SELECT ts,ds,ddd FROM over WHERE $s
- return $msgs unless wantarray;
-
- my $nr = $self->{dbh}->selectrow_array(<<'');
-SELECT COUNT(num) FROM over WHERE num > 0
-
- ($nr, $msgs);
}
sub get_art {
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index 859c2c17..27533546 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -49,8 +49,8 @@ my $import_index_incremental = sub {
inboxdir => $ibx->{inboxdir},
indexlevel => $level
});
- my ($nr, $msgs) = $ro_master->recent;
- is($nr, 1, 'only one message in master, so far');
+ my $msgs = $ro_master->recent;
+ is(scalar(@$msgs), 1, 'only one message in master, so far');
is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed');
# clone
@@ -79,8 +79,8 @@ my $import_index_incremental = sub {
inboxdir => $mirror,
indexlevel => $level,
});
- ($nr, $msgs) = $ro_mirror->recent;
- is($nr, 1, 'only one message, so far');
+ $msgs = $ro_mirror->recent;
+ is(scalar(@$msgs), 1, 'only one message, so far');
is($msgs->[0]->{mid}, 'm@1', 'read first message');
# update master
@@ -91,16 +91,16 @@ my $import_index_incremental = sub {
# mirror updates
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
- ($nr, $msgs) = $ro_mirror->recent;
- is($nr, 2, '2nd message seen in mirror');
+ $msgs = $ro_mirror->recent;
+ is(scalar(@$msgs), 2, '2nd message seen in mirror');
is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
['m@1','m@2'], 'got both messages in mirror');
# incremental index master (required for v1)
ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]),
'index master OK');
- ($nr, $msgs) = $ro_master->recent;
- is($nr, 2, '2nd message seen in master');
+ $msgs = $ro_master->recent;
+ is(scalar(@$msgs), 2, '2nd message seen in master');
is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
['m@1','m@2'], 'got both messages in master');
@@ -121,15 +121,15 @@ my $import_index_incremental = sub {
is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level,
'indexlevel detectable by Admin after xcpdb v' .$v.$level);
delete $ro_mirror->{$_} for (qw(over search));
- ($nr, $msgs) = $ro_mirror->search->query('m:m@2');
- is($nr, 1, "v$v found m\@2 via Xapian on $level");
+ $msgs = $ro_mirror->search->query('m:m@2');
+ is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level");
}
# sync the mirror
is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK');
ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK");
- ($nr, $msgs) = $ro_mirror->recent;
- is($nr, 1, '2nd message gone from mirror');
+ $msgs = $ro_mirror->recent;
+ is(scalar(@$msgs), 1, '2nd message gone from mirror');
is_deeply([map { $_->{mid} } @$msgs], ['m@1'],
'message unavailable in mirror');
@@ -138,8 +138,9 @@ my $import_index_incremental = sub {
'no Xapian shard directories for v2 basic');
}
if ($level ne 'basic') {
- ($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2');
- is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level");
+ $msgs = $ro_mirror->search->reopen->query('m:m@2');
+ is(scalar(@$msgs), 0,
+ "v$v m\@2 gone from Xapian in mirror on $level");
}
# add another message to master and have the mirror
diff --git a/t/v2writable.t b/t/v2writable.t
index 2bd7a400..9e4547ba 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -120,7 +120,9 @@ if ('ensure git configs are correct') {
$mime->header_set('References', '<zz-mid@b>');
ok($im->add($mime), 'message with multiple Message-ID');
$im->done;
- my ($total, undef) = $ibx->over->recent;
+ my $total = $ibx->over->dbh->selectrow_array(<<'');
+SELECT COUNT(*) FROM over WHERE num > 0
+
is($ibx->mm->num_highwater, $total, 'got expected highwater value');
my $srch = $ibx->search;
my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 });
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index f5b8e932..59ec247e 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -50,7 +50,7 @@ ok($ibx, 'found inbox by name');
my $srch = $ibx->search;
PublicInbox::WatchMaildir->new($config)->scan('full');
-my ($total, undef) = $srch->reopen->query('');
+my $total = scalar @{$srch->reopen->query('')};
is($total, 1, 'got one revision');
# my $git = PublicInbox::Git->new("$inboxdir/git/0.git");
@@ -70,7 +70,7 @@ my $write_spam = sub {
$write_spam->();
is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
PublicInbox::WatchMaildir->new($config)->scan('full');
-is(($srch->reopen->query(''))[0], 0, 'deleted file');
+is_deeply($srch->reopen->query(''), [], 'deleted file');
is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
# check with scrubbing
@@ -81,16 +81,16 @@ the body of a message to majordomo\@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 1, 'got one file back');
+ my $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 1, 'got one file back');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
$write_spam->();
PublicInbox::WatchMaildir->new($config)->scan('full');
- ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 0, 'inbox is empty again');
+ $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 0, 'inbox is empty again');
is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
}
@@ -105,8 +105,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
local $SIG{__WARN__} = sub {}; # quiet spam check warning
PublicInbox::WatchMaildir->new($config)->scan('full');
}
- my ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 0, 'inbox is still empty');
+ my $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 0, 'inbox is still empty');
is(unlink(glob("$maildir/new/*")), 1);
}
@@ -118,8 +118,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
$config->{'publicinboxwatch.spamcheck'} = 'spamc';
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($nr, $msgs) = $srch->reopen->query('');
- is($nr, 1, 'inbox has one mail after spamc OK-ed a message');
+ my $msgs = $srch->reopen->query('');
+ is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
delete $config->{'publicinboxwatch.spamcheck'};
@@ -131,11 +131,11 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
$msg = do { local $/; <$fh> };
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7');
- is($nr, 1, 'diff postimage found');
+ my $msgs = $srch->reopen->query('dfpost:6e006fd7');
+ is(scalar(@$msgs), 1, 'diff postimage found');
my $post = $msgs->[0];
- ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c');
- is($nr, 1, 'diff preimage found');
+ $msgs = $srch->query('dfpre:090d998b6c2c');
+ is(scalar(@$msgs), 1, 'diff preimage found');
is($post->{blob}, $msgs->[0]->{blob}, 'same message');
}
@@ -162,7 +162,7 @@ both
EOF
PublicInbox::Emergency->new($maildir)->prepare(\$both);
PublicInbox::WatchMaildir->new($config)->scan('full');
- my ($total, $msgs) = $srch->reopen->query('m:both@b.com');
+ my $msgs = $srch->reopen->query('m:both@b.com');
my $v1 = $config->lookup_name('v1');
my $msg = $v1->git->cat_file($msgs->[0]->{blob});
is($both, $$msg, 'got original message back from v1');
^ permalink raw reply related [relevance 4%]
* [PATCH] doc: add release notes directory
@ 2019-09-14 19:50 7% Eric Wong
0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2019-09-14 19:50 UTC (permalink / raw)
To: meta
The v1.2.0 notes are a work-in-progress, while the others are copied
out of our mail archives.
Eventually, a NEWS file will be generated from these emails and
distributed in the release tarball. There'll also be an Atom
feed for the website reusing our feed generation code.
---
.gitattributes | 2 +
Documentation/RelNotes/v1.0.0.eml | 21 ++
Documentation/RelNotes/v1.1.0-pre1.eml | 295 +++++++++++++++++++++++++
Documentation/RelNotes/v1.2.0.wip | 40 ++++
MANIFEST | 4 +
5 files changed, 362 insertions(+)
create mode 100644 .gitattributes
create mode 100644 Documentation/RelNotes/v1.0.0.eml
create mode 100644 Documentation/RelNotes/v1.1.0-pre1.eml
create mode 100644 Documentation/RelNotes/v1.2.0.wip
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..bb53518
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Email signatures start with "-- \n"
+*.eml whitespace=-blank-at-eol
diff --git a/Documentation/RelNotes/v1.0.0.eml b/Documentation/RelNotes/v1.0.0.eml
new file mode 100644
index 0000000..ae6ea4e
--- /dev/null
+++ b/Documentation/RelNotes/v1.0.0.eml
@@ -0,0 +1,21 @@
+From e@80x24.org Thu Feb 8 02:33:57 2018
+Date: Thu, 8 Feb 2018 02:33:57 +0000
+From: Eric Wong <e@80x24.org>
+To: meta@public-inbox.org
+Subject: [ANNOUNCE] public-inbox 1.0.0
+Message-ID: <20180208023357.GA32591@80x24.org>
+
+After some 3.5 odd years of working on this, I suppose now is
+as good a time as any to tar this up and call it 1.0.0.
+
+The TODO list is still very long and there'll be some new
+development in coming weeks :>
+
+So, here you have a release:
+
+ https://public-inbox.org/releases/public-inbox-1.0.0.tar.gz
+
+Checksums, mainly as a safeguard against accidental file corruption:
+
+SHA-256 4a08569f3d99310f713bb32bec0aa4819d6b41871e0421ec4eec0657a5582216
+ (in other words, don't trust me; instead read the code :>)
diff --git a/Documentation/RelNotes/v1.1.0-pre1.eml b/Documentation/RelNotes/v1.1.0-pre1.eml
new file mode 100644
index 0000000..ee1ecc3
--- /dev/null
+++ b/Documentation/RelNotes/v1.1.0-pre1.eml
@@ -0,0 +1,295 @@
+From e@80x24.org Wed May 9 20:23:03 2018
+Date: Wed, 9 May 2018 20:23:03 +0000
+From: Eric Wong <e@80x24.org>
+To: meta@public-inbox.org
+Cc: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
+Subject: [ANNOUNCE] public-inbox 1.1.0-pre1
+Message-ID: <20180509202303.GA15156@dcvr>
+
+Pre-release for v2 repository support.
+Thanks to The Linux Foundation for supporting this work!
+
+https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz
+
+SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009
+(only intended to guard against accidental file corruption)
+
+shortlog below:
+
+Eric Wong (27):
+ nntp: improve fairness during XOVER and similar commands
+ nntp: do not drain rbuf if there is a command pending
+ extmsg: use news.gmane.org for Message-ID lookups
+ searchview: fix non-numeric comparison
+ mbox: do not barf on queries which return no results
+ nntp: allow and ignore empty commands
+ ensure SQLite and Xapian files respect core.sharedRepository
+ TODO: a few more updates
+ filter/rubylang: do not set altid on spam training
+ import: cleanup git cat-file processes when ->done
+ disallow "\t" and "\n" in OVER headers
+ searchidx: release lock again during v1 batch callback
+ searchidx: remove leftover debugging code
+ convert: copy description and git config from v1 repo
+ view: untangle loop when showing message headers
+ view: wrap To: and Cc: headers in HTML display
+ view: drop redundant References: display code
+ TODO: add EPOLLEXCLUSIVE item
+ searchview: do not blindly append "l" parameter to URL
+ search: avoid repeated mbox results from search
+ msgmap: add limit to response for NNTP
+ thread: prevent hidden threads in /$INBOX/ landing page
+ thread: sort incoming messages by Date
+ searchidx: preserve umask when starting/committing transactions
+ scripts/import_slrnspool: support v2 repos
+ scripts/import_slrnspool: cleanup progress messages
+ public-inbox 1.1.0-pre1
+
+Eric Wong (Contractor, The Linux Foundation) (239):
+ AUTHORS: add The Linux Foundation
+ watch_maildir: allow '-' in mail filename
+ scripts/import_vger_from_mbox: relax From_ line match slightly
+ import: stop writing legacy ssoma.index by default
+ import: begin supporting this without ssoma.lock
+ import: initial handling for v2
+ t/import: test for last_object_id insertion
+ content_id: add test case
+ searchmsg: add mid_mime import for _extract_mid
+ scripts/import_vger_from_mbox: support --dry-run option
+ import: APIs to support v2 use
+ search: free up 'Q' prefix for a real unique identifier
+ searchidx: fix comment around next_thread_id
+ address: extract more characters from email addresses
+ import: pass "raw" dates to git-fast-import(1)
+ scripts/import_vger_from_mbox: use v2 layout for import
+ import: quiet down warnings from bogus From: lines
+ import: allow the epoch (0s) as a valid time
+ extmsg: fix broken Xapian MID lookup
+ search: stop assuming Message-ID is unique
+ www: stop assuming mainrepo == git_dir
+ v2writable: initial cut for repo-rotation
+ git: reload alternates file on missing blob
+ v2: support Xapian + SQLite indexing
+ import_vger_from_inbox: allow "-V" option
+ import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering
+ v2: parallelize Xapian indexing
+ v2writable: round-robin to partitions based on article number
+ searchidxpart: increase pipe size for partitions
+ v2writable: warn on duplicate Message-IDs
+ searchidx: do not modify Xapian DB while iterating
+ v2/ui: some hacky things to get the PSGI UI to show up
+ v2/ui: retry DB reopens in a few more places
+ v2writable: cleanup unused pipes in partitions
+ searchidxpart: binmode
+ use PublicInbox::MIME consistently
+ searchidxpart: chomp line before splitting
+ searchidx*: name child subprocesses
+ searchidx: get rid of pointless index_blob wrapper
+ view: remove X-PI-TS reference
+ searchidxthread: load doc data for references
+ searchidxpart: force integers into add_message
+ search: reopen skeleton DB as well
+ searchidx: index values in the threader
+ search: use different Enquire object for skeleton queries
+ rename SearchIdxThread to SearchIdxSkeleton
+ v2writable: commit to skeleton via remote partitions
+ searchidxskeleton: extra error checking
+ searchidx: do not modify Xapian DB while iterating
+ search: query_xover uses skeleton DB iff available
+ v2/ui: get nntpd and init tests running on v2
+ v2writable: delete ::Import obj when ->done
+ search: remove informational "warning" message
+ searchidx: add PID to error message when die-ing
+ content_id: special treatment for Message-Id headers
+ evcleanup: disable outside of daemon
+ v2writable: deduplicate detection on add
+ evcleanup: do not create event loop if nothing was registered
+ mid: add `mids' and `references' methods for extraction
+ content_id: use `mids' and `references' for MID extraction
+ searchidx: use new `references' method for parsing References
+ content_id: no need to be human-friendly
+ v2writable: inject new Message-IDs on true duplicates
+ search: revert to using 'Q' as a uniQue id per-Xapian conventions
+ searchidx: support indexing multiple MIDs
+ mid: be strict with References, but loose on Message-Id
+ searchidx: avoid excessive XNQ indexing with diffs
+ searchidxskeleton: add a note about locking
+ v2writable: generated Message-ID goes first
+ searchidx: use add_boolean_term for internal terms
+ searchidx: add NNTP article number as a searchable term
+ mid: truncate excessively long MIDs early
+ nntp: use NNTP article numbers for lookups
+ nntp: fix NEWNEWS command
+ searchidx: store the primary MID in doc data for NNTP
+ import: consolidate object info for v2 imports
+ v2: avoid redundant/repeated configs for git partition repos
+ INSTALL: document more optional dependencies
+ search: favor skeleton DB for lookup_mail
+ search: each_smsg_by_mid uses skeleton if available
+ v2writable: remove unnecessary skeleton commit
+ favor Received: date over Date: header globally
+ import: fall back to Sender for extracting name and email
+ scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping
+ v2writable: detect and use previous partition count
+ extmsg: rework partial MID matching to favor current inbox
+ extmsg: rework partial MID matching to favor current inbox
+ content_id: use Sender header if From is not available
+ v2writable: support "barrier" operation to avoid reforking
+ use string ref for Email::Simple->new
+ v2writable: remove unnecessary idx_init call
+ searchidx: do not delete documents while iterating
+ search: allow ->reopen to be chainable
+ v2writable: implement remove correctly
+ skeleton: barrier init requires a lock
+ import: (v2) delete writes the blob into history in subdir
+ import: (v2): write deletes to a separate '_' subdirectory
+ import: implement barrier operation for v1 repos
+ mid: mid_mime uses v2-compatible mids function
+ watchmaildir: use content_digest to generate Message-Id
+ import: force Message-ID generation for v1 here
+ import: switch to URL-safe Base64 for Message-IDs
+ v2writable: test for idempotent removals
+ import: enable locking under v2
+ index: s/GIT_DIR/REPO_DIR/
+ Lock: new base class for writable lockers
+ t/watch_maildir: note the reason for FIFO creation
+ v2writable: ensure ->done is idempotent
+ watchmaildir: support v2 repositories
+ searchidxpart: s/barrier/remote_barrier/
+ v2writable: allow disabling parallelization
+ scripts/import_vger_from_mbox: filter out same headers as MDA
+ v2writable: add DEBUG_DIFF env support
+ v2writable: remove "resent" message for duplicate Message-IDs
+ content_id: do not take Message-Id into account
+ introduce InboxWritable class
+ import: discard all the same headers as MDA
+ InboxWritable: add mbox/maildir parsing + import logic
+ use both Date: and Received: times
+ msgmap: add tmp_clone to create an anonymous copy
+ fix syntax warnings
+ v2writable: support reindexing Xapian
+ t/altid.t: extra tests for mid_set
+ v2writable: add NNTP article number regeneration support
+ v2writable: clarify header cleanups
+ v2writable: DEBUG_DIFF respects $TMPDIR
+ feed: $INBOX/new.atom endpoint supports v2 inboxes
+ import: consolidate mid prepend logic, here
+ www: $MESSAGE_ID/raw endpoint supports "duplicates"
+ search: reopen DB if each_smsg_by_mid fails
+ t/psgi_v2: minimal test for Atom feed and t.mbox.gz
+ feed: fix new.html for v2
+ view: permalink (per-message) view shows multiple messages
+ searchidx: warn about vivifying multiple ghosts
+ v2writable: warn on unseen deleted files
+ www: get rid of unnecessary 'inbox' name reference
+ searchview: remove unnecessary imports from MID module
+ view: depend on SearchMsg for Message-ID
+ http: fix modification of read-only value
+ githttpbackend: avoid infinite loop on generic PSGI servers
+ www: support cloning individual v2 git partitions
+ http: fix modification of read-only value
+ githttpbackend: avoid infinite loop on generic PSGI servers
+ www: remove unnecessary ghost checks
+ v2writable: append, instead of prepending generated Message-ID
+ lookup by Message-ID favors the "primary" one
+ www: fix attachment downloads for conflicted Message-IDs
+ searchmsg: document why we store To: and Cc: for NNTP
+ public-inbox-convert: tool for converting old to new inboxes
+ v2writable: support purging messages from git entirely
+ search: cleanup uniqueness checking
+ search: get rid of most lookup_* subroutines
+ search: move find_doc_ids to searchidx
+ v2writable: cleanup: get rid of unused fields
+ mbox: avoid extracting Message-ID for linkification
+ www: cleanup expensive fallback for legacy URLs
+ view: get rid of some unnecessary imports
+ search: retry_reopen on first_smsg_by_mid
+ import: run_die supports redirects as spawn does
+ v2writable: initializing an existing inbox is idempotent
+ public-inbox-compact: new tool for driving xapian-compact
+ mda: support v2 inboxes
+ search: warn on reopens and die on total failure
+ v2writable: allow gaps in git partitions
+ v2writable: convert some fatal reindex errors to warnings
+ wwwstream: flesh out clone instructions for v2
+ v2writable: go backwards through alternate Message-IDs
+ view: speed up homepage loading time with date clamp
+ view: drop load_results
+ feed: optimize query for feeds, too
+ msgtime: parse 3-digit years properly
+ convert: avoid redundant "done\n" statement for fast-import
+ search: move permissions handling to InboxWritable
+ t/v2writable: use simplify permissions reading
+ v2: respect core.sharedRepository in git configs
+ searchidx: correct warning for over-vivification
+ v2: one file, really
+ v2writable: fix parallel termination
+ truncate Message-IDs and References consistently
+ scripts/import_vger_from_mbox: set address properly
+ search: reduce columns stored in Xapian
+ replace Xapian skeleton with SQLite overview DB
+ v2writable: simplify barrier vs checkpoints
+ t/over: test empty Subject: line matching
+ www: rework query responses to avoid COUNT in SQLite
+ over: speedup get_thread by avoiding JOIN
+ nntp: fix NEWNEWS command
+ t/thread-all.t: modernize test to support modern inboxes
+ rename+rewrite test using Benchmark module
+ nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
+ view: avoid offset during pagination
+ mbox: remove remaining OFFSET usage in SQLite
+ msgmap: replace id_batch with ids_after
+ nntp: simplify the long_response API
+ searchidx: ensure duplicated Message-IDs can be linked together
+ init: s/GIT_DIR/REPO_DIR/ in usage
+ import: rewrite less history during purge
+ v2: support incremental indexing + purge
+ v2writable: do not modify DBs while iterating for ->remove
+ v2writable: recount partitions after acquiring lock
+ searchmsg: remove unused `tid' and `path' methods
+ search: remove unnecessary OP_AND of query
+ mbox: do not sort search results
+ searchview: minor cleanup
+ support altid mechanism for v2
+ compact: better handling of over.sqlite3* files
+ v2writable: remove redundant remove from Over DB
+ v2writable: allow tracking parallel versions
+ v2writable: refer to git each repository as "epoch"
+ over: use only supported and safe SQLite APIs
+ search: index and allow searching by date-time
+ altid: fix miscopied field name
+ nntp: set Xref across multiple inboxes
+ www: favor reading more from SQLite, and less from Xapian
+ ensure Xapian and SQLite are still optional for v1 tests
+ psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological
+ over: avoid excessive SELECT
+ over: remove forked subprocess
+ v2writable: reduce barriers
+ index: allow specifying --jobs=0 to disable multiprocess
+ convert: support converting with altid defined
+ store less data in the Xapian document
+ msgmap: speed up minmax with separate queries
+ feed: respect feedmax, again
+ v1: remove articles from overview DB
+ compact: do not merge v2 repos by default
+ v2writable: reduce partititions by one
+ search: preserve References in Xapian smsg for x=t view
+ v2: generate better Message-IDs for duplicates
+ v2: improve deduplication checks
+ import: cat_blob drops leading 'From ' lines like Inbox
+ searchidx: regenerate and avoid article number gaps on full index
+ extmsg: remove expensive git path checks
+ use %H consistently to disable abbreviations
+ searchidx: increase term positions for all text terms
+ searchidx: revert default BATCH_BYTES to 1_000_000
+ Merge remote-tracking branch 'origin/master' into v2
+ fix tests to run without Xapian installed
+ extmsg: use Xapian only for partial matches
+
+Jonathan Corbet (3):
+ Don't use LIMIT in UPDATE statements
+ Update the installation instructions with Fedora package names
+ Allow specification of the number of search results to return
+--
+git clone https://public-inbox.org/ public-inbox
+(working on a homepage... sorta :)
diff --git a/Documentation/RelNotes/v1.2.0.wip b/Documentation/RelNotes/v1.2.0.wip
new file mode 100644
index 0000000..41236a0
--- /dev/null
+++ b/Documentation/RelNotes/v1.2.0.wip
@@ -0,0 +1,40 @@
+To: meta@public-inbox.org
+Subject: [WIP] public-inbox 1.2.0
+
+* first non-pre/rc release with v2 format support for scalability.
+ See public-inbox-v2-format(5) manpage for more details.
+
+* new admin tools for v2 repos:
+ - public-inbox-convert - converts v1 to v2 repo formats
+ - public-inbox-compact - v2 convenience wrapper for xapian-compact(1)
+ - public-inbox-purge - purges entire messages out of v2 history
+ - public-inbox-edit - edits sensitive data out of messages from v2 history
+ - public-inbox-xcpdb - copydatabase(1) wrapper to upgrade Xapian formats
+ (e.g. from "chert" to "glass") and resharding
+ of v2 repos
+
+* SQLite3 support decoupled from Xapian support, and Xapian DBs may be
+ configured without phrase support to save space. See "indexlevel" in
+ public-inbox-config(5) manpage for more info.
+
+* public-inbox-nntpd
+ - support STARTTLS and NNTPS
+ - support COMPRESS extension
+ - fix several RFC3977 compliance bugs
+ - improved interoperability with picky clients such as leafnode
+
+* public-inbox-watch
+ - support multiple spam training directories
+ - support mapping multiple inboxes per Maildir
+
+* PublicInbox::WWW
+ - grokmirror-compatible manifest.js.gz endpoint generation
+ - user-configurable color support in $INBOX_URL/_/text/color/
+ - BOFHs may set default colors via "publicinbox.css"
+ (see public-inbox-config(5))
+
+* Danga::Socket is no longer a runtime dependency of daemons.
+
+* improved FreeBSD support
+
+See archives at https://public-inbox.org/meta/ for all history.
diff --git a/MANIFEST b/MANIFEST
index f5290b4..ecf239f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,7 +1,11 @@
+.gitattributes
.gitignore
AUTHORS
COPYING
Documentation/.gitignore
+Documentation/RelNotes/v1.0.0.eml
+Documentation/RelNotes/v1.1.0-pre1.eml
+Documentation/RelNotes/v1.2.0.wip
Documentation/dc-dlvr-spam-flow.txt
Documentation/design_notes.txt
Documentation/design_www.txt
--
EW
^ permalink raw reply related [relevance 7%]
* [ANNOUNCE] public-inbox 1.1.0-pre1
@ 2018-05-09 20:23 5% Eric Wong
0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2018-05-09 20:23 UTC (permalink / raw)
To: meta; +Cc: Konstantin Ryabitsev
Pre-release for v2 repository support.
Thanks to The Linux Foundation for supporting this work!
https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz
SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009
(only intended to guard against accidental file corruption)
shortlog below:
Eric Wong (27):
nntp: improve fairness during XOVER and similar commands
nntp: do not drain rbuf if there is a command pending
extmsg: use news.gmane.org for Message-ID lookups
searchview: fix non-numeric comparison
mbox: do not barf on queries which return no results
nntp: allow and ignore empty commands
ensure SQLite and Xapian files respect core.sharedRepository
TODO: a few more updates
filter/rubylang: do not set altid on spam training
import: cleanup git cat-file processes when ->done
disallow "\t" and "\n" in OVER headers
searchidx: release lock again during v1 batch callback
searchidx: remove leftover debugging code
convert: copy description and git config from v1 repo
view: untangle loop when showing message headers
view: wrap To: and Cc: headers in HTML display
view: drop redundant References: display code
TODO: add EPOLLEXCLUSIVE item
searchview: do not blindly append "l" parameter to URL
search: avoid repeated mbox results from search
msgmap: add limit to response for NNTP
thread: prevent hidden threads in /$INBOX/ landing page
thread: sort incoming messages by Date
searchidx: preserve umask when starting/committing transactions
scripts/import_slrnspool: support v2 repos
scripts/import_slrnspool: cleanup progress messages
public-inbox 1.1.0-pre1
Eric Wong (Contractor, The Linux Foundation) (239):
AUTHORS: add The Linux Foundation
watch_maildir: allow '-' in mail filename
scripts/import_vger_from_mbox: relax From_ line match slightly
import: stop writing legacy ssoma.index by default
import: begin supporting this without ssoma.lock
import: initial handling for v2
t/import: test for last_object_id insertion
content_id: add test case
searchmsg: add mid_mime import for _extract_mid
scripts/import_vger_from_mbox: support --dry-run option
import: APIs to support v2 use
search: free up 'Q' prefix for a real unique identifier
searchidx: fix comment around next_thread_id
address: extract more characters from email addresses
import: pass "raw" dates to git-fast-import(1)
scripts/import_vger_from_mbox: use v2 layout for import
import: quiet down warnings from bogus From: lines
import: allow the epoch (0s) as a valid time
extmsg: fix broken Xapian MID lookup
search: stop assuming Message-ID is unique
www: stop assuming mainrepo == git_dir
v2writable: initial cut for repo-rotation
git: reload alternates file on missing blob
v2: support Xapian + SQLite indexing
import_vger_from_inbox: allow "-V" option
import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering
v2: parallelize Xapian indexing
v2writable: round-robin to partitions based on article number
searchidxpart: increase pipe size for partitions
v2writable: warn on duplicate Message-IDs
searchidx: do not modify Xapian DB while iterating
v2/ui: some hacky things to get the PSGI UI to show up
v2/ui: retry DB reopens in a few more places
v2writable: cleanup unused pipes in partitions
searchidxpart: binmode
use PublicInbox::MIME consistently
searchidxpart: chomp line before splitting
searchidx*: name child subprocesses
searchidx: get rid of pointless index_blob wrapper
view: remove X-PI-TS reference
searchidxthread: load doc data for references
searchidxpart: force integers into add_message
search: reopen skeleton DB as well
searchidx: index values in the threader
search: use different Enquire object for skeleton queries
rename SearchIdxThread to SearchIdxSkeleton
v2writable: commit to skeleton via remote partitions
searchidxskeleton: extra error checking
searchidx: do not modify Xapian DB while iterating
search: query_xover uses skeleton DB iff available
v2/ui: get nntpd and init tests running on v2
v2writable: delete ::Import obj when ->done
search: remove informational "warning" message
searchidx: add PID to error message when die-ing
content_id: special treatment for Message-Id headers
evcleanup: disable outside of daemon
v2writable: deduplicate detection on add
evcleanup: do not create event loop if nothing was registered
mid: add `mids' and `references' methods for extraction
content_id: use `mids' and `references' for MID extraction
searchidx: use new `references' method for parsing References
content_id: no need to be human-friendly
v2writable: inject new Message-IDs on true duplicates
search: revert to using 'Q' as a uniQue id per-Xapian conventions
searchidx: support indexing multiple MIDs
mid: be strict with References, but loose on Message-Id
searchidx: avoid excessive XNQ indexing with diffs
searchidxskeleton: add a note about locking
v2writable: generated Message-ID goes first
searchidx: use add_boolean_term for internal terms
searchidx: add NNTP article number as a searchable term
mid: truncate excessively long MIDs early
nntp: use NNTP article numbers for lookups
nntp: fix NEWNEWS command
searchidx: store the primary MID in doc data for NNTP
import: consolidate object info for v2 imports
v2: avoid redundant/repeated configs for git partition repos
INSTALL: document more optional dependencies
search: favor skeleton DB for lookup_mail
search: each_smsg_by_mid uses skeleton if available
v2writable: remove unnecessary skeleton commit
favor Received: date over Date: header globally
import: fall back to Sender for extracting name and email
scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping
v2writable: detect and use previous partition count
extmsg: rework partial MID matching to favor current inbox
extmsg: rework partial MID matching to favor current inbox
content_id: use Sender header if From is not available
v2writable: support "barrier" operation to avoid reforking
use string ref for Email::Simple->new
v2writable: remove unnecessary idx_init call
searchidx: do not delete documents while iterating
search: allow ->reopen to be chainable
v2writable: implement remove correctly
skeleton: barrier init requires a lock
import: (v2) delete writes the blob into history in subdir
import: (v2): write deletes to a separate '_' subdirectory
import: implement barrier operation for v1 repos
mid: mid_mime uses v2-compatible mids function
watchmaildir: use content_digest to generate Message-Id
import: force Message-ID generation for v1 here
import: switch to URL-safe Base64 for Message-IDs
v2writable: test for idempotent removals
import: enable locking under v2
index: s/GIT_DIR/REPO_DIR/
Lock: new base class for writable lockers
t/watch_maildir: note the reason for FIFO creation
v2writable: ensure ->done is idempotent
watchmaildir: support v2 repositories
searchidxpart: s/barrier/remote_barrier/
v2writable: allow disabling parallelization
scripts/import_vger_from_mbox: filter out same headers as MDA
v2writable: add DEBUG_DIFF env support
v2writable: remove "resent" message for duplicate Message-IDs
content_id: do not take Message-Id into account
introduce InboxWritable class
import: discard all the same headers as MDA
InboxWritable: add mbox/maildir parsing + import logic
use both Date: and Received: times
msgmap: add tmp_clone to create an anonymous copy
fix syntax warnings
v2writable: support reindexing Xapian
t/altid.t: extra tests for mid_set
v2writable: add NNTP article number regeneration support
v2writable: clarify header cleanups
v2writable: DEBUG_DIFF respects $TMPDIR
feed: $INBOX/new.atom endpoint supports v2 inboxes
import: consolidate mid prepend logic, here
www: $MESSAGE_ID/raw endpoint supports "duplicates"
search: reopen DB if each_smsg_by_mid fails
t/psgi_v2: minimal test for Atom feed and t.mbox.gz
feed: fix new.html for v2
view: permalink (per-message) view shows multiple messages
searchidx: warn about vivifying multiple ghosts
v2writable: warn on unseen deleted files
www: get rid of unnecessary 'inbox' name reference
searchview: remove unnecessary imports from MID module
view: depend on SearchMsg for Message-ID
http: fix modification of read-only value
githttpbackend: avoid infinite loop on generic PSGI servers
www: support cloning individual v2 git partitions
http: fix modification of read-only value
githttpbackend: avoid infinite loop on generic PSGI servers
www: remove unnecessary ghost checks
v2writable: append, instead of prepending generated Message-ID
lookup by Message-ID favors the "primary" one
www: fix attachment downloads for conflicted Message-IDs
searchmsg: document why we store To: and Cc: for NNTP
public-inbox-convert: tool for converting old to new inboxes
v2writable: support purging messages from git entirely
search: cleanup uniqueness checking
search: get rid of most lookup_* subroutines
search: move find_doc_ids to searchidx
v2writable: cleanup: get rid of unused fields
mbox: avoid extracting Message-ID for linkification
www: cleanup expensive fallback for legacy URLs
view: get rid of some unnecessary imports
search: retry_reopen on first_smsg_by_mid
import: run_die supports redirects as spawn does
v2writable: initializing an existing inbox is idempotent
public-inbox-compact: new tool for driving xapian-compact
mda: support v2 inboxes
search: warn on reopens and die on total failure
v2writable: allow gaps in git partitions
v2writable: convert some fatal reindex errors to warnings
wwwstream: flesh out clone instructions for v2
v2writable: go backwards through alternate Message-IDs
view: speed up homepage loading time with date clamp
view: drop load_results
feed: optimize query for feeds, too
msgtime: parse 3-digit years properly
convert: avoid redundant "done\n" statement for fast-import
search: move permissions handling to InboxWritable
t/v2writable: use simplify permissions reading
v2: respect core.sharedRepository in git configs
searchidx: correct warning for over-vivification
v2: one file, really
v2writable: fix parallel termination
truncate Message-IDs and References consistently
scripts/import_vger_from_mbox: set address properly
search: reduce columns stored in Xapian
replace Xapian skeleton with SQLite overview DB
v2writable: simplify barrier vs checkpoints
t/over: test empty Subject: line matching
www: rework query responses to avoid COUNT in SQLite
over: speedup get_thread by avoiding JOIN
nntp: fix NEWNEWS command
t/thread-all.t: modernize test to support modern inboxes
rename+rewrite test using Benchmark module
nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
view: avoid offset during pagination
mbox: remove remaining OFFSET usage in SQLite
msgmap: replace id_batch with ids_after
nntp: simplify the long_response API
searchidx: ensure duplicated Message-IDs can be linked together
init: s/GIT_DIR/REPO_DIR/ in usage
import: rewrite less history during purge
v2: support incremental indexing + purge
v2writable: do not modify DBs while iterating for ->remove
v2writable: recount partitions after acquiring lock
searchmsg: remove unused `tid' and `path' methods
search: remove unnecessary OP_AND of query
mbox: do not sort search results
searchview: minor cleanup
support altid mechanism for v2
compact: better handling of over.sqlite3* files
v2writable: remove redundant remove from Over DB
v2writable: allow tracking parallel versions
v2writable: refer to git each repository as "epoch"
over: use only supported and safe SQLite APIs
search: index and allow searching by date-time
altid: fix miscopied field name
nntp: set Xref across multiple inboxes
www: favor reading more from SQLite, and less from Xapian
ensure Xapian and SQLite are still optional for v1 tests
psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological
over: avoid excessive SELECT
over: remove forked subprocess
v2writable: reduce barriers
index: allow specifying --jobs=0 to disable multiprocess
convert: support converting with altid defined
store less data in the Xapian document
msgmap: speed up minmax with separate queries
feed: respect feedmax, again
v1: remove articles from overview DB
compact: do not merge v2 repos by default
v2writable: reduce partititions by one
search: preserve References in Xapian smsg for x=t view
v2: generate better Message-IDs for duplicates
v2: improve deduplication checks
import: cat_blob drops leading 'From ' lines like Inbox
searchidx: regenerate and avoid article number gaps on full index
extmsg: remove expensive git path checks
use %H consistently to disable abbreviations
searchidx: increase term positions for all text terms
searchidx: revert default BATCH_BYTES to 1_000_000
Merge remote-tracking branch 'origin/master' into v2
fix tests to run without Xapian installed
extmsg: use Xapian only for partial matches
Jonathan Corbet (3):
Don't use LIMIT in UPDATE statements
Update the installation instructions with Fedora package names
Allow specification of the number of search results to return
--
git clone https://public-inbox.org/ public-inbox
(working on a homepage... sorta :)
^ permalink raw reply [relevance 5%]
* v2 merged to master
@ 2018-04-19 1:20 6% Eric Wong
0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2018-04-19 1:20 UTC (permalink / raw)
To: meta
I actually merged master into v2, so it's a bit backwards :P
commit cfb8d16578e7f2f2e300f9f436205e4a8fc7f322
Merge: 1dc0f0c 119463b
Author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Date: Wed Apr 18 20:58:35 2018 +0000
Merge remote-tracking branch 'origin/master' into v2
I screwed up the indexing on http://hjrcffqmbrq6wope.onion/git/
so that's still going, but I think I was able to update the rest
of them (including the heavily trafficked non-.onion) w/o downtime.
The mirror at http://czquwvybam4bgbro.onion/git/ has been running
the v2 code for over a week, now.
Thanks to the Linux Foundation for funding this work. I will still
need to make some documentation updates and such.
Eric Wong (Contractor, The Linux Foundation) (237):
AUTHORS: add The Linux Foundation
watch_maildir: allow '-' in mail filename
scripts/import_vger_from_mbox: relax From_ line match slightly
import: stop writing legacy ssoma.index by default
import: begin supporting this without ssoma.lock
import: initial handling for v2
t/import: test for last_object_id insertion
content_id: add test case
searchmsg: add mid_mime import for _extract_mid
scripts/import_vger_from_mbox: support --dry-run option
import: APIs to support v2 use
search: free up 'Q' prefix for a real unique identifier
searchidx: fix comment around next_thread_id
address: extract more characters from email addresses
import: pass "raw" dates to git-fast-import(1)
scripts/import_vger_from_mbox: use v2 layout for import
import: quiet down warnings from bogus From: lines
import: allow the epoch (0s) as a valid time
extmsg: fix broken Xapian MID lookup
search: stop assuming Message-ID is unique
www: stop assuming mainrepo == git_dir
v2writable: initial cut for repo-rotation
git: reload alternates file on missing blob
v2: support Xapian + SQLite indexing
import_vger_from_inbox: allow "-V" option
import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering
v2: parallelize Xapian indexing
v2writable: round-robin to partitions based on article number
searchidxpart: increase pipe size for partitions
v2writable: warn on duplicate Message-IDs
searchidx: do not modify Xapian DB while iterating
v2/ui: some hacky things to get the PSGI UI to show up
v2/ui: retry DB reopens in a few more places
v2writable: cleanup unused pipes in partitions
searchidxpart: binmode
use PublicInbox::MIME consistently
searchidxpart: chomp line before splitting
searchidx*: name child subprocesses
searchidx: get rid of pointless index_blob wrapper
view: remove X-PI-TS reference
searchidxthread: load doc data for references
searchidxpart: force integers into add_message
search: reopen skeleton DB as well
searchidx: index values in the threader
search: use different Enquire object for skeleton queries
rename SearchIdxThread to SearchIdxSkeleton
v2writable: commit to skeleton via remote partitions
searchidxskeleton: extra error checking
searchidx: do not modify Xapian DB while iterating
search: query_xover uses skeleton DB iff available
v2/ui: get nntpd and init tests running on v2
v2writable: delete ::Import obj when ->done
search: remove informational "warning" message
searchidx: add PID to error message when die-ing
content_id: special treatment for Message-Id headers
evcleanup: disable outside of daemon
v2writable: deduplicate detection on add
evcleanup: do not create event loop if nothing was registered
mid: add `mids' and `references' methods for extraction
content_id: use `mids' and `references' for MID extraction
searchidx: use new `references' method for parsing References
content_id: no need to be human-friendly
v2writable: inject new Message-IDs on true duplicates
search: revert to using 'Q' as a uniQue id per-Xapian conventions
searchidx: support indexing multiple MIDs
mid: be strict with References, but loose on Message-Id
searchidx: avoid excessive XNQ indexing with diffs
searchidxskeleton: add a note about locking
v2writable: generated Message-ID goes first
searchidx: use add_boolean_term for internal terms
searchidx: add NNTP article number as a searchable term
mid: truncate excessively long MIDs early
nntp: use NNTP article numbers for lookups
nntp: fix NEWNEWS command
searchidx: store the primary MID in doc data for NNTP
import: consolidate object info for v2 imports
v2: avoid redundant/repeated configs for git partition repos
INSTALL: document more optional dependencies
search: favor skeleton DB for lookup_mail
search: each_smsg_by_mid uses skeleton if available
v2writable: remove unnecessary skeleton commit
favor Received: date over Date: header globally
import: fall back to Sender for extracting name and email
scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping
v2writable: detect and use previous partition count
extmsg: rework partial MID matching to favor current inbox
extmsg: rework partial MID matching to favor current inbox
content_id: use Sender header if From is not available
v2writable: support "barrier" operation to avoid reforking
use string ref for Email::Simple->new
v2writable: remove unnecessary idx_init call
searchidx: do not delete documents while iterating
search: allow ->reopen to be chainable
v2writable: implement remove correctly
skeleton: barrier init requires a lock
import: (v2) delete writes the blob into history in subdir
import: (v2): write deletes to a separate '_' subdirectory
import: implement barrier operation for v1 repos
mid: mid_mime uses v2-compatible mids function
watchmaildir: use content_digest to generate Message-Id
import: force Message-ID generation for v1 here
import: switch to URL-safe Base64 for Message-IDs
v2writable: test for idempotent removals
import: enable locking under v2
index: s/GIT_DIR/REPO_DIR/
Lock: new base class for writable lockers
t/watch_maildir: note the reason for FIFO creation
v2writable: ensure ->done is idempotent
watchmaildir: support v2 repositories
searchidxpart: s/barrier/remote_barrier/
v2writable: allow disabling parallelization
scripts/import_vger_from_mbox: filter out same headers as MDA
v2writable: add DEBUG_DIFF env support
v2writable: remove "resent" message for duplicate Message-IDs
content_id: do not take Message-Id into account
introduce InboxWritable class
import: discard all the same headers as MDA
InboxWritable: add mbox/maildir parsing + import logic
use both Date: and Received: times
msgmap: add tmp_clone to create an anonymous copy
fix syntax warnings
v2writable: support reindexing Xapian
t/altid.t: extra tests for mid_set
v2writable: add NNTP article number regeneration support
v2writable: clarify header cleanups
v2writable: DEBUG_DIFF respects $TMPDIR
feed: $INBOX/new.atom endpoint supports v2 inboxes
import: consolidate mid prepend logic, here
www: $MESSAGE_ID/raw endpoint supports "duplicates"
search: reopen DB if each_smsg_by_mid fails
t/psgi_v2: minimal test for Atom feed and t.mbox.gz
feed: fix new.html for v2
view: permalink (per-message) view shows multiple messages
searchidx: warn about vivifying multiple ghosts
v2writable: warn on unseen deleted files
www: get rid of unnecessary 'inbox' name reference
searchview: remove unnecessary imports from MID module
view: depend on SearchMsg for Message-ID
http: fix modification of read-only value
githttpbackend: avoid infinite loop on generic PSGI servers
www: support cloning individual v2 git partitions
http: fix modification of read-only value
githttpbackend: avoid infinite loop on generic PSGI servers
www: remove unnecessary ghost checks
v2writable: append, instead of prepending generated Message-ID
lookup by Message-ID favors the "primary" one
www: fix attachment downloads for conflicted Message-IDs
searchmsg: document why we store To: and Cc: for NNTP
public-inbox-convert: tool for converting old to new inboxes
v2writable: support purging messages from git entirely
search: cleanup uniqueness checking
search: get rid of most lookup_* subroutines
search: move find_doc_ids to searchidx
v2writable: cleanup: get rid of unused fields
mbox: avoid extracting Message-ID for linkification
www: cleanup expensive fallback for legacy URLs
view: get rid of some unnecessary imports
search: retry_reopen on first_smsg_by_mid
import: run_die supports redirects as spawn does
v2writable: initializing an existing inbox is idempotent
public-inbox-compact: new tool for driving xapian-compact
mda: support v2 inboxes
search: warn on reopens and die on total failure
v2writable: allow gaps in git partitions
v2writable: convert some fatal reindex errors to warnings
wwwstream: flesh out clone instructions for v2
v2writable: go backwards through alternate Message-IDs
view: speed up homepage loading time with date clamp
view: drop load_results
feed: optimize query for feeds, too
msgtime: parse 3-digit years properly
convert: avoid redundant "done\n" statement for fast-import
search: move permissions handling to InboxWritable
t/v2writable: use simplify permissions reading
v2: respect core.sharedRepository in git configs
searchidx: correct warning for over-vivification
v2: one file, really
v2writable: fix parallel termination
truncate Message-IDs and References consistently
scripts/import_vger_from_mbox: set address properly
search: reduce columns stored in Xapian
replace Xapian skeleton with SQLite overview DB
v2writable: simplify barrier vs checkpoints
t/over: test empty Subject: line matching
www: rework query responses to avoid COUNT in SQLite
over: speedup get_thread by avoiding JOIN
nntp: fix NEWNEWS command
t/thread-all.t: modernize test to support modern inboxes
rename+rewrite test using Benchmark module
nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
view: avoid offset during pagination
mbox: remove remaining OFFSET usage in SQLite
msgmap: replace id_batch with ids_after
nntp: simplify the long_response API
searchidx: ensure duplicated Message-IDs can be linked together
init: s/GIT_DIR/REPO_DIR/ in usage
import: rewrite less history during purge
v2: support incremental indexing + purge
v2writable: do not modify DBs while iterating for ->remove
v2writable: recount partitions after acquiring lock
searchmsg: remove unused `tid' and `path' methods
search: remove unnecessary OP_AND of query
mbox: do not sort search results
searchview: minor cleanup
support altid mechanism for v2
compact: better handling of over.sqlite3* files
v2writable: remove redundant remove from Over DB
v2writable: allow tracking parallel versions
v2writable: refer to git each repository as "epoch"
over: use only supported and safe SQLite APIs
search: index and allow searching by date-time
altid: fix miscopied field name
nntp: set Xref across multiple inboxes
www: favor reading more from SQLite, and less from Xapian
ensure Xapian and SQLite are still optional for v1 tests
psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological
over: avoid excessive SELECT
over: remove forked subprocess
v2writable: reduce barriers
index: allow specifying --jobs=0 to disable multiprocess
convert: support converting with altid defined
store less data in the Xapian document
msgmap: speed up minmax with separate queries
feed: respect feedmax, again
v1: remove articles from overview DB
compact: do not merge v2 repos by default
v2writable: reduce partititions by one
search: preserve References in Xapian smsg for x=t view
v2: generate better Message-IDs for duplicates
v2: improve deduplication checks
import: cat_blob drops leading 'From ' lines like Inbox
searchidx: regenerate and avoid article number gaps on full index
extmsg: remove expensive git path checks
use %H consistently to disable abbreviations
searchidx: increase term positions for all text terms
searchidx: revert default BATCH_BYTES to 1_000_000
Merge remote-tracking branch 'origin/master' into v2
^ permalink raw reply [relevance 6%]
* [PATCH 4/5] www: rework query responses to avoid COUNT in SQLite
2018-04-02 0:04 6% [PATCH 0/5] v2: drop Xapian skeleton for SQLite overview DB Eric Wong (Contractor, The Linux Foundation)
@ 2018-04-02 0:04 2% ` Eric Wong (Contractor, The Linux Foundation)
0 siblings, 0 replies; 6+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-04-02 0:04 UTC (permalink / raw)
To: meta
In many cases, we do not care about the total number of
messages. Counting them is a rather expensive operation in
SQLite (Xapian only provides an estimate).
For LKML, this brings top-level /$INBOX/ loading time from
~375ms to around 60ms on my system. Days ago, this operation
was taking 800-900ms(!) for me before introducing the SQLite
overview DB.
---
lib/PublicInbox/Feed.pm | 11 +++----
lib/PublicInbox/Mbox.pm | 12 +++----
lib/PublicInbox/Over.pm | 8 +++--
lib/PublicInbox/Search.pm | 15 ++++++---
lib/PublicInbox/View.pm | 17 ++++------
t/altid.t | 9 +++---
t/convert-compact.t | 6 ++--
t/over.t | 6 ++--
t/search-thr-index.t | 4 +--
t/search.t | 80 +++++++++++++++++++++++------------------------
t/v2-add-remove-add.t | 6 ++--
t/v2mda.t | 4 +--
t/v2writable.t | 2 +-
t/watch_maildir_v2.t | 26 +++++++--------
14 files changed, 104 insertions(+), 102 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 2f59f8c..ff20d7f 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -27,14 +27,13 @@ sub generate {
sub generate_thread_atom {
my ($ctx) = @_;
my $mid = $ctx->{mid};
- my $res = $ctx->{srch}->get_thread($mid);
- return _no_thread() unless $res->{total};
+ my $msgs = $ctx->{srch}->get_thread($mid);
+ return _no_thread() unless @$msgs;
my $ibx = $ctx->{-inbox};
my $html_url = $ibx->base_url($ctx->{env});
$html_url .= PublicInbox::Hval->new_msgid($mid)->{href};
$ctx->{-html_url} = $html_url;
- my $msgs = $res->{msgs};
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
while (my $smsg = shift @$msgs) {
$ibx->smsg_mime($smsg) and return $smsg;
@@ -114,10 +113,10 @@ sub recent_msgs {
my $o = $qp ? $qp->{o} : 0;
$o += 0;
$o = 0 if $o < 0;
- my $res = $ibx->recent({ limit => $max, offset => $o });
+ my $msgs = $ibx->recent({ limit => $max, offset => $o });
my $next = $o + $max;
- $ctx->{next_page} = "o=$next" if $res->{total} >= $next;
- return $res->{msgs};
+ $ctx->{next_page} = "o=$next" if scalar(@$msgs) == $max;
+ return $msgs;
}
my $hex = '[a-f0-9]';
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 1b68f02..05de6be 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -217,12 +217,12 @@ sub set_filename ($$) {
sub getline {
my ($self) = @_;
my $ctx = $self->{ctx} or return;
- my $res;
my $ibx = $ctx->{-inbox};
my $gz = $self->{gz};
+ my $msgs = $self->{msgs};
do {
# work on existing result set
- while (defined(my $smsg = shift @{$self->{msgs}})) {
+ while (defined(my $smsg = shift @$msgs)) {
my $msg = eval { $ibx->msg_by_smsg($smsg) } or next;
$msg = Email::Simple->new($msg);
$gz->write(PublicInbox::Mbox::msg_str($ctx, $msg,
@@ -247,11 +247,9 @@ sub getline {
}
# refill result set
- $res = $self->{cb}->($self->{opts});
- $self->{msgs} = $res->{msgs};
- $res = scalar @{$self->{msgs}};
- $self->{opts}->{offset} += $res;
- } while ($res);
+ $msgs = $self->{msgs} = $self->{cb}->($self->{opts});
+ $self->{opts}->{offset} += scalar @$msgs;
+ } while (@$msgs);
$gz->close;
delete $self->{ctx};
${delete $self->{buf}};
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index cf7a884..c74072a 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -88,10 +88,12 @@ LIMIT 1
SELECT * $cond
ORDER BY ts ASC
+ return $msgs unless wantarray;
+
my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid);
SELECT COUNT(num) $cond
- { total => $nr, msgs => $msgs };
+ ($nr, $msgs);
}
sub recent {
@@ -100,10 +102,12 @@ sub recent {
SELECT * FROM over WHERE num > 0
ORDER BY ts DESC
+ return $msgs unless wantarray;
+
my $nr = $self->{dbh}->selectrow_array(<<'');
SELECT COUNT(num) FROM over WHERE num > 0
- { total => $nr, msgs => $msgs };
+ ($nr, $msgs);
}
sub get_art {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 9125124..84c0a22 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -185,10 +185,16 @@ sub get_thread {
sub retry_reopen {
my ($self, $cb) = @_;
- my $ret;
for my $i (1..10) {
- eval { $ret = $cb->() };
- return $ret unless $@;
+ if (wantarray) {
+ my @ret;
+ eval { @ret = $cb->() };
+ return @ret unless $@;
+ } else {
+ my $ret;
+ eval { $ret = $cb->() };
+ return $ret unless $@;
+ }
# Exception: The revision being read has been discarded -
# you should call Xapian::Database::reopen()
if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
@@ -226,8 +232,9 @@ sub _enquire_once {
my @msgs = map {
PublicInbox::SearchMsg->load_doc($_->get_document);
} $mset->items;
+ return \@msgs unless wantarray;
- { total => $mset->get_matches_estimated, msgs => \@msgs }
+ ($mset->get_matches_estimated, \@msgs)
}
# read-write
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 8ac405f..cad90a7 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -408,9 +408,7 @@ sub thread_html {
my ($ctx) = @_;
my $mid = $ctx->{mid};
my $srch = $ctx->{srch};
- my $sres = $srch->get_thread($mid);
- my $msgs = $sres->{msgs};
- my $nr = $sres->{total};
+ my ($nr, $msgs) = $srch->get_thread($mid);
return missing_thread($ctx) if $nr == 0;
my $skel = '<hr><pre>';
$skel .= $nr == 1 ? 'only message in thread' : 'end of thread';
@@ -649,8 +647,7 @@ sub thread_skel {
my ($dst, $ctx, $hdr, $tpfx) = @_;
my $srch = $ctx->{srch};
my $mid = mids($hdr)->[0];
- my $sres = $srch->get_thread($mid);
- my $nr = $sres->{total};
+ my ($nr, $msgs) = $srch->get_thread($mid);
my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) .
qq(|<a\nhref="${tpfx}t/#u">nested</a>] ) .
qq(<a\nhref="${tpfx}t.mbox.gz">mbox.gz</a> ) .
@@ -680,12 +677,11 @@ sub thread_skel {
$ctx->{prev_attr} = '';
$ctx->{prev_level} = 0;
$ctx->{dst} = $dst;
- $sres = $sres->{msgs};
# reduce hash lookups in skel_dump
my $ibx = $ctx->{-inbox};
$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
- walk_thread(thread_results($ctx, $sres), $ctx, *skel_dump);
+ walk_thread(thread_results($ctx, $msgs), $ctx, *skel_dump);
$ctx->{parent_msg} = $parent;
}
@@ -1066,11 +1062,10 @@ sub index_topics {
$ctx->{order} = [];
my $srch = $ctx->{srch};
- my $sres = $ctx->{-inbox}->recent({offset => $off, limit => 200 });
- $sres = $sres->{msgs};
- my $nr = scalar @$sres;
+ my $msgs = $ctx->{-inbox}->recent({offset => $off, limit => 200 });
+ my $nr = scalar @$msgs;
if ($nr) {
- walk_thread(thread_results($ctx, $sres), $ctx, *acc_topic);
+ walk_thread(thread_results($ctx, $msgs), $ctx, *acc_topic);
}
$ctx->{-next_o} = $off + $nr;
$ctx->{-cur_o} = $off;
diff --git a/t/altid.t b/t/altid.t
index 0f3b86c..d4f6152 100644
--- a/t/altid.t
+++ b/t/altid.t
@@ -50,12 +50,11 @@ my $altid = [ "serial:gmane:file=$alt_file" ];
{
my $ro = PublicInbox::Search->new($git_dir, $altid);
- my $res = $ro->query("gmane:1234");
- is($res->{total}, 1, 'got one match');
- is($res->{msgs}->[0]->mid, 'a@example.com');
+ my $msgs = $ro->query("gmane:1234");
+ is_deeply([map { $_->mid } @$msgs], ['a@example.com'], 'got one match');
- $res = $ro->query("gmane:666");
- is($res->{total}, 0, 'body did NOT match');
+ $msgs = $ro->query("gmane:666");
+ is_deeply([], $msgs, 'body did NOT match');
};
{
diff --git a/t/convert-compact.t b/t/convert-compact.t
index 92a6a9c..e2ba40a 100644
--- a/t/convert-compact.t
+++ b/t/convert-compact.t
@@ -99,8 +99,8 @@ foreach (@xdir) {
is($st[2] & 07777, -f _ ? 0444 : 0755,
'sharedRepository respected after v2 compact');
}
-my $res = $ibx->recent({limit => 1000});
-is($res->{msgs}->[0]->{mid}, 'a-mid@b', 'message exists in history');
-is(scalar @{$res->{msgs}}, 1, 'only one message in history');
+my $msgs = $ibx->recent({limit => 1000});
+is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history');
+is(scalar @$msgs, 1, 'only one message in history');
done_testing();
diff --git a/t/over.t b/t/over.t
index bfe331e..2a7e8d1 100644
--- a/t/over.t
+++ b/t/over.t
@@ -40,7 +40,7 @@ my $ddd = compress('');
foreach my $s ('', undef) {
$over->add_over([0, 98, [ 'a' ], [], $s, $ddd]);
$over->add_over([0, 99, [ 'b' ], [], $s, $ddd]);
- my $msgs = [ map { $_->{num} } @{$over->get_thread('a')->{msgs}} ];
+ my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ];
is_deeply([98], $msgs,
'messages not linked by empty subject');
}
@@ -48,13 +48,13 @@ foreach my $s ('', undef) {
$over->add_over([0, 98, [ 'a' ], [], 's', $ddd]);
$over->add_over([0, 99, [ 'b' ], [], 's', $ddd]);
foreach my $mid (qw(a b)) {
- my $msgs = [ map { $_->{num} } @{$over->get_thread('a')->{msgs}} ];
+ my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ];
is_deeply([98, 99], $msgs, 'linked messages by subject');
}
$over->add_over([0, 98, [ 'a' ], [], 's', $ddd]);
$over->add_over([0, 99, [ 'b' ], ['a'], 'diff', $ddd]);
foreach my $mid (qw(a b)) {
- my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)->{msgs}} ];
+ my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)} ];
is_deeply([98, 99], $msgs, "linked messages by Message-ID: <$mid>");
}
diff --git a/t/search-thr-index.t b/t/search-thr-index.t
index 3ddef80..2aa97bf 100644
--- a/t/search-thr-index.t
+++ b/t/search-thr-index.t
@@ -49,8 +49,8 @@ foreach (reverse split(/\n\n/, $data)) {
my $prev;
foreach my $mid (@mids) {
- my $res = $rw->{over}->get_thread($mid);
- is(3, $res->{total}, "got all messages from $mid");
+ my $msgs = $rw->{over}->get_thread($mid);
+ is(3, scalar(@$msgs), "got all messages from $mid");
}
$rw->commit_txn_lazy;
diff --git a/t/search.t b/t/search.t
index 51adb9f..c9bef71 100644
--- a/t/search.t
+++ b/t/search.t
@@ -82,8 +82,8 @@ my $rw_commit = sub {
}
sub filter_mids {
- my ($res) = @_;
- sort(map { $_->mid } @{$res->{msgs}});
+ my ($msgs) = @_;
+ sort(map { $_->mid } @$msgs);
}
{
@@ -106,12 +106,12 @@ sub filter_mids {
is_deeply(\@res, \@exp, 'got expected results for s:"" match');
$res = $ro->query('s:"Hello world"', {limit => 1});
- is(scalar @{$res->{msgs}}, 1, "limit works");
- my $first = $res->{msgs}->[0];
+ is(scalar @$res, 1, "limit works");
+ my $first = $res->[0];
$res = $ro->query('s:"Hello world"', {offset => 1});
- is(scalar @{$res->{msgs}}, 1, "offset works");
- my $second = $res->{msgs}->[0];
+ is(scalar @$res, 1, "offset works");
+ my $second = $res->[0];
isnt($first, $second, "offset returned different result from limit");
}
@@ -147,7 +147,7 @@ sub filter_mids {
my $ghost_id = $rw->add_message($was_ghost);
is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id");
- my $msgs = $rw->{over}->get_thread('ghost-message@s')->{msgs};
+ my $msgs = $rw->{over}->get_thread('ghost-message@s');
is(scalar(@$msgs), 2, 'got both messages in ghost thread');
foreach (qw(sid tid)) {
is($msgs->[0]->{$_}, $msgs->[1]->{$_}, "{$_} match");
@@ -169,7 +169,7 @@ sub filter_mids {
# body
$res = $ro->query('goodbye');
- is($res->{msgs}->[0]->mid, 'last@s', 'got goodbye message body');
+ is($res->[0]->mid, 'last@s', 'got goodbye message body');
}
# long message-id
@@ -215,7 +215,7 @@ sub filter_mids {
$rw_commit->();
$ro->reopen;
my $t = $ro->get_thread('root@s');
- is($t->{total}, 4, "got all 4 mesages in thread");
+ is(scalar(@$t), 4, "got all 4 mesages in thread");
my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid);
@res = filter_mids($t);
is_deeply(\@res, \@exp, "get_thread works");
@@ -244,13 +244,13 @@ sub filter_mids {
],
body => "theatre\nfade\n"));
my $res = $rw->query("theatre");
- is($res->{total}, 2, "got both matches");
- is($res->{msgs}->[0]->mid, 'nquote@a', "non-quoted scores higher");
- is($res->{msgs}->[1]->mid, 'quote@a', "quoted result still returned");
+ is(scalar(@$res), 2, "got both matches");
+ is($res->[0]->mid, 'nquote@a', "non-quoted scores higher");
+ is($res->[1]->mid, 'quote@a', "quoted result still returned");
$res = $rw->query("illusions");
- is($res->{total}, 1, "got a match for quoted text");
- is($res->{msgs}->[0]->mid, 'quote@a',
+ is(scalar(@$res), 1, "got a match for quoted text");
+ is($res->[0]->mid, 'quote@a',
"quoted result returned if nothing else");
}
@@ -293,34 +293,34 @@ sub filter_mids {
}
{
- my $res = $ro->query('d:19931002..20101002');
- ok(scalar @{$res->{msgs}} > 0, 'got results within range');
- $res = $ro->query('d:20101003..');
- is(scalar @{$res->{msgs}}, 0, 'nothing after 20101003');
- $res = $ro->query('d:..19931001');
- is(scalar @{$res->{msgs}}, 0, 'nothing before 19931001');
+ my $msgs = $ro->query('d:19931002..20101002');
+ ok(scalar(@$msgs) > 0, 'got results within range');
+ $msgs = $ro->query('d:20101003..');
+ is(scalar(@$msgs), 0, 'nothing after 20101003');
+ $msgs = $ro->query('d:..19931001');
+ is(scalar(@$msgs), 0, 'nothing before 19931001');
}
# names and addresses
{
my $res = $ro->query('t:list@example.com');
- is(scalar @{$res->{msgs}}, 6, 'searched To: successfully');
- foreach my $smsg (@{$res->{msgs}}) {
+ is(scalar @$res, 6, 'searched To: successfully');
+ foreach my $smsg (@$res) {
like($smsg->to, qr/\blist\@example\.com\b/, 'to appears');
}
$res = $ro->query('tc:list@example.com');
- is(scalar @{$res->{msgs}}, 6, 'searched To+Cc: successfully');
- foreach my $smsg (@{$res->{msgs}}) {
+ is(scalar @$res, 6, 'searched To+Cc: successfully');
+ foreach my $smsg (@$res) {
my $tocc = join("\n", $smsg->to, $smsg->cc);
like($tocc, qr/\blist\@example\.com\b/, 'tocc appears');
}
foreach my $pfx ('tcf:', 'c:') {
$res = $ro->query($pfx . 'foo@example.com');
- is(scalar @{$res->{msgs}}, 1,
+ is(scalar @$res, 1,
"searched $pfx successfully for Cc:");
- foreach my $smsg (@{$res->{msgs}}) {
+ foreach my $smsg (@$res) {
like($smsg->cc, qr/\bfoo\@example\.com\b/,
'cc appears');
}
@@ -328,9 +328,9 @@ sub filter_mids {
foreach my $pfx ('', 'tcf:', 'f:') {
$res = $ro->query($pfx . 'Laggy');
- is(scalar @{$res->{msgs}}, 1,
+ is(scalar(@$res), 1,
"searched $pfx successfully for From:");
- foreach my $smsg (@{$res->{msgs}}) {
+ foreach my $smsg (@$res) {
like($smsg->from, qr/Laggy Sender/,
"From appears with $pfx");
}
@@ -341,23 +341,23 @@ sub filter_mids {
$rw_commit->();
$ro->reopen;
my $res = $ro->query('b:hello');
- is(scalar @{$res->{msgs}}, 0, 'no match on body search only');
+ is(scalar(@$res), 0, 'no match on body search only');
$res = $ro->query('bs:smith');
- is(scalar @{$res->{msgs}}, 0,
+ is(scalar(@$res), 0,
'no match on body+subject search for From');
$res = $ro->query('q:theatre');
- is(scalar @{$res->{msgs}}, 1, 'only one quoted body');
- like($res->{msgs}->[0]->from, qr/\AQuoter/, 'got quoted body');
+ is(scalar(@$res), 1, 'only one quoted body');
+ like($res->[0]->from, qr/\AQuoter/, 'got quoted body');
$res = $ro->query('nq:theatre');
- is(scalar @{$res->{msgs}}, 1, 'only one non-quoted body');
- like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body');
+ is(scalar @$res, 1, 'only one non-quoted body');
+ like($res->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body');
foreach my $pfx (qw(b: bs:)) {
$res = $ro->query($pfx . 'theatre');
- is(scalar @{$res->{msgs}}, 2, "searched both bodies for $pfx");
- like($res->{msgs}->[0]->from, qr/\ANon-Quoter/,
+ is(scalar @$res, 2, "searched both bodies for $pfx");
+ like($res->[0]->from, qr/\ANon-Quoter/,
"non-quoter first for $pfx");
}
}
@@ -396,13 +396,13 @@ sub filter_mids {
$rw_commit->();
$ro->reopen;
my $n = $ro->query('n:attached_fart.txt');
- is(scalar @{$n->{msgs}}, 1, 'got result for n:');
+ is(scalar @$n, 1, 'got result for n:');
my $res = $ro->query('part_deux.txt');
- is(scalar @{$res->{msgs}}, 1, 'got result without n:');
- is($n->{msgs}->[0]->mid, $res->{msgs}->[0]->mid,
+ is(scalar @$res, 1, 'got result without n:');
+ is($n->[0]->mid, $res->[0]->mid,
'same result with and without');
my $txt = $ro->query('"inside another"');
- is($txt->{msgs}->[0]->mid, $res->{msgs}->[0]->mid,
+ is($txt->[0]->mid, $res->[0]->mid,
'search inside text attachments works');
}
$rw->commit_txn_lazy;
diff --git a/t/v2-add-remove-add.t b/t/v2-add-remove-add.t
index b6c5887..c8d12d3 100644
--- a/t/v2-add-remove-add.t
+++ b/t/v2-add-remove-add.t
@@ -35,8 +35,8 @@ ok($im->add($mime), 'message added');
ok($im->remove($mime), 'message added');
ok($im->add($mime), 'message added again');
$im->done;
-my $res = $ibx->recent({limit => 1000});
-is($res->{msgs}->[0]->{mid}, 'a-mid@b', 'message exists in history');
-is(scalar @{$res->{msgs}}, 1, 'only one message in history');
+my $msgs = $ibx->recent({limit => 1000});
+is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history');
+is(scalar @$msgs, 1, 'only one message in history');
done_testing();
diff --git a/t/v2mda.t b/t/v2mda.t
index be27ca0..ca1bb09 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -52,8 +52,8 @@ ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr),
'mda delivered a message');
$ibx = PublicInbox::Inbox->new($ibx);
-my $res = $ibx->search->query('');
-my $saved = $ibx->smsg_mime($res->{msgs}->[0]);
+my $msgs = $ibx->search->query('');
+my $saved = $ibx->smsg_mime($msgs->[0]);
is($saved->{mime}->as_string, $mime->as_string, 'injected message');
done_testing();
diff --git a/t/v2writable.t b/t/v2writable.t
index 7e29ef7..1e8e404 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -249,7 +249,7 @@ EOF
ok($im->add($mime), 'add excessively long References');
$im->barrier;
- my $msgs = $ibx->search->reopen->get_thread('x'x244)->{msgs};
+ my $msgs = $ibx->search->reopen->get_thread('x'x244);
is(2, scalar(@$msgs), 'got both messages');
is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid');
is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref');
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index 85130e3..a76e413 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -50,8 +50,8 @@ ok($ibx, 'found inbox by name');
my $srch = $ibx->search;
PublicInbox::WatchMaildir->new($config)->scan('full');
-my $res = $srch->reopen->query('');
-is($res->{total}, 1, 'got one revision');
+my ($total, undef) = $srch->reopen->query('');
+is($total, 1, 'got one revision');
# my $git = PublicInbox::Git->new("$mainrepo/git/0.git");
# my @list = $git->qx(qw(rev-list refs/heads/master));
@@ -70,7 +70,7 @@ my $write_spam = sub {
$write_spam->();
is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
PublicInbox::WatchMaildir->new($config)->scan('full');
-is($srch->reopen->query('')->{total}, 0, 'deleted file');
+is(($srch->reopen->query(''))[0], 0, 'deleted file');
# check with scrubbing
{
@@ -80,16 +80,16 @@ the body of a message to majordomo\@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
PublicInbox::WatchMaildir->new($config)->scan('full');
- $res = $srch->reopen->query('');
- is($res->{total}, 1, 'got one file back');
- my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]);
+ my ($nr, $msgs) = $srch->reopen->query('');
+ is($nr, 1, 'got one file back');
+ my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam');
$write_spam->();
PublicInbox::WatchMaildir->new($config)->scan('full');
- $res = $srch->reopen->query('');
- is($res->{total}, 0, 'inbox is empty again');
+ ($nr, $msgs) = $srch->reopen->query('');
+ is($nr, 0, 'inbox is empty again');
}
{
@@ -103,8 +103,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
local $SIG{__WARN__} = sub {}; # quiet spam check warning
PublicInbox::WatchMaildir->new($config)->scan('full');
}
- $res = $srch->reopen->query('');
- is($res->{total}, 0, 'inbox is still empty');
+ ($nr, $msgs) = $srch->reopen->query('');
+ is($nr, 0, 'inbox is still empty');
is(unlink(glob("$maildir/new/*")), 1);
}
@@ -116,9 +116,9 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
$config->{'publicinboxwatch.spamcheck'} = 'spamc';
PublicInbox::WatchMaildir->new($config)->scan('full');
- $res = $srch->reopen->query('');
- is($res->{total}, 1, 'inbox has one mail after spamc OK-ed a message');
- my $mref = $ibx->msg_by_smsg($res->{msgs}->[0]);
+ ($nr, $msgs) = $srch->reopen->query('');
+ is($nr, 1, 'inbox has one mail after spamc OK-ed a message');
+ my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
}
--
EW
^ permalink raw reply related [relevance 2%]
* [PATCH 0/5] v2: drop Xapian skeleton for SQLite overview DB
@ 2018-04-02 0:04 6% Eric Wong (Contractor, The Linux Foundation)
2018-04-02 0:04 2% ` [PATCH 4/5] www: rework query responses to avoid COUNT in SQLite Eric Wong (Contractor, The Linux Foundation)
0 siblings, 1 reply; 6+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-04-02 0:04 UTC (permalink / raw)
To: meta
400ms to load the top-level /$INBOX/ for /git/ is pretty bad on
my systems as-is. 800ms for /lkml/ was unacceptable
(though probably 400ms on decent servers). It's now around 60ms
across the board with the new overview DB.
This also affects OVER/XOVER/NEWNEWS NNTP commands, as well
as Atom feeds.
Eric Wong (Contractor, The Linux Foundation) (5):
replace Xapian skeleton with SQLite overview DB
v2writable: simplify barrier vs checkpoints
t/over: test empty Subject: line matching
www: rework query responses to avoid COUNT in SQLite
over: speedup get_thread by avoiding JOIN
MANIFEST | 5 +-
lib/PublicInbox/Feed.pm | 11 +-
lib/PublicInbox/Inbox.pm | 15 +-
lib/PublicInbox/Mbox.pm | 12 +-
lib/PublicInbox/Msgmap.pm | 1 +
lib/PublicInbox/NNTP.pm | 29 +-
lib/PublicInbox/Over.pm | 134 ++++++++
lib/PublicInbox/OverIdx.pm | 370 +++++++++++++++++++++
.../{SearchIdxSkeleton.pm => OverIdxFork.pm} | 131 +++++---
lib/PublicInbox/Search.pm | 124 ++-----
lib/PublicInbox/SearchIdx.pm | 214 +++---------
lib/PublicInbox/SearchIdxPart.pm | 16 +-
lib/PublicInbox/SearchMsg.pm | 26 +-
lib/PublicInbox/V2Writable.pm | 114 +++----
lib/PublicInbox/View.pm | 17 +-
script/public-inbox-compact | 32 +-
t/altid.t | 9 +-
t/convert-compact.t | 6 +-
t/over.t | 63 ++++
t/psgi_search.t | 6 +-
t/search-thr-index.t | 9 +-
t/search.t | 92 ++---
t/v2-add-remove-add.t | 6 +-
t/v2mda.t | 4 +-
t/v2writable.t | 2 +-
t/watch_maildir_v2.t | 26 +-
26 files changed, 907 insertions(+), 567 deletions(-)
create mode 100644 lib/PublicInbox/Over.pm
create mode 100644 lib/PublicInbox/OverIdx.pm
rename lib/PublicInbox/{SearchIdxSkeleton.pm => OverIdxFork.pm} (54%)
create mode 100644 t/over.t
--
EW
^ permalink raw reply [relevance 6%]
Results 1-6 of 6 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2018-04-02 0:04 6% [PATCH 0/5] v2: drop Xapian skeleton for SQLite overview DB Eric Wong (Contractor, The Linux Foundation)
2018-04-02 0:04 2% ` [PATCH 4/5] www: rework query responses to avoid COUNT in SQLite Eric Wong (Contractor, The Linux Foundation)
2018-04-19 1:20 6% v2 merged to master Eric Wong
2018-05-09 20:23 5% [ANNOUNCE] public-inbox 1.1.0-pre1 Eric Wong
2019-09-14 19:50 7% [PATCH] doc: add release notes directory Eric Wong
2020-08-26 8:17 [PATCH 0/5] some minor SQLite-related cleanups Eric Wong
2020-08-26 8:17 4% ` [PATCH 3/5] over: recent: remove expensive COUNT query Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).