* [PATCH 3/5] searchidx: sync Msgmap database along with Xapian
2015-09-15 1:07 6% [PATCH 0/5] introduce SQLite message map Eric Wong
@ 2015-09-15 1:08 7% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2015-09-15 1:08 UTC (permalink / raw)
To: meta
We can avoid duplicating the work of extracting messages from git if we
tie Msgmap syncing to Xapian indexing. Of course, this ties the two
features together, but it's probably reasonable to expect that anybody
who wants to use public-inbox to serve messages to front-end users will
have both.
---
lib/PublicInbox/SearchIdx.pm | 84 ++++++++++++++++++++++++++++++++++----------
1 file changed, 66 insertions(+), 18 deletions(-)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 44f6bc1..351450c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -247,18 +247,36 @@ sub link_message_to_parents {
}
sub index_blob {
- my ($self, $git, $blob) = @_;
- my $mime = do_cat_mail($git, $blob) or return;
- eval { $self->add_message($mime) };
- warn "W: index_blob $blob: $@\n" if $@;
+ my ($self, $git, $mime) = @_;
+ $self->add_message($mime);
}
sub unindex_blob {
- my ($self, $git, $blob) = @_;
- my $mime = do_cat_mail($git, $blob) or return;
- my $mid = $mime->header('Message-ID');
- eval { $self->remove_message($mid) } if defined $mid;
- warn "W: unindex_blob $blob: $@\n" if $@;
+ my ($self, $git, $mime) = @_;
+ my $mid = mid_clean($mime->header('Message-ID'));
+ $self->remove_message($mid) if defined $mid;
+}
+
+sub index_mm {
+ my ($self, $git, $mime) = @_;
+ $self->{mm}->mid_insert(mid_clean($mime->header('Message-ID')));
+}
+
+sub unindex_mm {
+ my ($self, $git, $mime) = @_;
+ $self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
+}
+
+sub index_both {
+ my ($self, $git, $mime) = @_;
+ index_blob($self, $git, $mime);
+ index_mm($self, $git, $mime);
+}
+
+sub unindex_both {
+ my ($self, $git, $mime) = @_;
+ unindex_blob($self, $git, $mime);
+ unindex_mm($self, $git, $mime);
}
sub do_cat_mail {
@@ -292,9 +310,11 @@ sub rlog {
die('open` '.join(' ', @cmd) . " pipe failed: $!\n");
while (my $line = <$log>) {
if ($line =~ /$addmsg/o) {
- $add_cb->($self, $git, $1);
+ my $mime = do_cat_mail($git, $1) or next;
+ $add_cb->($self, $git, $mime);
} elsif ($line =~ /$delmsg/o) {
- $del_cb->($self, $git, $1);
+ my $mime = do_cat_mail($git, $1) or next;
+ $del_cb->($self, $git, $mime);
} elsif ($line =~ /^commit ($h40)/o) {
$latest = $1;
}
@@ -308,17 +328,45 @@ sub _index_sync {
my ($self, $head) = @_;
my $db = $self->{xdb};
$head ||= 'HEAD';
+ my $mm = $self->{mm} = eval {
+ require PublicInbox::Msgmap;
+ PublicInbox::Msgmap->new($self->{git_dir}, 1);
+ };
$db->begin_transaction;
- eval {
- my $latest = $db->get_metadata('last_commit');
- my $range = $latest eq '' ? $head : "$latest..$head";
- $latest = $self->rlog($range, *index_blob, *unindex_blob);
- $db->set_metadata('last_commit', $latest) if defined $latest;
- };
+ my $lx = $db->get_metadata('last_commit');
+ my $range = $lx eq '' ? $head : "$lx..$head";
+ if ($mm) {
+ $mm->{dbh}->begin_work;
+ my $lm = $mm->last_commit || '';
+ if ($lm eq $lx) {
+ # Common case is the indexes are synced,
+ # we only need to run git-log once:
+ $lx = $self->rlog($range, *index_both, *unindex_both);
+ $mm->{dbh}->commit;
+ if (defined $lx) {
+ $db->set_metadata('last_commit', $lx);
+ $mm->last_commit($lx);
+ }
+ } else {
+ # dumb case, msgmap and xapian are out-of-sync
+ # do not care for performance:
+ my $r = $lm eq '' ? $head : "$lm..$head";
+ $lm = $self->rlog($r, *index_mm, *unindex_mm);
+ $mm->{dbh}->commit;
+ $mm->last_commit($lm) if defined $lm;
+
+ goto xapian_only;
+ }
+ } else {
+ # user didn't install DBD::SQLite and DBI
+xapian_only:
+ $lx = $self->rlog($range, *index_blob, *unindex_blob);
+ $db->set_metadata('last_commit', $lx) if defined $lx;
+ }
if ($@) {
- warn "indexing failed: $@\n";
$db->cancel_transaction;
+ $mm->{dbh}->rollback if $mm;
} else {
$db->commit_transaction;
}
--
EW
^ permalink raw reply related [relevance 7%]
* [PATCH 0/5] introduce SQLite message map
@ 2015-09-15 1:07 6% Eric Wong
2015-09-15 1:08 7% ` [PATCH 3/5] searchidx: sync Msgmap database along with Xapian Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2015-09-15 1:07 UTC (permalink / raw)
To: meta
This series introduces an SQLite-backed message map, which will make it
easier for users to deal with URLs truncated by copy-and-paste errors.
Eric Wong (5):
msgmap: add message mapping via SQLite
searchidx: hoist out rlog code
searchidx: sync Msgmap database along with Xapian
extmsg: wire up to use msgmap for prefixes
INSTALL: document DBD::SQLite and DBI dependencies
INSTALL | 4 +-
lib/PublicInbox/ExtMsg.pm | 29 +++++----
lib/PublicInbox/Msgmap.pm | 137 +++++++++++++++++++++++++++++++++++++++++++
lib/PublicInbox/Search.pm | 9 ---
lib/PublicInbox/SearchIdx.pm | 129 ++++++++++++++++++++++++++++------------
t/msgmap.t | 53 +++++++++++++++++
6 files changed, 300 insertions(+), 61 deletions(-)
^ permalink raw reply [relevance 6%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-09-15 1:07 6% [PATCH 0/5] introduce SQLite message map Eric Wong
2015-09-15 1:08 7% ` [PATCH 3/5] searchidx: sync Msgmap database along with Xapian Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).