user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 3/5] searchidx: sync Msgmap database along with Xapian
  2015-09-15  1:07  6% [PATCH 0/5] introduce SQLite message map Eric Wong
@ 2015-09-15  1:08  7% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2015-09-15  1:08 UTC (permalink / raw)
  To: meta

We can avoid duplicating the work of extracting messages from git if
we sync the Msgmap database in the same pass as Xapian indexing.  Of
course, this ties the two features together, but it's probably
reasonable to expect that anybody who wants to use public-inbox to
serve messages to front-end users will have both.
---
 lib/PublicInbox/SearchIdx.pm | 84 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 18 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 44f6bc1..351450c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -247,18 +247,36 @@ sub link_message_to_parents {
 }
 
 sub index_blob {
-	my ($self, $git, $blob) = @_;
-	my $mime = do_cat_mail($git, $blob) or return;
-	eval { $self->add_message($mime) };
-	warn "W: index_blob $blob: $@\n" if $@;
+	my ($self, $git, $mime) = @_;
+	$self->add_message($mime);
 }
 
 sub unindex_blob {
-	my ($self, $git, $blob) = @_;
-	my $mime = do_cat_mail($git, $blob) or return;
-	my $mid = $mime->header('Message-ID');
-	eval { $self->remove_message($mid) } if defined $mid;
-	warn "W: unindex_blob $blob: $@\n" if $@;
+	my ($self, $git, $mime) = @_;
+	my $mid = mid_clean($mime->header('Message-ID'));
+	$self->remove_message($mid) if defined $mid;
+}
+
+sub index_mm {
+	my ($self, $git, $mime) = @_;
+	$self->{mm}->mid_insert(mid_clean($mime->header('Message-ID')));
+}
+
+sub unindex_mm {
+	my ($self, $git, $mime) = @_;
+	$self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
+}
+
+sub index_both {
+	my ($self, $git, $mime) = @_;
+	index_blob($self, $git, $mime);
+	index_mm($self, $git, $mime);
+}
+
+sub unindex_both {
+	my ($self, $git, $mime) = @_;
+	unindex_blob($self, $git, $mime);
+	unindex_mm($self, $git, $mime);
 }
 
 sub do_cat_mail {
@@ -292,9 +310,11 @@ sub rlog {
 		die('open` '.join(' ', @cmd) . " pipe failed: $!\n");
 	while (my $line = <$log>) {
 		if ($line =~ /$addmsg/o) {
-			$add_cb->($self, $git, $1);
+			my $mime = do_cat_mail($git, $1) or next;
+			$add_cb->($self, $git, $mime);
 		} elsif ($line =~ /$delmsg/o) {
-			$del_cb->($self, $git, $1);
+			my $mime = do_cat_mail($git, $1) or next;
+			$del_cb->($self, $git, $mime);
 		} elsif ($line =~ /^commit ($h40)/o) {
 			$latest = $1;
 		}
@@ -308,17 +328,45 @@ sub _index_sync {
 	my ($self, $head) = @_;
 	my $db = $self->{xdb};
 	$head ||= 'HEAD';
+	my $mm = $self->{mm} = eval {
+		require PublicInbox::Msgmap;
+		PublicInbox::Msgmap->new($self->{git_dir}, 1);
+	};
 
 	$db->begin_transaction;
-	eval {
-		my $latest = $db->get_metadata('last_commit');
-		my $range = $latest eq '' ? $head : "$latest..$head";
-		$latest = $self->rlog($range, *index_blob, *unindex_blob);
-		$db->set_metadata('last_commit', $latest) if defined $latest;
-	};
+	my $lx = $db->get_metadata('last_commit');
+	my $range = $lx eq '' ? $head : "$lx..$head";
+	if ($mm) {
+		$mm->{dbh}->begin_work;
+		my $lm = $mm->last_commit || '';
+		if ($lm eq $lx) {
+			# Common case is the indexes are synced,
+			# we only need to run git-log once:
+			$lx = $self->rlog($range, *index_both, *unindex_both);
+			$mm->{dbh}->commit;
+			if (defined $lx) {
+				$db->set_metadata('last_commit', $lx);
+				$mm->last_commit($lx);
+			}
+		} else {
+			# dumb case, msgmap and xapian are out-of-sync
+			# do not care for performance:
+			my $r = $lm eq '' ? $head : "$lm..$head";
+			$lm = $self->rlog($r, *index_mm, *unindex_mm);
+			$mm->{dbh}->commit;
+			$mm->last_commit($lm) if defined $lm;
+
+			goto xapian_only;
+		}
+	} else {
+		# user didn't install DBD::SQLite and DBI
+xapian_only:
+		$lx = $self->rlog($range, *index_blob, *unindex_blob);
+		$db->set_metadata('last_commit', $lx) if defined $lx;
+	}
 	if ($@) {
-		warn "indexing failed: $@\n";
 		$db->cancel_transaction;
+		$mm->{dbh}->rollback if $mm;
 	} else {
 		$db->commit_transaction;
 	}
-- 
EW


^ permalink raw reply related	[relevance 7%]

* [PATCH 0/5] introduce SQLite message map
@ 2015-09-15  1:07  6% Eric Wong
  2015-09-15  1:08  7% ` [PATCH 3/5] searchidx: sync Msgmap database along with Xapian Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2015-09-15  1:07 UTC (permalink / raw)
  To: meta

The SQLite message map will make it easier for users to deal with
URLs truncated by copy-and-paste errors.

Eric Wong (5):
      msgmap: add message mapping via SQLite
      searchidx: hoist out rlog code
      searchidx: sync Msgmap database along with Xapian
      extmsg: wire up to use msgmap for prefixes
      INSTALL: document DBD::SQLite and DBI dependencies

 INSTALL                      |   4 +-
 lib/PublicInbox/ExtMsg.pm    |  29 +++++----
 lib/PublicInbox/Msgmap.pm    | 137 +++++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/Search.pm    |   9 ---
 lib/PublicInbox/SearchIdx.pm | 129 ++++++++++++++++++++++++++++------------
 t/msgmap.t                   |  53 +++++++++++++++++
 6 files changed, 300 insertions(+), 61 deletions(-)


^ permalink raw reply	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-09-15  1:07  6% [PATCH 0/5] introduce SQLite message map Eric Wong
2015-09-15  1:08  7% ` [PATCH 3/5] searchidx: sync Msgmap database along with Xapian Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).