about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-11-23 07:06:01 +0000
committer Eric Wong <e@80x24.org> 2020-11-24 05:03:55 +0000
commit be688d5b00bb77c6601b3ab680403ecd71ac4871 (patch)
tree cc77f8b9a7dd6f5a357ba47e0b913302547b7be6
parent 616a08457175b442d4caeb67f9ccd9d3e69f50f5 (diff)
download public-inbox-be688d5b00bb77c6601b3ab680403ecd71ac4871.tar.gz
For a mirror of lore.kernel.org with >140 inboxes, this speeds
up manifest.js.gz generation from ~1s to 40ms on my HW.  This
is still unacceptable when dealing with thousands of inboxes,
but gets us closer to where we need to be.
-rw-r--r-- lib/PublicInbox/Config.pm 3
-rw-r--r-- lib/PublicInbox/Inbox.pm 2
-rw-r--r-- lib/PublicInbox/InboxWritable.pm 2
-rw-r--r-- lib/PublicInbox/ManifestJsGz.pm 39
-rw-r--r-- lib/PublicInbox/MiscSearch.pm 19
5 files changed, 56 insertions, 9 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 039eb445..251008a3 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -94,6 +94,9 @@ sub lookup_ei {
         $self->{-ei_by_name}->{$name} //= _fill_ei($self, "extindex.$name");
 }
 
+# special case for [extindex "all"]
+sub ALL { lookup_ei($_[0], 'all') }
+
 sub each_inbox {
         my ($self, $cb, @arg) = @_;
         # may auto-vivify if config file is non-existent:
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index a1a072ad..5a22e40d 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -429,4 +429,6 @@ sub on_unlock {
 
 sub uidvalidity  { $_[0]->{uidvalidity} //= $_[0]->mm->created_at }
 
+sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }
+
 1;
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index d3c255c7..e97c7e2d 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -319,6 +319,4 @@ sub git_dir_latest {
         $latest;
 }
 
-sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }
-
 1;
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 3b436827..2c4a231d 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -21,6 +21,14 @@ sub url_regexp {
         $ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
 }
 
+sub inject_entry ($$$;$) {
+        my ($ctx, $url_path, $ent, $git_dir) = @_;
+        $ctx->{-abs2urlpath}->{$git_dir // delete $ent->{git_dir}} = $url_path;
+        my $modified = $ent->{modified};
+        $ctx->{-mtime} = $modified if $modified > ($ctx->{-mtime} // 0);
+        $ctx->{manifest}->{$url_path} = $ent;
+}
+
 sub manifest_add ($$;$$) {
         my ($ctx, $ibx, $epoch, $default_desc) = @_;
         my $url_path = "/$ibx->{name}";
@@ -32,15 +40,10 @@ sub manifest_add ($$;$$) {
                 $git = $ibx->git;
         }
         my $ent = $git->manifest_entry($epoch, $default_desc) or return;
-        $ctx->{-abs2urlpath}->{$git->{git_dir}} = $url_path;
-        my $modified = $ent->{modified};
-        if ($modified > ($ctx->{-mtime} // 0)) {
-                $ctx->{-mtime} = $modified;
-        }
-        $ctx->{manifest}->{$url_path} = $ent;
+        inject_entry($ctx, $url_path, $ent, $git->{git_dir});
 }
 
-sub ibx_entry {
+sub slow_manifest_add ($$) {
         my ($ctx, $ibx) = @_;
         eval {
                 if (defined(my $max = $ibx->max_git_epoch)) {
@@ -52,6 +55,28 @@ sub ibx_entry {
                         manifest_add($ctx, $ibx);
                 }
         };
+}
+
+sub eidx_manifest_add ($$$) {
+        my ($ctx, $ALL, $ibx) = @_;
+        if (my $data = $ALL->misc->inbox_data($ibx)) {
+                $data = $json->decode($data);
+                while (my ($url_path, $ent) = each %$data) {
+                        inject_entry($ctx, $url_path, $ent);
+                }
+        } else {
+                warn "E: `${\$ibx->eidx_key}' not indexed by $ALL->{topdir}\n";
+        }
+}
+
+sub ibx_entry {
+        my ($ctx, $ibx) = @_;
+        my $ALL = $ctx->{www}->{pi_config}->ALL;
+        if ($ALL) {
+                eidx_manifest_add($ctx, $ALL, $ibx);
+        } else {
+                slow_manifest_add($ctx, $ibx);
+        }
         warn "E: $@" if $@;
 }
 
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 8beb8349..5a44d751 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -76,4 +76,23 @@ sub mset {
         retry_reopen($self, \&misc_enquire_once, [ $self, $qr, $opt ]);
 }
 
+sub ibx_data_once {
+        my ($self, $ibx) = @{$_[0]};
+        my $xdb = $self->{xdb};
+        my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private
+        my $head = $xdb->postlist_begin('Q'.$eidx_key);
+        my $tail = $xdb->postlist_end('Q'.$eidx_key);
+        if ($head != $tail) {
+                my $doc = $xdb->get_document($head->get_docid);
+                $doc->get_data;
+        } else {
+                undef;
+        }
+}
+
+sub inbox_data {
+        my ($self, $ibx) = @_;
+        retry_reopen($self, \&ibx_data_once, [ $self, $ibx ]);
+}
+
 1;