about summary refs log tree commit homepage
path: root/lib/PublicInbox/Inbox.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-08-02 10:02:54 +0000
committerEric Wong <e@80x24.org>2016-08-04 00:04:53 +0000
commit95acd5901491e4f333f5d2bbeed6fb5e6b53e07c (patch)
treef8e53e9f38ea3710499ca7799936bfd1af352601 /lib/PublicInbox/Inbox.pm
parent4caa17ef5aa8554d78b8c013096f12429c58e050 (diff)
downloadpublic-inbox-95acd5901491e4f333f5d2bbeed6fb5e6b53e07c.tar.gz
Doing git tree lookups based on the SHA-1 of the Message-ID
is expensive as trees get larger; instead, use the SHA-1
object ID directly.  This drastically reduces the amount
of time spent in the "git cat-file --batch" process for
fetching the /$INBOX/all.mbox.gz endpoint on the ~800MB
git@vger.kernel.org mirror.

This retains backwards compatibility and allows existing
indices to be transparently upgraded without performance
degradation.
Diffstat (limited to 'lib/PublicInbox/Inbox.pm')
-rw-r--r--lib/PublicInbox/Inbox.pm12
1 files changed, 12 insertions, 0 deletions
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 4fbbb522..e552cd4f 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -162,6 +162,18 @@ sub msg_by_path ($$;$) {
         $str;
 }
 
+sub msg_by_smsg ($$;$) {
+        my ($self, $smsg, $ref) = @_;
+        # Fetch the message for $smsg via cat_file on its blob object ID.
+        # backwards compat: no blob => fall back to msg_by_mid ($ref not passed)
+        # TODO: remove if we bump SCHEMA_VERSION in Search.pm:
+        defined(my $blob = $smsg->blob) or return msg_by_mid($self, $smsg->mid);
+        # on success, drop leading CR/LF and one "From " line; return $str as-is
+        my $str = git($self)->cat_file($blob, $ref);
+        $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str;
+        $str;
+}
+
 sub path_check {
         my ($self, $path) = @_;
         git($self)->check('HEAD:'.$path);