user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 7/7] implement external Message-ID finder
Date: Wed,  2 Sep 2015 06:59:39 +0000	[thread overview]
Message-ID: <1441177179-16628-8-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1441177179-16628-1-git-send-email-e@80x24.org>

Currently, this looks at other public-inbox configurations
served in the same process.  In the future, it will generate
links to other Message-ID lookup endpoints.
---
 lib/PublicInbox/ExtMsg.pm | 92 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/View.pm   | 14 ++++----
 lib/PublicInbox/WWW.pm    | 15 +++++---
 public-inbox.cgi          |  1 +
 4 files changed, 110 insertions(+), 12 deletions(-)
 create mode 100644 lib/PublicInbox/ExtMsg.pm

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
new file mode 100644
index 0000000..1c0887c
--- /dev/null
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -0,0 +1,92 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+package PublicInbox::ExtMsg;
+use strict;
+use warnings;
+use URI::Escape qw(uri_escape_utf8);
+use PublicInbox::Hval;
+use PublicInbox::MID qw/mid_compress mid2path/;
+
+# Search the other configured inboxes for $ctx->{mid}: 302 to the first one
+# that has it (Xapian first, then "git cat-file"), else a plain-text 404.
+sub ext_msg {
+	my ($ctx) = @_;
+	my $pi_config = $ctx->{pi_config};
+	my $listname = $ctx->{listname};
+	my $mid = $ctx->{mid};
+	my $cmid = mid_compress($mid);
+
+	eval { require PublicInbox::Search }; # a load failure is tolerated
+	my $have_xap = $@ ? 0 : 1;
+	my @nox; # inboxes left for the git fallback
+
+	foreach my $k (keys %$pi_config) {
+		$k =~ /\Apublicinbox\.([A-Z0-9a-z-]+)\.url\z/ or next;
+		my $list = $1;
+		next if $list eq $listname; # named by $ctx->{listname}
+
+		my $git_dir = $pi_config->{"publicinbox.$list.mainrepo"};
+		defined $git_dir or next;
+
+		my $url = $pi_config->{"publicinbox.$list.url"};
+		defined $url or next;
+
+		$url =~ s!/+\z!!; # r302 appends its own '/'
+
+		# try to find the URL with Xapian to avoid forking
+		if ($have_xap) {
+			my $doc_id = eval {
+				my $s = PublicInbox::Search->new($git_dir);
+				$s->find_unique_doc_id('mid', $cmid);
+			};
+			if ($@) { # xapian not configured for this repo
+			} else {
+				# maybe we found it!
+				return r302($url, $cmid) if (defined $doc_id);
+
+				# Xapian worked but lacks the message: no point
+				# in trying the fork fallback for this repo
+				next;
+			}
+		}
+
+		# queue up for forking after we've tried Xapian on all of them
+		push @nox, { git_dir => $git_dir, url => $url };
+	}
+
+	# git fallback: Xapian not installed or configured for some repos
+	my $path = "HEAD:" . mid2path($cmid);
+
+	foreach my $n (@nox) {
+		my @cmd = ('git', "--git-dir=$n->{git_dir}", 'cat-file',
+			   '-t', $path);
+		my $pid = open my $fh, '-|'; # forks; child STDOUT feeds $fh
+		defined $pid or die "fork failed: $!\n";
+
+		if ($pid == 0) {
+			open STDERR, '>', '/dev/null'; # hide git's errors
+			exec @cmd or die "exec failed: $!\n";
+		} else {
+			my $type = eval { local $/; <$fh> };
+			close $fh;
+			if ($? == 0 && $type eq "blob\n") { # blob exists
+				return r302($n->{url}, $cmid);
+			}
+		}
+	}
+
+	# TODO: fall back to external repos (nothing implemented here yet)
+
+	[404, ['Content-Type'=>'text/plain'], ['Not found']];
+}
+
+# 302 response pointing at "$url/<escaped $mid>/" in another mapped inbox
+sub r302 {
+	my ($url, $mid) = @_;
+	$url .= '/' . uri_escape_utf8($mid) . '/'; # ext_msg strips trailing /
+	[ 302,
+	  [ 'Location' => $url, 'Content-Type' => 'text/plain' ],
+	  [ "Redirecting to\n$url\n" ] ]
+}
+
+1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1528a87..e18895f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -164,7 +164,7 @@ sub emit_thread_html {
 	my $res = $srch->get_thread($mid);
 	my $msgs = load_results($res);
 	my $nr = scalar @$msgs;
-	return missing_thread($cb) if $nr == 0;
+	return missing_thread($cb, $ctx) if $nr == 0;
 	my $flat = $ctx->{flat};
 	my $orig_cb = $cb;
 	my $seen = {};
@@ -189,7 +189,7 @@ sub emit_thread_html {
 
 	# there could be a race due to a message being deleted in git
 	# but still being in the Xapian index:
-	return missing_thread($cb) if ($orig_cb eq $cb);
+	return missing_thread($cb, $ctx) if ($orig_cb eq $cb);
 
 	my $final_anchor = $state->{anchor_idx};
 	my $next = "<a\nid=\"s$final_anchor\">";
@@ -637,12 +637,10 @@ sub thread_results {
 }
 
 sub missing_thread {
-	my ($cb) = @_;
-	my $title = 'Thread does not exist';
-	$cb->([404, ['Content-Type' => 'text/html']])->write(<<EOF);
-<html><head><title>$title</title></head><body><pre>$title
-<a href="../../">Return to index</a></pre></body></html>
-EOF
+	my ($cb, $ctx) = @_;
+	require PublicInbox::ExtMsg;
+
+	$cb->(PublicInbox::ExtMsg::ext_msg($ctx))
 }
 
 sub _msg_date {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 9ae7f7b..16fd16a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -88,7 +88,14 @@ sub preload {
 
 # private functions below
 
-sub r404 { r(404, 'Not Found') }
+sub r404 {
+	my ($ctx) = @_;
+	if ($ctx && $ctx->{mid}) {
+		require PublicInbox::ExtMsg;
+		return PublicInbox::ExtMsg::ext_msg($ctx);
+	}
+	r(404, 'Not Found');
+}
 
 # simple response for errors
 sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
@@ -151,7 +158,7 @@ sub mid2blob {
 # /$LISTNAME/$MESSAGE_ID/raw                    -> raw mbox
 sub get_mid_txt {
 	my ($ctx) = @_;
-	my $x = mid2blob($ctx) or return r404();
+	my $x = mid2blob($ctx) or return r404($ctx);
 	require PublicInbox::Mbox;
 	PublicInbox::Mbox::emit1($x);
 }
@@ -159,7 +166,7 @@ sub get_mid_txt {
 # /$LISTNAME/$MESSAGE_ID/                   -> HTML content (short quotes)
 sub get_mid_html {
 	my ($ctx) = @_;
-	my $x = mid2blob($ctx) or return r404();
+	my $x = mid2blob($ctx) or return r404($ctx);
 
 	require PublicInbox::View;
 	my $foot = footer($ctx);
@@ -173,7 +180,7 @@ sub get_mid_html {
 # /$LISTNAME/$MESSAGE_ID/f/                   -> HTML content (fullquotes)
 sub get_full_html {
 	my ($ctx) = @_;
-	my $x = mid2blob($ctx) or return r404();
+	my $x = mid2blob($ctx) or return r404($ctx);
 
 	require PublicInbox::View;
 	my $foot = footer($ctx);
diff --git a/public-inbox.cgi b/public-inbox.cgi
index 75d510c..1fcc04f 100755
--- a/public-inbox.cgi
+++ b/public-inbox.cgi
@@ -18,6 +18,7 @@ BEGIN {
 	%HTTP_CODES = (
 		200 => 'OK',
 		301 => 'Moved Permanently',
+		302 => 'Found',
 		404 => 'Not Found',
 		405 => 'Method Not Allowed',
 		501 => 'Not Implemented',
-- 
EW


      parent reply	other threads:[~2015-09-02  6:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-02  6:59 [PATCH 0/7] improved thread views and 404 reductions Eric Wong
2015-09-02  6:59 ` [PATCH 1/7] view: close possible race condition in thread view Eric Wong
2015-09-02  6:59 ` [PATCH 2/7] view: optional flat view for recent messages Eric Wong
2015-09-02  6:59 ` [PATCH 3/7] view: account for missing In-Reply-To header Eric Wong
2015-09-02  6:59 ` [PATCH 4/7] view: simplify parent anchoring code Eric Wong
2015-09-02  6:59 ` [PATCH 5/7] view: pre-anchor entries for flat view Eric Wong
2015-09-02  6:59 ` [PATCH 6/7] view: avoid links to unknown compressed Message-IDs Eric Wong
2015-09-02  6:59 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1441177179-16628-8-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).