diff options
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/Address.pm | 25 | ||||
-rw-r--r-- | lib/PublicInbox/Config.pm | 59 | ||||
-rw-r--r-- | lib/PublicInbox/Daemon.pm | 36 | ||||
-rw-r--r-- | lib/PublicInbox/EvCleanup.pm | 41 | ||||
-rw-r--r-- | lib/PublicInbox/Feed.pm | 30 | ||||
-rw-r--r-- | lib/PublicInbox/GetlineBody.pm | 35 | ||||
-rw-r--r-- | lib/PublicInbox/Git.pm | 3 | ||||
-rw-r--r-- | lib/PublicInbox/GitHTTPBackend.pm | 293 | ||||
-rw-r--r-- | lib/PublicInbox/HTTP.pm | 164 | ||||
-rw-r--r-- | lib/PublicInbox/HTTPD.pm | 5 | ||||
-rw-r--r-- | lib/PublicInbox/HTTPD/Async.pm | 48 | ||||
-rw-r--r-- | lib/PublicInbox/Import.pm | 39 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 17 | ||||
-rw-r--r-- | lib/PublicInbox/MDA.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/Mbox.pm | 157 | ||||
-rw-r--r-- | lib/PublicInbox/NNTP.pm | 47 | ||||
-rw-r--r-- | lib/PublicInbox/NNTPD.pm | 27 | ||||
-rw-r--r-- | lib/PublicInbox/NewsGroup.pm | 84 | ||||
-rw-r--r-- | lib/PublicInbox/NewsWWW.pm | 41 | ||||
-rw-r--r-- | lib/PublicInbox/Qspawn.pm | 52 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/SearchMsg.pm | 12 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/Spawn.pm | 1 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 67 | ||||
-rw-r--r-- | lib/PublicInbox/WWW.pm | 58 |
26 files changed, 675 insertions, 691 deletions
diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm new file mode 100644 index 00000000..ef4cbdc6 --- /dev/null +++ b/lib/PublicInbox/Address.pm @@ -0,0 +1,25 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::Address; +use strict; +use warnings; + +# very loose regexes, here. We don't need RFC-compliance, +# just enough to make thing sanely displayable and pass to git + +sub emails { ($_[0] =~ /([^<\s]+\@[^>\s]+)/g) } + +sub from_name { + my ($val) = @_; + my $name = $val; + $name =~ s/\s*\S+\@\S+\s*\z//; + if ($name !~ /\S/ || $name =~ /[<>]/) { # git does not like [<>] + ($name) = emails($val); + $name =~ s/\@.*//; + } + $name =~ tr/\r\n\t/ /; + $name =~ s/\A\s*//; + $name; +} + +1; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index b5f0fcb1..a8c5105e 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -5,9 +5,8 @@ package PublicInbox::Config; use strict; use warnings; -use base qw/Exporter/; -our @EXPORT_OK = qw/try_cat/; require PublicInbox::Inbox; +use PublicInbox::Spawn qw(popen_rd); use File::Path::Expand qw/expand_filename/; # returns key-value pairs of config directives in a hash @@ -21,6 +20,7 @@ sub new { # caches $self->{-by_addr} ||= {}; $self->{-by_name} ||= {}; + $self->{-by_newsgroup} ||= {}; $self; } @@ -55,7 +55,25 @@ sub lookup_name { my ($self, $name) = @_; my $rv = $self->{-by_name}->{$name}; return $rv if $rv; - $self->{-by_name}->{$name} = _fill($self, "publicinbox.$name"); + $rv = _fill($self, "publicinbox.$name") or return; +} + +sub lookup_newsgroup { + my ($self, $ng) = @_; + $ng = lc($ng); + my $rv = $self->{-by_newsgroup}->{$ng}; + return $rv if $rv; + + foreach my $k (keys %$self) { + $k =~ /\A(publicinbox\.[\w-]+)\.newsgroup\z/ or next; + my $v = $self->{$k}; + my $pfx = $1; + if ($v eq $ng) { + $rv = _fill($self, $pfx); + return $rv; + } + } + undef; } sub get { @@ -77,9 +95,9 @@ sub git_config_dump { my ($in, $out); my @cmd = (qw/git config/, "--file=$file", '-l'); my $cmd = join(' ', @cmd); - my $pid = open(my $fh, '-|', @cmd); - defined $pid or die "$cmd failed: $!"; + my $fh = popen_rd(\@cmd); my %rv; + local $/ = "\n"; foreach my $line (<$fh>) { chomp $line; my ($k, $v) = split(/=/, $line, 2); @@ -95,42 +113,35 @@ sub git_config_dump { $rv{$k} = $v; } } - close $fh or die "failed to close ($cmd) pipe: $!"; - $? and warn "$$ $cmd exited with: ($pid) $?"; + close $fh or die "failed to close ($cmd) pipe: $?"; \%rv; } -sub try_cat { - my ($path) = @_; - my $rv; - if (open(my $fh, '<', $path)) { - local $/; - $rv = <$fh>; - } - $rv; -} - sub _fill { my ($self, $pfx) = @_; my $rv = {}; - foreach my $k (qw(mainrepo address filter url)) { + foreach my $k (qw(mainrepo address filter url newsgroup)) { my $v = $self->{"$pfx.$k"}; $rv->{$k} = $v if defined $v; } - my $inbox = $pfx; - $inbox =~ s/\Apublicinbox\.//; - $rv->{name} = $inbox; + return unless $rv->{mainrepo}; + my $name = $pfx; + $name =~ s/\Apublicinbox\.//; + $rv->{name} = $name; my $v = $rv->{address} ||= 'public-inbox@example.com'; - $rv->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v; + my $p = $rv->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v; + $rv->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost'; $rv = PublicInbox::Inbox->new($rv); if (ref($v) eq 'ARRAY') { $self->{-by_addr}->{lc($_)} = $rv foreach @$v; } else { $self->{-by_addr}->{lc($v)} = $rv; } - $rv; + if (my $ng = $rv->{newsgroup}) { + $self->{-by_newsgroup}->{$ng} = $rv; + } + $self->{-by_name}->{$name} = $rv; } - 1; diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index 8de7ff24..b64ec874 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -141,15 +141,15 @@ sub daemonize () { }; if ($daemonize) { - my ($pid, $err) = do_fork(); - die "could not fork: $err\n" unless defined $pid; + my $pid = fork; + die "could not fork: $!\n" unless defined $pid; exit if $pid; open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n"; open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; POSIX::setsid(); - ($pid, $err) = do_fork(); - die "could not fork: $err\n" unless defined $pid; + $pid = fork; + die "could not fork: $!\n" unless defined $pid; exit if $pid; } if (defined $pid_file) { @@ -278,9 +278,9 @@ sub upgrade () { $pid_file .= '.oldbin'; write_pid($pid_file); } - my ($pid, $err) = do_fork(); + my $pid = fork; unless (defined $pid) { - warn "fork failed: $err\n"; + warn "fork failed: $!\n"; return; } if ($pid == 0) { @@ -305,17 +305,6 @@ sub kill_workers ($) { } } -sub do_fork () { - my $new = POSIX::SigSet->new; - $new->fillset; - my $old = POSIX::SigSet->new; - POSIX::sigprocmask(&POSIX::SIG_BLOCK, $new, $old) or die "SIG_BLOCK: $!"; - my $pid = fork; - my $err = $!; - POSIX::sigprocmask(&POSIX::SIG_SETMASK, $old) or die "SIG_SETMASK: $!"; - ($pid, $err); -} - sub upgrade_aborted ($) { my ($p) = @_; warn "reexec PID($p) died with: $?\n"; @@ -350,6 +339,7 @@ sub unlink_pid_file_safe_ish ($$) { return unless defined $unlink_pid && $unlink_pid == $$; open my $fh, '<', $file or return; + local $/ = "\n"; defined(my $read_pid = <$fh>) or return; chomp $read_pid; if ($read_pid == $unlink_pid) { @@ -385,7 +375,13 @@ sub master_loop { exit if $quit++; kill_workers($s); } elsif ($s eq 'WINCH') { - $worker_processes = 0; + if (-t STDIN || -t STDOUT || -t STDERR) { + warn +"ignoring SIGWINCH while connected to terminal\n"; + $SIG{WINCH} = 'IGNORE'; + } else { + $worker_processes = 0; + } } elsif ($s eq 'HUP') { $worker_processes = $set_workers; kill_workers($s); @@ -417,9 +413,9 @@ sub master_loop { $n = $worker_processes; } foreach my $i ($n..($worker_processes - 1)) { - my ($pid, $err) = do_fork(); + my $pid = fork; if (!defined $pid) { - warn "failed to fork worker[$i]: $err\n"; + warn "failed to fork worker[$i]: $!\n"; } elsif ($pid == 0) { $set_user->() if $set_user; return $p0; # run normal work code diff --git a/lib/PublicInbox/EvCleanup.pm b/lib/PublicInbox/EvCleanup.pm new file mode 100644 index 00000000..5efb0930 --- /dev/null +++ b/lib/PublicInbox/EvCleanup.pm @@ -0,0 +1,41 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# event cleanups (currently for Danga::Socket) +package PublicInbox::EvCleanup; +use strict; +use warnings; + +my $asapq = { queue => [], timer => undef }; +my $laterq = { queue => [], timer => undef }; + +sub _run_all ($) { + my ($q) = @_; + + my $run = $q->{queue}; + $q->{queue} = []; + $q->{timer} = undef; + $_->() foreach @$run; +} + +sub _run_asap () { _run_all($asapq) } +sub _run_later () { _run_all($laterq) } + +sub asap ($) { + my ($cb) = @_; + push @{$asapq->{queue}}, $cb; + $asapq->{timer} ||= Danga::Socket->AddTimer(0, *_run_asap); +} + +sub later ($) { + my ($cb) = @_; + push @{$laterq->{queue}}, $cb; + $laterq->{timer} ||= Danga::Socket->AddTimer(60, *_run_later); +} + +END { + _run_asap(); + _run_later(); +} + +1; diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index e2df97b1..07774cbf 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -5,13 +5,13 @@ package PublicInbox::Feed; use strict; use warnings; -use Email::Address; use Email::MIME; use Date::Parse qw(strptime); use PublicInbox::Hval qw/ascii_html/; use PublicInbox::Git; use PublicInbox::View; use PublicInbox::MID qw/mid_clean mid2path/; +use PublicInbox::Address; use POSIX qw/strftime/; use constant { DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # Atom standard @@ -86,7 +86,6 @@ sub _no_thread { sub end_feed { my ($fh) = @_; - Email::Address->purge_cache; $fh->write('</feed>'); $fh->close; } @@ -141,8 +140,8 @@ sub emit_html_index { # if the 'r' query parameter is given, it is a legacy permalink # which we must continue supporting: - my $cgi = $ctx->{cgi}; - if ($cgi && !$cgi->param('r') && $srch) { + my $qp = $ctx->{qp}; + if ($qp && !$qp->{r} && $srch) { $state->{srch} = $srch; $last = PublicInbox::View::emit_index_topics($state); $param = 'o'; @@ -150,7 +149,7 @@ sub emit_html_index { $last = emit_index_nosrch($ctx, $state); $param = 'r'; } - $footer = nav_footer($cgi, $last, $feed_opts, $state, $param); + $footer = nav_footer($ctx, $last, $feed_opts, $state, $param); if ($footer) { my $list_footer = $ctx->{footer}; $footer .= "\n\n" . $list_footer if $list_footer; @@ -171,14 +170,13 @@ sub emit_index_nosrch { PublicInbox::View::index_entry($mime, 0, $state); 1; }); - Email::Address->purge_cache; $last; } sub nav_footer { - my ($cgi, $last, $feed_opts, $state, $param) = @_; - $cgi or return ''; - my $old_r = $cgi->param($param); + my ($ctx, $last, $feed_opts, $state, $param) = @_; + my $qp = $ctx->{qp} or return ''; + my $old_r = $qp->{$param}; my $head = ' '; my $next = ' '; my $first = $state->{first}; @@ -188,7 +186,7 @@ sub nav_footer { $next = qq!<a\nhref="?$param=$last"\nrel=next>next</a>!; } if ($old_r) { - $head = $cgi->path_info; + $head = $ctx->{env}->{PATH_INFO}; $head = qq!<a\nhref="$head">head</a>!; } my $atom = "<a\nhref=\"$feed_opts->{atomurl}\">Atom feed</a>"; @@ -202,11 +200,11 @@ sub each_recent_blob { my $addmsg = qr!^:000000 100644 \S+ \S+ A\t(${hex}{2}/${hex}{38})$!; my $delmsg = qr!^:100644 000000 \S+ \S+ D\t(${hex}{2}/${hex}{38})$!; my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~\d+)?/; - my $cgi = $ctx->{cgi}; + my $qp = $ctx->{qp}; # revision ranges may be specified my $range = 'HEAD'; - my $r = $cgi->param('r') if $cgi; + my $r = $qp->{r} if $qp; if ($r && ($r =~ /\A(?:$refhex\.\.)?$refhex\z/o)) { $range = $r; } @@ -224,6 +222,7 @@ sub each_recent_blob { my $nr = 0; my ($cur_commit, $first_commit, $last_commit); my ($ts, $subj, $u); + local $/ = "\n"; while (defined(my $line = <$log>)) { if ($line =~ /$addmsg/o) { my $add = $1; @@ -244,6 +243,7 @@ sub each_recent_blob { } if ($last) { + local $/ = "\n"; while (my $line = <$log>) { if ($line =~ /^(${hex}{7,40})/o) { $last_commit = $1; @@ -328,9 +328,9 @@ sub add_to_feed { $title = title_tag($title); my $from = $header_obj->header('From') or return 0; - my @from = Email::Address->parse($from) or return 0; - my $name = ascii_html($from[0]->name); - my $email = $from[0]->address; + my ($email) = PublicInbox::Address::emails($from); + my $name = PublicInbox::Address::from_name($from); + $name = ascii_html($name); $email = ascii_html($email); if (delete $feed_opts->{emit_header}) { diff --git a/lib/PublicInbox/GetlineBody.pm b/lib/PublicInbox/GetlineBody.pm new file mode 100644 index 00000000..5f327828 --- /dev/null +++ b/lib/PublicInbox/GetlineBody.pm @@ -0,0 +1,35 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# Wrap a pipe or file for PSGI streaming response bodies and calls the +# end callback when the object goes out-of-scope. +# This depends on rpipe being _blocking_ on getline. +package PublicInbox::GetlineBody; +use strict; +use warnings; + +sub new { + my ($class, $rpipe, $end, $buf) = @_; + bless { rpipe => $rpipe, end => $end, buf => $buf }, $class; +} + +# close should always be called after getline returns undef, +# but a client aborting a connection can ruin our day; so lets +# hope our underlying PSGI server does not leak references, here. +sub DESTROY { $_[0]->close } + +sub getline { + my ($self) = @_; + my $buf = delete $self->{buf}; # initial buffer + defined $buf ? $buf : $self->{rpipe}->getline; +} + +sub close { + my ($self) = @_; + my $rpipe = delete $self->{rpipe}; + close $rpipe if $rpipe; + my $end = delete $self->{end}; + $end->() if $end; +} + +1; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index d8211827..bc0e5064 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -41,6 +41,7 @@ sub cat_file { $self->{out}->print($obj, "\n") or fail($self, "write error: $!"); my $in = $self->{in}; + local $/ = "\n"; my $head = $in->getline; $head =~ / missing$/ and return undef; $head =~ /^[0-9a-f]{40} \S+ (\d+)$/ or @@ -90,6 +91,7 @@ sub check { my ($self, $obj) = @_; $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!"); + local $/ = "\n"; chomp(my $line = $self->{in_c}->getline); my ($hex, $type, $size) = split(' ', $line); return if $type eq 'missing'; @@ -120,6 +122,7 @@ sub popen { sub qx { my ($self, @cmd) = @_; my $fh = $self->popen(@cmd); + local $/ = "\n"; return <$fh> if wantarray; local $/; <$fh> diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index b58cc30f..9660d21e 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -8,16 +8,9 @@ use strict; use warnings; use Fcntl qw(:seek); use IO::File; -use PublicInbox::Spawn qw(spawn); use HTTP::Date qw(time2str); - -# TODO: make configurable, but keep in mind it's better to have -# multiple -httpd worker processes which are already scaled to -# the proper number of CPUs and memory. git-pack-objects(1) may -# also use threads and bust memory limits, too, so I recommend -# limiting threads to 1 (via `pack.threads` knob in git) for serving. -my $LIMIT = 1; -my $nr_running = 0; +use HTTP::Status qw(status_message); +use PublicInbox::Qspawn; # n.b. serving "description" and "cloneurl" should be innocuous enough to # not cause problems. serving "config" might... @@ -38,31 +31,26 @@ my @no_cache = ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT', 'Pragma', 'no-cache', 'Cache-Control', 'no-cache, max-age=0, must-revalidate'); -my $nextq; -sub do_next () { - my $q = $nextq; - $nextq = undef; - while (my $cb = shift @$q) { - $cb->(); # this may redefine nextq - } -} - -sub r ($) { - my ($s) = @_; - [ $s, [qw(Content-Type text/plain Content-Length 0), @no_cache ], [] ] +sub r ($;$) { + my ($code, $msg) = @_; + $msg ||= status_message($code); + my $len = length($msg); + [ $code, [qw(Content-Type text/plain Content-Length), $len, @no_cache], + [$msg] ] } sub serve { - my ($cgi, $git, $path) = @_; - return serve_dumb($cgi, $git, $path) if $nr_running >= $LIMIT; + my ($env, $git, $path) = @_; - my $service = $cgi->param('service') || ''; - if ($service =~ /\Agit-\w+-pack\z/ || $path =~ /\Agit-\w+-pack\z/) { - my $ok = serve_smart($cgi, $git, $path); + # Documentation/technical/http-protocol.txt in git.git + # requires one and exactly one query parameter: + if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ || + $path =~ /\Agit-\w+-pack\z/) { + my $ok = serve_smart($env, $git, $path); return $ok if $ok; } - serve_dumb($cgi, $git, $path); + serve_dumb($env, $git, $path); } sub err ($@) { @@ -77,7 +65,7 @@ sub drop_client ($) { } sub serve_dumb { - my ($cgi, $git, $path) = @_; + my ($env, $git, $path) = @_; my @h; my $type; @@ -96,71 +84,42 @@ sub serve_dumb { return r(404) unless -f $f && -r _; # just in case it's a FIFO :P my @st = stat(_); my $size = $st[7]; - my $env = $cgi->{env}; # TODO: If-Modified-Since and Last-Modified? open my $in, '<', $f or return r(404); my $len = $size; - my $n = 65536; # try to negotiate a big TCP window, first - my ($next, $fh); - my $cb = sub { - $n = $len if $len < $n; - my $r = sysread($in, my $buf, $n); - if (!defined $r) { - err($env, "$f read error: $!"); - drop_client($env); - } elsif ($r <= 0) { - err($env, "$f EOF with $len bytes left"); - drop_client($env); - } else { - $len -= $r; - $fh->write($buf); - if ($len == 0) { - $fh->close; - } elsif ($next) { - # avoid recursion in Danga::Socket::write - unless ($nextq) { - $nextq = []; - Danga::Socket->AddTimer(0, *do_next); - } - # avoid buffering too much in case we have - # slow clients: - $n = 8192; - push @$nextq, $next; - return; - } - } - # all done, cleanup references: - $fh = $next = undef; - }; - my $code = 200; push @h, 'Content-Type', $type; - my $range = $env->{HTTP_RANGE}; - if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) { - ($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size); + if (($env->{HTTP_RANGE} || '') =~ /\bbytes=(\d*)-(\d*)\z/) { + ($code, $len) = prepare_range($env, $in, \@h, $1, $2, $size); if ($code == 416) { push @h, 'Content-Range', "bytes */$size"; return [ 416, \@h, [] ]; } } push @h, 'Content-Length', $len; - - sub { - my ($res) = @_; # Plack callback - $fh = $res->([ $code, \@h ]); - if (defined $env->{'pi-httpd.async'}) { - my $pi_http = $env->{'psgix.io'}; - $next = sub { $pi_http->write($cb) }; - $cb->(); # start it off! - } else { - $cb->() while $fh; - } - } + my $n = 65536; + [ $code, \@h, Plack::Util::inline_object(close => sub { close $in }, + getline => sub { + return if $len == 0; + $n = $len if $len < $n; + my $r = sysread($in, my $buf, $n); + if (!defined $r) { + err($env, "$f read error: $!"); + } elsif ($r <= 0) { + err($env, "$f EOF with $len bytes left"); + } else { + $len -= $r; + $n = 8192; + return $buf; + } + drop_client($env); + return; + })] } sub prepare_range { - my ($cgi, $in, $h, $beg, $end, $size) = @_; + my ($env, $in, $h, $beg, $end, $size) = @_; my $code = 200; my $len = $size; if ($beg eq '') { @@ -194,7 +153,7 @@ sub prepare_range { push @$h, "bytes $beg-$end/$size"; # FIXME: Plack::Middleware::Deflater bug? - $cgi->{env}->{'psgix.no-compress'} = 1; + $env->{'psgix.no-compress'} = 1; } } ($code, $len); @@ -202,23 +161,12 @@ sub prepare_range { # returns undef if 403 so it falls back to dumb HTTP sub serve_smart { - my ($cgi, $git, $path) = @_; - my $env = $cgi->{env}; - - my $input = $env->{'psgi.input'}; - my $buf; - my $in; - my $fd = eval { fileno($input) }; - if (defined $fd && $fd >= 0) { - $in = $input; - } else { + my ($env, $git, $path) = @_; + my $in = $env->{'psgi.input'}; + my $fd = eval { fileno($in) }; + unless (defined $fd && $fd >= 0) { $in = input_to_file($env) or return r(500); } - my ($rpipe, $wpipe); - unless (pipe($rpipe, $wpipe)) { - err($env, "error creating pipe: $! - going static"); - return; - } my %env = %ENV; # GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL # may be set in the server-process and are passed as-is @@ -234,101 +182,69 @@ sub serve_smart { my $git_dir = $git->{git_dir}; $env{GIT_HTTP_EXPORT_ALL} = '1'; $env{PATH_TRANSLATED} = "$git_dir/$path"; - my %rdr = ( 0 => fileno($in), 1 => fileno($wpipe) ); - my $pid = spawn([qw(git http-backend)], \%env, \%rdr); - unless (defined $pid) { - err($env, "error spawning: $! - going static"); - return; - } - $wpipe = $in = undef; - $buf = ''; - my ($vin, $fh, $res); - $nr_running++; + my %rdr = ( 0 => fileno($in) ); + my $x = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, \%rdr); + my ($fh, $rpipe); + my $end = sub { + if (my $err = $x->finish) { + err($env, "git http-backend ($git_dir): $err"); + drop_client($env); + } + $fh->close if $fh; # async-only + }; # Danga::Socket users, we queue up the read_enable callback to # fire after pending writes are complete: - my $pi_http = $env->{'psgix.io'}; - my $read_enable = sub { $rpipe->watch_read(1) }; - my $read_disable = sub { - $rpipe->watch_read(0); - $pi_http->write($read_enable); + my $buf = ''; + my $rd_hdr = sub { + my $r = sysread($rpipe, $buf, 1024, length($buf)); + return if !defined($r) && ($!{EINTR} || $!{EAGAIN}); + return r(500, 'http-backend error') unless $r; + $r = parse_cgi_headers(\$buf) or return; # incomplete headers + $r->[0] == 403 ? serve_dumb($env, $git, $path) : $r; }; - - my $end = sub { - if ($fh) { - $fh->close; - $fh = undef; - } - if ($rpipe) { - # _may_ be Danga::Socket::close via - # PublicInbox::HTTPD::Async::close: - $rpipe->close; - $rpipe = undef; - $nr_running--; + my $res; + my $async = $env->{'pi-httpd.async'}; + my $io = $env->{'psgix.io'}; + my $cb = sub { + my $r = $rd_hdr->() or return; + $rd_hdr = undef; + if (scalar(@$r) == 3) { # error: + if ($async) { + $async->close; # calls rpipe->close + } else { + $rpipe->close; + $end->(); + } + return $res->($r); } - if (defined $pid) { - my $e = $pid == waitpid($pid, 0) ? - $? : "PID:$pid still running?"; - err($env, "git http-backend ($git_dir): $e") if $e; + if ($async) { + $fh = $res->($r); + return $async->async_pass($io, $fh, \$buf); } - return unless $res; - my $dumb = serve_dumb($cgi, $git, $path); - ref($dumb) eq 'ARRAY' ? $res->($dumb) : $dumb->($res); - }; - my $fail = sub { - if ($!{EAGAIN} || $!{EINTR}) { - select($vin, undef, undef, undef) if defined $vin; - # $vin is undef on async, so this is a noop on EAGAIN - return; - } - my $e = $!; - $end->(); - err($env, "git http-backend ($git_dir): $e\n"); + + # for synchronous PSGI servers + require PublicInbox::GetlineBody; + $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, $buf); + $res->($r); }; - my $cb = sub { # read git-http-backend output and stream to client - my $r = $rpipe ? $rpipe->sysread($buf, 8192, length($buf)) : 0; - return $fail->() unless defined $r; - return $end->() if $r == 0; # EOF - if ($fh) { # stream body from git-http-backend to HTTP client - $fh->write($buf); - $buf = ''; - $read_disable->() if $read_disable; - } elsif ($buf =~ s/\A(.*?)\r\n\r\n//s) { # parse headers - my $h = $1; - my $code = 200; - my @h; - foreach my $l (split(/\r\n/, $h)) { - my ($k, $v) = split(/:\s*/, $l, 2); - if ($k =~ /\AStatus\z/i) { - ($code) = ($v =~ /\b(\d+)\b/); - } else { - push @h, $k, $v; - } - } - if ($code == 403) { - # smart cloning disabled, serve dumbly - # in $end since we never undef $res in here - } else { # write response header: - $fh = $res->([ $code, \@h ]); - $res = undef; - $fh->write($buf); + sub { + ($res) = @_; + + # hopefully this doesn't break any middlewares, + # holding the input here is a waste of FDs and memory + $env->{'psgi.input'} = undef; + + $x->start(sub { # may run later, much later... + ($rpipe) = @_; + $in = undef; + if ($async) { + $async = $async->($rpipe, $cb, $end); + } else { # generic PSGI + $cb->() while $rd_hdr; } - $buf = ''; - } # else { keep reading ... } + }); }; - if (my $async = $env->{'pi-httpd.async'}) { - # $async is PublicInbox::HTTPD::Async->new($rpipe, $cb) - $rpipe = $async->($rpipe, $cb); - sub { ($res) = @_ } # let Danga::Socket handle the rest. - } else { # synchronous loop for other PSGI servers - $read_enable = $read_disable = undef; - $vin = ''; - vec($vin, fileno($rpipe), 1) = 1; - sub { - ($res) = @_; - while ($rpipe) { $cb->() } - } - } } sub input_to_file { @@ -350,4 +266,21 @@ sub input_to_file { return $in; } +sub parse_cgi_headers { + my ($bref) = @_; + $$bref =~ s/\A(.*?)\r\n\r\n//s or return; + my $h = $1; + my $code = 200; + my @h; + foreach my $l (split(/\r\n/, $h)) { + my ($k, $v) = split(/:\s*/, $l, 2); + if ($k =~ /\AStatus\z/i) { + ($code) = ($v =~ /\b(\d+)\b/); + } else { + push @h, $k, $v; + } + } + [ $code, \@h ] +} + 1; diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 1ef3fb31..6df1c3fc 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -4,18 +4,19 @@ # Generic PSGI server for convenience. It aims to provide # a consistent experience for public-inbox admins so they don't have # to learn different ways to admin both NNTP and HTTP components. -# There's nothing public-inbox-specific, here. +# There's nothing which depends on public-inbox, here. # Each instance of this class represents a HTTP client socket package PublicInbox::HTTP; use strict; use warnings; use base qw(Danga::Socket); -use fields qw(httpd env rbuf input_left remote_addr remote_port); +use fields qw(httpd env rbuf input_left remote_addr remote_port forward pull); use Fcntl qw(:seek); use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); +use Scalar::Util qw(weaken); use IO::File; use constant { CHUNK_START => -1, # [a-f0-9]+\r\n @@ -24,15 +25,27 @@ use constant { CHUNK_MAX_HDR => 256, }; -# FIXME: duplicated code with NNTP.pm +# FIXME: duplicated code with NNTP.pm, layering violation my $WEAKEN = {}; # string(inbox) -> inbox -my $WEAKTIMER; +my $weakt; sub weaken_task () { - $WEAKTIMER = undef; + $weakt = undef; $_->weaken_all for values %$WEAKEN; $WEAKEN = {}; } +my $pipelineq = []; +my $pipet; +sub process_pipelineq () { + my $q = $pipelineq; + $pipet = undef; + $pipelineq = []; + foreach (@$q) { + next if $_->{closed}; + rbuf_process($_); + } +} + # Use the same configuration parameter as git since this is primarily # a slow-client sponge for git-http-backend # TODO: support per-respository http.maxRequestBuffer somehow... @@ -180,12 +193,20 @@ sub response_header_write { my $conn = $env->{HTTP_CONNECTION} || ''; my $term = defined($len) || $chunked; - my $alive = $term && - (($proto eq 'HTTP/1.1' && $conn !~ /\bclose\b/i) || - ($conn =~ /\bkeep-alive\b/i)); - - $h .= 'Connection: ' . ($alive ? 'keep-alive' : 'close'); - $h .= "\r\nDate: " . http_date() . "\r\n\r\n"; + my $prot_persist = ($proto eq 'HTTP/1.1') && ($conn !~ /\bclose\b/i); + my $alive; + if (!$term && $prot_persist) { # auto-chunk + $chunked = $alive = 2; + $h .= "Transfer-Encoding: chunked\r\n"; + # no need for "Connection: keep-alive" with HTTP/1.1 + } elsif ($term && ($prot_persist || ($conn =~ /\bkeep-alive\b/i))) { + $alive = 1; + $h .= "Connection: keep-alive\r\n"; + } else { + $alive = 0; + $h .= "Connection: close\r\n"; + } + $h .= 'Date: ' . http_date() . "\r\n\r\n"; if (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') { more($self, $h); @@ -195,29 +216,86 @@ sub response_header_write { $alive; } +# middlewares such as Deflater may write empty strings +sub chunked_wcb ($) { + my ($self) = @_; + sub { + return if $_[0] eq ''; + more($self, sprintf("%x\r\n", bytes::length($_[0]))); + more($self, $_[0]); + $self->write("\r\n"); + } +} + +sub identity_wcb ($) { + my ($self) = @_; + sub { $self->write(\($_[0])) if $_[0] ne '' } +} + +sub next_request ($) { + my ($self) = @_; + $self->watch_write(0); + if ($self->{rbuf} eq '') { # wait for next request + $self->watch_read(1); + } else { # avoid recursion for pipelined requests + push @$pipelineq, $self; + $pipet ||= PublicInbox::EvCleanup::asap(*process_pipelineq); + } +} + +sub response_done ($$) { + my ($self, $alive) = @_; + my $env = $self->{env}; + $self->{env} = undef; + $self->write("0\r\n\r\n") if $alive == 2; + $self->write(sub { $alive ? next_request($self) : $self->close }); + + # FIXME: layering violation + if (my $obj = $env->{'pi-httpd.inbox'}) { + # grace period for reaping resources + $WEAKEN->{"$obj"} = $obj; + PublicInbox::EvCleanup::later(*weaken_task); + } +} + +sub getline_response { + my ($self, $body, $write, $close) = @_; + $self->{forward} = $body; + weaken($self); + my $pull = $self->{pull} = sub { + local $/ = \8192; + my $forward = $self->{forward}; + # limit our own running time for fairness with other + # clients and to avoid buffering too much: + my $n = 100; + while ($forward && defined(my $buf = $forward->getline)) { + $write->($buf); + last if $self->{closed}; + if ((--$n) <= 0 || $self->{write_buf_size}) { + $self->write($self->{pull}); + return; + } + } + $self->{forward} = $self->{pull} = undef; + $forward->close if $forward; # avoid recursion + $close->(); + }; + $pull->(); +} + sub response_write { my ($self, $env, $res) = @_; my $alive = response_header_write($self, $env, $res); - # middlewares such as Deflater may write empty strings - my $write = sub { $self->write($_[0]) if $_[0] ne '' }; - my $close = sub { - if ($alive) { - $self->event_write; # watch for readability if done + my $write = $alive == 2 ? chunked_wcb($self) : identity_wcb($self); + my $close = sub { response_done($self, $alive) }; + if (defined(my $body = $res->[2])) { + if (ref $body eq 'ARRAY') { + $write->($_) foreach @$body; + $close->(); } else { - $self->write(sub { $self->close }); + getline_response($self, $body, $write, $close); } - if (my $obj = $env->{'pi-httpd.inbox'}) { - # grace period for reaping resources - $WEAKEN->{"$obj"} = $obj; - $WEAKTIMER ||= Danga::Socket->AddTimer(60, *weaken_task); - } - $self->{env} = undef; - }; - - if (defined $res->[2]) { - Plack::Util::foreach($res->[2], $write); - $close->(); } else { # this is returned to the calling application: Plack::Util::inline_object(write => $write, close => $close); @@ -227,6 +305,7 @@ sub response_write { use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub more ($$) { my $self = $_[0]; + return if $self->{closed}; if (MSG_MORE && !$self->{write_buf_size}) { my $n = send($self->{sock}, $_[1], MSG_MORE); if (defined $n) { @@ -239,29 +318,6 @@ sub more ($$) { $self->write($_[1]); } -my $pipelineq = []; -my $next_tick; -sub process_pipelineq () { - $next_tick = undef; - my $q = $pipelineq; - $pipelineq = []; - rbuf_process($_) foreach @$q; -} - -# overrides existing Danga::Socket method -sub event_write { - my ($self) = @_; - # only continue watching for readability when we are done writing: - return if $self->write(undef) != 1; - - if ($self->{rbuf} eq '') { # wait for next request - $self->watch_read(1); - } else { # avoid recursion for pipelined requests - push @$pipelineq, $self; - $next_tick ||= Danga::Socket->AddTimer(0, *process_pipelineq); - } -} - sub input_prepare { my ($self, $env) = @_; my $input = $null_io; @@ -402,7 +458,11 @@ sub event_err { $_[0]->close } sub close { my $self = shift; - $self->{env} = undef; + my $forward = $self->{forward}; + my $env = $self->{env}; + delete $env->{'psgix.io'} if $env; # prevent circular referernces + $self->{pull} = $self->{forward} = $self->{env} = undef; + $forward->close if $forward; $self->SUPER::close(@_); } diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm index 78efaa50..433d6da7 100644 --- a/lib/PublicInbox/HTTPD.pm +++ b/lib/PublicInbox/HTTPD.pm @@ -8,10 +8,7 @@ use Plack::Util; require PublicInbox::HTTPD::Async; require PublicInbox::Daemon; -sub pi_httpd_async { - my ($io, $cb) = @_; - PublicInbox::HTTPD::Async->new($io, $cb); -} +sub pi_httpd_async { PublicInbox::HTTPD::Async->new(@_) } sub new { my ($class, $sock, $app) = @_; diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm index bedb397d..fadf2d3a 100644 --- a/lib/PublicInbox/HTTPD/Async.pm +++ b/lib/PublicInbox/HTTPD/Async.pm @@ -9,18 +9,56 @@ package PublicInbox::HTTPD::Async; use strict; use warnings; use base qw(Danga::Socket); -use fields qw(cb); +use fields qw(cb cleanup); +use Scalar::Util qw(weaken); +require PublicInbox::EvCleanup; sub new { - my ($class, $io, $cb) = @_; + my ($class, $io, $cb, $cleanup) = @_; my $self = fields::new($class); IO::Handle::blocking($io, 0); $self->SUPER::new($io); $self->{cb} = $cb; + $self->{cleanup} = $cleanup; $self->watch_read(1); $self; } +sub restart_read_cb ($) { + my ($self) = @_; + sub { $self->watch_read(1) } +} + +sub async_pass { + my ($self, $io, $fh, $bref) = @_; + # In case the client HTTP connection ($io) dies, it + # will automatically close this ($self) object. + $io->{forward} = $self; + $fh->write($$bref); + my $restart_read = restart_read_cb($self); + weaken($self); + $self->{cb} = sub { + my $r = sysread($self->{sock}, $$bref, 8192); + if ($r) { + $fh->write($$bref); + if ($io->{write_buf_size}) { + $self->watch_read(0); + $io->write($restart_read); # D::S::write + } + # stay in watch_read, but let other clients + # get some work done, too. + return; + } elsif (!defined $r) { + return if $!{EAGAIN} || $!{EINTR}; + } + + # Done! Error handling will happen in $fh->close + # called by the {cleanup} handler + $io->{forward} = undef; + $self->close; + } +} + sub event_read { $_[0]->{cb}->() } sub event_hup { $_[0]->{cb}->() } sub event_err { $_[0]->{cb}->() } @@ -28,8 +66,12 @@ sub sysread { shift->{sock}->sysread(@_) } sub close { my $self = shift; - $self->{cb} = undef; + my $cleanup = $self->{cleanup}; + $self->{cleanup} = $self->{cb} = undef; $self->SUPER::close(@_); + + # we defer this to the next timer loop since close is deferred + PublicInbox::EvCleanup::asap($cleanup) if $cleanup; } # do not let ourselves be closed during graceful termination diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 1db2a0b8..e3d65f4a 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -8,7 +8,6 @@ package PublicInbox::Import; use strict; use warnings; use Fcntl qw(:flock :DEFAULT); -use Email::Address; use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mid_mime mid2path); @@ -51,6 +50,7 @@ sub gfi_start { $self->{out} = $out_w; $self->{lockfh} = $lockfh; $self->{pid} = $pid; + $self->{nchg} = 0; ($in_r, $out_w); } @@ -131,6 +131,7 @@ sub remove { "data 3\nrm\n\n", 'from ', ($parent ? $parent : $tip), "\n" or wfail; print $w "D $path\n\n" or wfail; + $self->{nchg}++; (($self->{tip} = ":$commit"), $cur); } @@ -139,21 +140,21 @@ sub add { my ($self, $mime) = @_; # mime = Email::MIME my $from = $mime->header('From'); - my @from = Email::Address->parse($from); - my $name = $from[0]->name; - my $email = $from[0]->address; - my $date = $mime->header('Date'); - my $subject = $mime->header('Subject'); - $subject = '(no subject)' unless defined $subject; - my $mid = mid_mime($mime); - my $path = mid2path($mid); - + my ($email) = ($from =~ /([^<\s]+\@[^>\s]+)/g); + my $name = $from; + $name =~ s/\s*\S+\@\S+\s*\z//; # git gets confused with: # "'A U Thor <u@example.com>' via foo" <foo@example.com> # ref: # <CAD0k6qSUYANxbjjbE4jTW4EeVwOYgBD=bXkSu=akiYC_CB7Ffw@mail.gmail.com> $name =~ tr/<>// and $name = $email; + my $date = $mime->header('Date'); + my $subject = $mime->header('Subject'); + $subject = '(no subject)' unless defined $subject; + my $mid = mid_mime($mime); + my $path = mid2path($mid); + my ($r, $w) = $self->gfi_start; my $tip = $self->{tip}; if ($tip ne '') { @@ -191,6 +192,7 @@ sub add { print $w 'from ', ($parent ? $parent : $tip), "\n" or wfail; } print $w "M 100644 :$blob $path\n\n" or wfail; + $self->{nchg}++; $self->{tip} = ":$commit"; } @@ -202,14 +204,15 @@ sub done { my $pid = delete $self->{pid} or die 'BUG: missing {pid} when done'; waitpid($pid, 0) == $pid or die 'fast-import did not finish'; $? == 0 or die "fast-import failed: $?"; + my $nchg = delete $self->{nchg}; # for compatibility with existing ssoma installations # we can probably remove this entirely by 2020 my $git_dir = $self->{git}->{git_dir}; - my $index = "$git_dir/ssoma.index"; # XXX: change the following scope to: if (-e $index) # in 2018 or so.. my @cmd = ('git', "--git-dir=$git_dir"); - unless ($ENV{FAST}) { + if ($nchg && !$ENV{FAST}) { + my $index = "$git_dir/ssoma.index"; my $env = { GIT_INDEX_FILE => $index }; my @rt = (@cmd, qw(read-tree -m -v -i), $self->{ref}); $pid = spawn(\@rt, $env, undef); @@ -217,11 +220,13 @@ sub done { waitpid($pid, 0) == $pid or die 'read-tree did not finish'; $? == 0 or die "failed to update $git_dir/ssoma.index: $?\n"; } - - $pid = spawn([@cmd, 'update-server-info'], undef, undef); - defined $pid or die "spawn update-server-info failed: $!\n"; - waitpid($pid, 0) == $pid or die 'update-server-info did not finish'; - $? == 0 or die "failed to update-server-info: $?\n"; + if ($nchg) { + $pid = spawn([@cmd, 'update-server-info'], undef, undef); + defined $pid or die "spawn update-server-info failed: $!\n"; + waitpid($pid, 0) == $pid or + die 'update-server-info did not finish'; + $? == 0 or die "failed to update-server-info: $?\n"; + } my $lockfh = delete $self->{lockfh} or die "BUG: not locked: $!"; flock($lockfh, LOCK_UN) or die "unlock failed: $!"; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 4bcab96f..27218de7 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -64,16 +64,6 @@ sub cloneurl { $self->{cloneurl} = \@url; } -# TODO: can we remove this? -sub footer_html { - my ($self) = @_; - my $footer = $self->{footer}; - return $footer if defined $footer; - $footer = try_cat("$self->{mainrepo}/public-inbox/footer.html"); - chomp $footer; - $self->{footer} = $footer; -} - sub base_url { my ($self, $prq) = @_; # Plack::Request if (defined $prq) { @@ -93,4 +83,11 @@ sub base_url { } } +sub nntp_usable { + my ($self) = @_; + my $ret = $self->mm && $self->search; + $self->{mm} = $self->{search} = undef; + $ret; +} + 1; diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm index e1207b56..2e6e9ec5 100644 --- a/lib/PublicInbox/MDA.pm +++ b/lib/PublicInbox/MDA.pm @@ -6,7 +6,6 @@ package PublicInbox::MDA; use strict; use warnings; use Email::Simple; -use Email::Address; use Date::Parse qw(strptime); use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian @@ -62,13 +61,13 @@ sub alias_specified { my @address = ref($address) eq 'ARRAY' ? @$address : ($address); my %ok = map { - my @recip = Email::Address->parse($_); - lc(__drop_plus($recip[0]->address)) => 1; + lc(__drop_plus($_)) => 1; } @address; foreach my $line ($filter->cc, $filter->to) { - foreach my $addr (Email::Address->parse($line)) { - if ($ok{lc(__drop_plus($addr->address))}) { + my @addrs = ($line =~ /([^<\s]+\@[^>\s]+)/g); + foreach my $addr (@addrs) { + if ($ok{lc(__drop_plus($addr))}) { return 1; } } diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 4c4b74fb..40ca6114 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -6,18 +6,11 @@ package PublicInbox::Mbox; use strict; use warnings; -use PublicInbox::MID qw/mid2path mid_clean/; +use PublicInbox::MID qw/mid_clean/; use URI::Escape qw/uri_escape_utf8/; +use Plack::Util; require Email::Simple; -sub thread_mbox { - my ($ctx, $srch, $sfx) = @_; - sub { - my ($response) = @_; # Plack callback - emit_mbox($response, $ctx, $srch, $sfx); - } -} - sub emit1 { my $simple = Email::Simple->new(pop); my $ctx = pop; @@ -84,104 +77,35 @@ sub emit_msg { $fh->write($buf .= "\n"); } -sub emit_mbox { - my ($response, $ctx, $srch, $sfx) = @_; - my $type = 'mbox'; - if ($sfx) { - eval { require IO::Compress::Gzip }; - return need_gzip($response) if $@; - $type = 'gzip'; - } - - # http://www.iana.org/assignments/media-types/application/gzip - # http://www.iana.org/assignments/media-types/application/mbox - my $fh = $response->([200, ['Content-Type' => "application/$type"]]); - $fh = PublicInbox::MboxGz->new($fh) if $sfx; - - require PublicInbox::Git; - my $mid = $ctx->{mid}; - my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); - my %opts = (offset => 0, asc => 1); - my $nr; - do { - my $res = $srch->get_thread($mid, \%opts); - my $msgs = $res->{msgs}; - $nr = scalar @$msgs; - while (defined(my $smsg = shift @$msgs)) { - my $msg = eval { - my $p = 'HEAD:'.mid2path($smsg->mid); - Email::Simple->new($git->cat_file($p)); - }; - emit_msg($ctx, $fh, $msg) if $msg; - } +sub noop {} - $opts{offset} += $nr; - } while ($nr > 0); +sub thread_mbox { + my ($ctx, $srch, $sfx) = @_; + eval { require IO::Compress::Gzip }; + return sub { need_gzip(@_) } if $@; - $fh->close; + my $cb = sub { $srch->get_thread($ctx->{mid}, @_) }; + # http://www.iana.org/assignments/media-types/application/gzip + [200, ['Content-Type' => 'application/gzip'], + PublicInbox::MboxGz->new($ctx, $cb) ]; } sub emit_range { my ($ctx, $range) = @_; - sub { _emit_range($_[0], $ctx, $range) }; -} - -sub _emit_range { - my ($res, $ctx, $range) = @_; eval { require IO::Compress::Gzip }; - return need_gzip($res) if $@; + return sub { need_gzip(@_) } if $@; my $query; if ($range eq 'all') { # TODO: YYYY[-MM] $query = ''; } else { - $res->([404, [qw(Content-Type text/plain)], []]); - return; + return [404, [qw(Content-Type text/plain)], []]; } + my $cb = sub { $ctx->{srch}->query($query, @_) }; # http://www.iana.org/assignments/media-types/application/gzip - my $fh = $res->([200, [qw(Content-Type application/gzip)]]); - $fh = PublicInbox::MboxGz->new($fh); - my $env = $ctx->{cgi}->env; - my $srch = $ctx->{srch}; - my $git = $ctx->{git}; - my %opts = (offset => 0, asc => 1); - my $nr; - my $cb = sub { - my $res = $srch->query($query, \%opts); - my $msgs = $res->{msgs}; - $nr = scalar @$msgs; - while (defined(my $smsg = shift @$msgs)) { - my $msg = eval { - my $p = 'HEAD:'.mid2path($smsg->mid); - Email::Simple->new($git->cat_file($p)); - }; - emit_msg($ctx, $fh, $msg) if $msg; - } - - $opts{offset} += $nr; - }; - - $cb->(); # first part is free - return $fh->close if $nr == 0; - - if ($env->{'pi-httpd.async'}) { - my $io = $env->{'psgix.io'} or die "no IO"; - my $next; - $next = sub { - $cb->(); - if ($nr > 0) { - $io->write($next); - } else { - $next = undef; - $fh->close; - } - }; - $io->write($next); # Danga::Socket::write - return; - } - $cb->() while ($nr > 0); - $fh->close; + [200, [qw(Content-Type application/gzip)], + PublicInbox::MboxGz->new($ctx, $cb) ]; } sub need_gzip { @@ -198,40 +122,59 @@ EOF 1; -# fh may not be a proper IO, so we wrap the write and close methods -# to prevent IO::Compress::Gzip from complaining package PublicInbox::MboxGz; use strict; use warnings; +use PublicInbox::MID qw(mid2path); sub new { - my ($class, $fh) = @_; + my ($class, $ctx, $cb) = @_; my $buf; bless { buf => \$buf, gz => IO::Compress::Gzip->new(\$buf), - fh => $fh, + cb => $cb, + ctx => $ctx, + msgs => [], + opts => { asc => 1, offset => 0 }, }, $class; } sub _flush_buf { my ($self) = @_; - if (defined ${$self->{buf}}) { - $self->{fh}->write(${$self->{buf}}); - ${$self->{buf}} = undef; - } + my $ret = $self->{buf}; + $ret = $$ret; + ${$self->{buf}} = undef; + $ret; } -sub write { - $_[0]->{gz}->write($_[1]); - _flush_buf($_[0]); -} - -sub close { +# called by Plack::Util::foreach or similar +sub getline { my ($self) = @_; + my $res; + my $ctx = $self->{ctx}; + my $git = $ctx->{git}; + do { + while (defined(my $smsg = shift @{$self->{msgs}})) { + my $msg = eval { + my $p = 'HEAD:'.mid2path($smsg->mid); + Email::Simple->new($git->cat_file($p)); + }; + $msg or next; + + PublicInbox::Mbox::emit_msg($ctx, $self->{gz}, $msg); + my $ret = _flush_buf($self); + return $ret if $ret; + } + $res = $self->{cb}->($self->{opts}); + $self->{msgs} = $res->{msgs}; + $res = scalar @{$self->{msgs}}; + $self->{opts}->{offset} += $res; + } while ($res); $self->{gz}->close; _flush_buf($self); - $self->{fh}->close; } +sub close {} # noop + 1; diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index ac536f71..e4e3de4a 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -11,12 +11,11 @@ use PublicInbox::Search; use PublicInbox::Msgmap; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); +require PublicInbox::EvCleanup; use Email::Simple; use POSIX qw(strftime); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); use URI::Escape qw(uri_escape_utf8); -use Encode qw(find_encoding); -my $enc_utf8 = find_encoding('UTF-8'); use constant { r501 => '501 command syntax error', r221 => '221 Header follows', @@ -38,15 +37,15 @@ my $LIST_HEADERS = join("\r\n", @OVERVIEW, my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr); my $EXPMAP; # fd -> [ idle_time, $self ] -my $EXPTIMER; +my $expt; our $EXPTIME = 180; # 3 minutes my $WEAKEN = {}; # string(nntpd) -> nntpd -my $WEAKTIMER; +my $weakt; +my $nextt; -my $next_tick; my $nextq = []; sub next_tick () { - $next_tick = undef; + $nextt = undef; my $q = $nextq; $nextq = []; foreach my $nntp (@$q) { @@ -70,7 +69,7 @@ sub update_idle_time ($) { # reduce FD pressure by closing some "git cat-file --batch" processes # and unused FDs for msgmap and Xapian indices sub weaken_groups () { - $WEAKTIMER = undef; + $weakt = undef; foreach my $nntpd (values %$WEAKEN) { $_->weaken_all foreach (@{$nntpd->{grouplist}}); } @@ -81,7 +80,6 @@ sub expire_old () { my $now = now(); my $exp = $EXPTIME; my $old = $now - $exp; - my $next = $now + $exp; my $nr = 0; my %new; while (my ($fd, $v) = each %$EXPMAP) { @@ -89,26 +87,22 @@ sub expire_old () { if ($idle_time < $old) { $nntp->close; # idempotent } else { - my $nexp = $idle_time + $exp; - $next = $nexp if ($nexp < $next); ++$nr; $new{$fd} = $v; } } $EXPMAP = \%new; if ($nr) { - $next -= $now; - $next = 0 if $next < 0; - $EXPTIMER = Danga::Socket->AddTimer($next, *expire_old); + $expt = PublicInbox::EvCleanup::later(*expire_old); weaken_groups(); } else { - $EXPTIMER = undef; + $expt = undef; # noop to kick outselves out of the loop ASAP so descriptors # really get closed - Danga::Socket->AddTimer(0, sub {}); + PublicInbox::EvCleanup::asap(sub {}); # grace period for reaping resources - $WEAKTIMER ||= Danga::Socket->AddTimer(30, *weaken_groups); + $weakt ||= PublicInbox::EvCleanup::later(*weaken_groups); } } @@ -122,7 +116,7 @@ sub new ($$$) { $self->watch_read(1); update_idle_time($self); $WEAKEN->{"$nntpd"} = $nntpd; - $EXPTIMER ||= Danga::Socket->AddTimer($EXPTIME, *expire_old); + $expt ||= PublicInbox::EvCleanup::later(*expire_old); $self; } @@ -199,7 +193,7 @@ sub list_active_times ($;$) { foreach my $ng (@{$self->{nntpd}->{grouplist}}) { $ng->{newsgroup} =~ $wildmat or next; my $c = eval { $ng->mm->created_at } || time; - more($self, "$ng->{newsgroup} $c $ng->{address}"); + more($self, "$ng->{newsgroup} $c $ng->{-primary_address}"); } } @@ -417,7 +411,8 @@ sub cmd_last ($) { article_adj($_[0], -1) } sub cmd_post ($) { my ($self) = @_; my $ng = $self->{ng}; - $ng ? "440 mailto:$ng->{address} to post" : '440 posting not allowed' + $ng ? "440 mailto:$ng->{-primary_address} to post" + : '440 posting not allowed' } sub cmd_quit ($) { @@ -442,8 +437,8 @@ sub set_nntp_headers { # clobber some $hdr->header_set('Newsgroups', $ng->{newsgroup}); $hdr->header_set('Xref', xref($ng, $n)); - header_append($hdr, 'List-Post', "<mailto:$ng->{address}>"); - if (my $url = $ng->{url}) { + header_append($hdr, 'List-Post', "<mailto:$ng->{-primary_address}>"); + if (my $url = $ng->base_url) { $mid = uri_escape_utf8($mid); header_append($hdr, 'Archived-At', "<$url$mid/>"); header_append($hdr, 'List-Archive', "<$url>"); @@ -488,7 +483,7 @@ find_mid: found: my $o = 'HEAD:' . mid2path($mid); my $bytes; - my $s = eval { Email::Simple->new($ng->gcf->cat_file($o, \$bytes)) }; + my $s = eval { Email::Simple->new($ng->git->cat_file($o, \$bytes)) }; return $err unless $s; my $lines; if ($set_headers) { @@ -633,7 +628,7 @@ sub long_response ($$$$) { update_idle_time($self); push @$nextq, $self; - $next_tick ||= Danga::Socket->AddTimer(0, *next_tick); + $nextt ||= PublicInbox::EvCleanup::asap(*next_tick); } else { # all done! $self->{long_res} = undef; $self->watch_read(1); @@ -901,7 +896,7 @@ sub cmd_xpath ($$) { sub res ($$) { my ($self, $line) = @_; - $line = $enc_utf8->encode($line); + utf8::encode($line); do_write($self, $line . "\r\n"); } @@ -936,7 +931,7 @@ use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub do_more ($$) { my ($self, $data) = @_; - $data = $enc_utf8->encode($data); + utf8::encode($data); if (MSG_MORE && !$self->{write_buf_size}) { my $n = send($self->{sock}, $data, MSG_MORE); if (defined $n) { @@ -996,7 +991,7 @@ sub watch_read { # in case we really did dispatch a read event and started # another long response. push @$nextq, $self; - $next_tick ||= Danga::Socket->AddTimer(0, *next_tick); + $nextt ||= PublicInbox::EvCleanup::asap(*next_tick); } $rv; } diff --git a/lib/PublicInbox/NNTPD.pm b/lib/PublicInbox/NNTPD.pm index fc26c5c0..50d022be 100644 --- a/lib/PublicInbox/NNTPD.pm +++ b/lib/PublicInbox/NNTPD.pm @@ -6,7 +6,6 @@ package PublicInbox::NNTPD; use strict; use warnings; -require PublicInbox::NewsGroup; require PublicInbox::Config; sub new { @@ -26,28 +25,16 @@ sub refresh_groups () { my @list; foreach my $k (keys %$pi_config) { $k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next; - my $g = $1; + my $name = $1; my $git_dir = $pi_config->{$k}; - my $addr = $pi_config->{"publicinbox.$g.address"}; - my $ngname = $pi_config->{"publicinbox.$g.newsgroup"}; - my $url = $pi_config->{"publicinbox.$g.url"}; - if (defined $ngname) { - next if ($ngname eq ''); # disabled - $g = $ngname; - } - my $ng = PublicInbox::NewsGroup->new($g, $git_dir, $addr, $url); - my $old_ng = $self->{groups}->{$g}; - - # Reuse the old one if possible since it can hold - # references to valid mm and gcf objects - if ($old_ng) { - $old_ng->update($ng); - $ng = $old_ng; - } + my $ngname = $pi_config->{"publicinbox.$name.newsgroup"}; + next unless defined $ngname; + next if ($ngname eq ''); # disabled + my $ng = $pi_config->lookup_newsgroup($ngname) or next; # Only valid if msgmap and search works - if ($ng->usable) { - $new->{$g} = $ng; + if ($ng->nntp_usable) { + $new->{$ngname} = $ng; push @list, $ng; } } diff --git a/lib/PublicInbox/NewsGroup.pm b/lib/PublicInbox/NewsGroup.pm deleted file mode 100644 index 500f61e3..00000000 --- a/lib/PublicInbox/NewsGroup.pm +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (C) 2015 all contributors <meta@public-inbox.org> -# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) -# -# Used only by the NNTP server to represent a public-inbox git repository -# as a newsgroup -package PublicInbox::NewsGroup; -use strict; -use warnings; -use Scalar::Util qw(weaken); -require Danga::Socket; -require PublicInbox::Msgmap; -require PublicInbox::Search; -require PublicInbox::Git; - -sub new { - my ($class, $newsgroup, $git_dir, $address, $url) = @_; - - # first email address is preferred - $address = $address->[0] if ref($address); - if ($url) { - # assume protocol-relative URLs which start with '//' means - # the server supports both HTTP and HTTPS, favor HTTPS. - $url = "https:$url" if $url =~ m!\A//!; - $url .= '/' if $url !~ m!/\z!; - } - my $self = bless { - newsgroup => $newsgroup, - git_dir => $git_dir, - address => $address, - url => $url, - }, $class; - $self->{domain} = ($address =~ /\@(\S+)\z/) ? $1 : 'localhost'; - $self; -} - -sub weaken_all { - my ($self) = @_; - weaken($self->{$_}) foreach qw(gcf mm search); -} - -sub gcf { - my ($self) = @_; - $self->{gcf} ||= eval { PublicInbox::Git->new($self->{git_dir}) }; -} - -sub usable { - my ($self) = @_; - eval { - PublicInbox::Msgmap->new($self->{git_dir}); - PublicInbox::Search->new($self->{git_dir}); - }; -} - -sub mm { - my ($self) = @_; - $self->{mm} ||= eval { PublicInbox::Msgmap->new($self->{git_dir}) }; -} - -sub search { - my ($self) = @_; - $self->{search} ||= eval { PublicInbox::Search->new($self->{git_dir}) }; -} - -sub description { - my ($self) = @_; - open my $fh, '<', "$self->{git_dir}/description" or return ''; - my $desc = eval { local $/; <$fh> }; - chomp $desc; - $desc =~ s/\s+/ /smg; - $desc; -} - -sub update { - my ($self, $new) = @_; - $self->{address} = $new->{address}; - $self->{domain} = $new->{domain}; - if ($self->{git_dir} ne $new->{git_dir}) { - # new git_dir requires a new mm and gcf - $self->{mm} = $self->{gcf} = undef; - $self->{git_dir} = $new->{git_dir}; - } -} - -1; diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm index 19eb596c..908c4351 100644 --- a/lib/PublicInbox/NewsWWW.pm +++ b/lib/PublicInbox/NewsWWW.pm @@ -19,7 +19,6 @@ sub new { sub call { my ($self, $env) = @_; - my $ng_map = $self->newsgroup_map; my $path = $env->{PATH_INFO}; $path =~ s!\A/+!!; $path =~ s!/+\z!!; @@ -27,12 +26,11 @@ sub call { # some links may have the article number in them: # /inbox.foo.bar/123456 my ($ng, $article) = split(m!/+!, $path, 2); - if (my $info = $ng_map->{$ng}) { - my $url = PublicInbox::Hval::prurl($env, $info->{url}); + if (my $inbox = $self->{pi_config}->lookup_newsgroup($ng)) { + my $url = PublicInbox::Hval::prurl($env, $inbox->{url}); my $code = 301; - my $h = [ Location => $url, 'Content-Type' => 'text/plain' ]; if (defined $article && $article =~ /\A\d+\z/) { - my $mid = eval { ng_mid_for($ng, $info, $article) }; + my $mid = eval { $inbox->mm->mid_for($article) }; if (defined $mid) { # article IDs are not stable across clones, # do not encourage caching/bookmarking them @@ -41,40 +39,11 @@ sub call { } } + my $h = [ Location => $url, 'Content-Type' => 'text/plain' ]; + return [ $code, $h, [ "Redirecting to $url\n" ] ] } [ 404, [ 'Content-Type' => 'text/plain' ], [] ]; } -sub ng_mid_for { - my ($ng, $info, $article) = @_; - # may fail due to lack of Danga::Socket - # for defer_weaken: - require PublicInbox::NewsGroup; - $ng = $info->{ng} ||= - PublicInbox::NewsGroup->new($ng, $info->{git_dir}, ''); - $ng->mm->mid_for($article); -} - -sub newsgroup_map { - my ($self) = @_; - my $rv; - $rv = $self->{ng_map} and return $rv; - my $pi_config = $self->{pi_config}; - my %ng_map; - foreach my $k (keys %$pi_config) { - $k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next; - my $inbox = $1; - my $git_dir = $pi_config->{"publicinbox.$inbox.mainrepo"}; - my $url = $pi_config->{"publicinbox.$inbox.url"}; - defined $url or next; - my $ng = $pi_config->{"publicinbox.$inbox.newsgroup"}; - next if (!defined $ng) || ($ng eq ''); # disabled - - $url =~ m!/\z! or $url .= '/'; - $ng_map{$ng} = { url => $url, git_dir => $git_dir }; - } - $self->{ng_map} = \%ng_map; -} - 1; diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm new file mode 100644 index 00000000..9e4c8e08 --- /dev/null +++ b/lib/PublicInbox/Qspawn.pm @@ -0,0 +1,52 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::Qspawn; +use strict; +use warnings; +use PublicInbox::Spawn qw(popen_rd); +our $LIMIT = 1; +my $running = 0; +my @run_queue; + +sub new ($$$;) { + my ($class, $cmd, $env, $opt) = @_; + bless { args => [ $cmd, $env, $opt ] }, $class; +} + +sub _do_spawn { + my ($self, $cb) = @_; + my $err; + ($self->{rpipe}, $self->{pid}) = popen_rd(@{$self->{args}}); + if ($self->{pid}) { + $running++; + } else { + $self->{err} = $!; + } + $cb->($self->{rpipe}); +} + +sub finish ($) { + my ($self) = @_; + if (delete $self->{rpipe}) { + my $pid = delete $self->{pid}; + $self->{err} = $pid == waitpid($pid, 0) ? $? : + "PID:$pid still running?"; + $running--; + } + if (my $next = shift @run_queue) { + _do_spawn(@$next); + } + $self->{err}; +} + +sub start ($$) { + my ($self, $cb) = @_; + + if ($running < $LIMIT) { + _do_spawn($self, $cb); + } else { + push @run_queue, [ $self, $cb ]; + } +} + +1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 9192bb07..4a4b2bdb 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -338,6 +338,7 @@ sub rlog { --raw -r --no-abbrev/, $range); my $latest; my $bytes; + local $/ = "\n"; while (defined(my $line = <$log>)) { if ($line =~ /$addmsg/o) { my $mime = do_cat_mail($git, $1, \$bytes) or next; @@ -445,6 +446,7 @@ sub _read_git_config_perm { my ($self) = @_; my @cmd = qw(config core.sharedRepository); my $fh = PublicInbox::Git->new($self->{git_dir})->popen(@cmd); + local $/ = "\n"; my $perm = <$fh>; chomp $perm if defined $perm; $perm; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 1244aeea..28c2037b 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -7,12 +7,10 @@ package PublicInbox::SearchMsg; use strict; use warnings; use Search::Xapian; -use Email::Address qw//; use POSIX qw//; use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean/; -use Encode qw/find_encoding/; -my $enc_utf8 = find_encoding('UTF-8'); +use PublicInbox::Address; our $PFX2TERM_RE = undef; use constant EPOCH_822 => 'Thu, 01 Jan 1970 00:00:00 +0000'; use POSIX qw(strftime); @@ -39,7 +37,7 @@ sub load_doc { my ($class, $doc) = @_; my $data = $doc->get_data; my $ts = get_val($doc, &PublicInbox::Search::TS); - $data = $enc_utf8->decode($data); + utf8::decode($data); my ($subj, $from, $refs, $to, $cc) = split(/\n/, $data); bless { doc => $doc, @@ -80,16 +78,14 @@ sub date ($) { return $date if defined $date; my $ts = $self->{ts}; return unless defined $ts; - $self->{date} = strftime('%a, %d %b %Y %T %z', gmtime($ts)); + $self->{date} = strftime('%a, %d %b %Y %T +0000', gmtime($ts)); } sub from ($) { my ($self) = @_; my $from = __hdr($self, 'from'); if (defined $from && !defined $self->{from_name}) { - $from =~ tr/\t\r\n/ /; - my @from = Email::Address->parse($from); - $self->{from_name} = $from[0]->name; + $self->{from_name} = PublicInbox::Address::from_name($from); } $from; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index c0cd1ffd..2ec7ddf8 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -16,7 +16,7 @@ our $LIM = 50; sub sres_top_html { my ($ctx) = @_; - my $q = PublicInbox::SearchQuery->new($ctx->{cgi}); + my $q = PublicInbox::SearchQuery->new($ctx->{qp}); my $code = 200; # double the limit for expanded views: @@ -176,7 +176,6 @@ sub tdump { $ctx->{searchview} = 1; tdump_ent($git, $state, $_, 0) for @rootset; PublicInbox::View::thread_adj_level($state, 0); - Email::Address->purge_cache; $fh->write(search_nav_bot($mset, $q). "\n\n" . foot($ctx). '</pre></body></html>'); @@ -261,12 +260,13 @@ use warnings; use PublicInbox::Hval; sub new { - my ($class, $cgi) = @_; - my $r = $cgi->param('r'); + my ($class, $qp) = @_; + + my $r = $qp->{r}; bless { - q => $cgi->param('q'), - x => $cgi->param('x') || '', - o => int($cgi->param('o') || 0) || 0, + q => $qp->{'q'}, + x => $qp->{x} || '', + o => (($qp->{o} || '0') =~ /(\d+)/), r => (defined $r && $r ne '0'), }, $class; } diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index c5a5c012..66dce335 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -144,6 +144,7 @@ unless (defined $vfork_spawn) { *public_inbox_fork_exec = *PublicInbox::SpawnPP::public_inbox_fork_exec } +# n.b. we never use absolute paths with this sub which ($) { my ($file) = @_; foreach my $p (split(':', $ENV{PATH})) { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 21949812..2057face 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -8,19 +8,20 @@ use strict; use warnings; use URI::Escape qw/uri_escape_utf8/; use Date::Parse qw/str2time/; -use Encode qw/find_encoding/; use Encode::MIME::Header; use PublicInbox::Hval qw/ascii_html/; use PublicInbox::Linkify; use PublicInbox::MID qw/mid_clean id_compress mid2path mid_mime/; use PublicInbox::MsgIter; +use PublicInbox::Address; require POSIX; use constant INDENT => ' '; use constant TCHILD => '` '; sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD }; -# public functions: +# public functions: (unstable) +# TODO: stream this, since threading is expensive but also oh-so-important sub msg_html { my ($ctx, $mime, $footer) = @_; $footer = defined($footer) ? "\n$footer" : ''; @@ -28,47 +29,34 @@ sub msg_html { headers_to_html_header($hdr, $ctx) . multipart_text_as_html($mime, '') . '</pre><hr /><pre>' . - html_footer($hdr, 1, $ctx, 'R/') . - $footer . - '</pre></body></html>'; + html_footer($hdr, 1, $ctx) . + '</pre>' . msg_reply($ctx, $hdr) . + '<hr /><pre>'. $footer . '</pre></body></html>'; } -# /$INBOX/$MESSAGE_ID/R/ +# /$INBOX/$MESSAGE_ID/#R sub msg_reply { - my ($ctx, $hdr, $footer) = @_; - my $s = $hdr->header('Subject'); - $s = '(no subject)' if (!defined $s) || ($s eq ''); - my $f = $hdr->header('From'); - $f = '' unless defined $f; - my $mid = $hdr->header_raw('Message-ID'); - $mid = PublicInbox::Hval->new_msgid($mid); - my $t = ascii_html($s); + my ($ctx, $hdr) = @_; my $se_url = 'https://kernel.org/pub/software/scm/git/docs/git-send-email.html'; my ($arg, $link) = mailto_arg_link($hdr); push @$arg, '/path/to/YOUR_REPLY'; - "<html><head><title>replying to \"$t\"</title></head><body><pre>" . - "replying to message:\n\n" . - "Subject: <b>$t</b>\n" . - "From: ". ascii_html($f) . - "\nDate: " . ascii_html($hdr->header('Date')) . - "\nMessage-ID: <" . $mid->as_html . ">\n\n" . - "There are multiple ways to reply:\n\n" . + "<hr /><pre\nid=R>". + "You may reply publically to <a\nhref=#t>this message</a> via\n". + "plain-text email using any one of the following methods:\n\n" . "* Save the following mbox file, import it into your mail client,\n" . - " and reply-to-all from there: <a\nhref=../raw>mbox</a>\n\n" . + " and reply-to-all from there: <a\nhref=raw>mbox</a>\n\n" . "* Reply to all the recipients using the <b>--to</b>, <b>--cc</b>,\n" . " and <b>--in-reply-to</b> switches of git-send-email(1):\n\n" . "\tgit send-email \\\n\t\t" . - join(" \\ \n\t\t", @$arg ). "\n\n" . + join(" \\\n\t\t", @$arg ). "\n\n" . qq( <a\nhref="$se_url">$se_url</a>\n\n) . "* If your mail client supports setting the <b>In-Reply-To</b>" . " header\n via mailto: links, try the " . qq(<a\nhref="$link">mailto: link</a>\n) . - "\nFor context, the original <a\nhref=../>message</a> or " . - qq(<a\nhref="../t/#u">thread</a>) . - '</pre><hr /><pre>' . $footer . '</pre></body></html>'; + '</pre>'; } sub in_reply_to { @@ -99,9 +87,7 @@ sub index_entry { $seen->{$id} = "#$id"; # save the anchor for children, later my $mid = PublicInbox::Hval->new_msgid($mid_raw); - my $from = $hdr->header('From'); - my @from = Email::Address->parse($from); - $from = $from[0]->name; + my $from = PublicInbox::Address::from_name($hdr->header('From')); my $root_anchor = $state->{root_anchor} || ''; my $path = $root_anchor ? '../../' : ''; @@ -125,7 +111,7 @@ sub index_entry { # scan through all parts, looking for displayable text msg_iter($mime, sub { index_walk($fh, $mhref, $_[0]) }); - $rv = "\n" . html_footer($hdr, 0, $ctx, "$path$href/R/"); + $rv = "\n" . html_footer($hdr, 0, $ctx, "$path$href/#R"); if (defined $irt) { unless (defined $parent_anchor) { @@ -191,7 +177,6 @@ sub emit_thread_html { ('</ul></li>' x ($max - 1)) . '</ul>'); } } - Email::Address->purge_cache; # there could be a race due to a message being deleted in git # but still being in the Xapian index: @@ -339,8 +324,8 @@ sub headers_to_html_header { $v = PublicInbox::Hval->new($v); if ($h eq 'From') { - my @from = Email::Address->parse($v->raw); - $title[1] = ascii_html($from[0]->name); + my $n = PublicInbox::Address::from_name($v->raw); + $title[1] = ascii_html($n); } elsif ($h eq 'Subject') { $title[0] = $v->as_html; if ($srch) { @@ -449,15 +434,13 @@ sub mailto_arg_link { foreach my $h (qw(From To Cc)) { my $v = $hdr->header($h); defined($v) && ($v ne '') or next; - my @addrs = Email::Address->parse($v); - foreach my $recip (@addrs) { - my $address = $recip->address; + my @addrs = PublicInbox::Address::emails($v); + foreach my $address (@addrs) { my $dst = lc($address); $cc{$dst} ||= $address; $to ||= $dst; } } - Email::Address->purge_cache; my @arg; my $subj = $hdr->header('Subject') || ''; @@ -507,8 +490,9 @@ sub html_footer { } else { $irt = ''; } - - $irt . qq(<a\nhref="$rhref">reply</a>) . $idx; + $rhref ||= '#R'; + $irt .= qq(<a\nhref="$rhref">reply</a>); + $irt .= $idx; } sub linkify_ref_nosrch { @@ -840,13 +824,12 @@ sub emit_topics { sub emit_index_topics { my ($state) = @_; - my $off = $state->{ctx}->{cgi}->param('o'); - $off = 0 unless defined $off; + my ($off) = (($state->{ctx}->{cgi}->param('o') || '0') =~ /(\d+)/); $state->{order} = []; $state->{subjs} = {}; $state->{latest} = {}; my $max = 25; - my %opts = ( offset => int $off, limit => $max * 4 ); + my %opts = ( offset => $off, limit => $max * 4 ); while (scalar @{$state->{order}} < $max) { my $sres = $state->{srch}->query('', \%opts); my $nr = scalar @{$sres->{msgs}} or last; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 5b4d6c18..5fa4e380 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -14,7 +14,7 @@ use 5.008; use strict; use warnings; use Plack::Request; -use PublicInbox::Config qw(try_cat); +use PublicInbox::Config; use URI::Escape qw(uri_escape_utf8 uri_unescape); use constant SSOMA_URL => '//ssoma.public-inbox.org/'; use constant PI_URL => '//public-inbox.org/'; @@ -22,7 +22,7 @@ require PublicInbox::Git; use PublicInbox::GitHTTPBackend; our $INBOX_RE = qr!\A/([\w\.\-]+)!; our $MID_RE = qr!([^/]+)!; -our $END_RE = qr!(T/|t/|R/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; +our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; sub new { @@ -40,15 +40,24 @@ sub run { sub call { my ($self, $env) = @_; my $cgi = Plack::Request->new($env); - my $ctx = {cgi => $cgi, pi_config => $self->{pi_config}, www => $self}; - my $path_info = $cgi->path_info; + my $ctx = { cgi => $cgi, env => $env, www => $self, + pi_config => $self->{pi_config} }; + + # we don't care about multi-value + my %qp = map { + my ($k, $v) = split('=', $_, 2); + ($k, $v) + } split(/[&;]/, uri_unescape($env->{QUERY_STRING})); + $ctx->{qp} = \%qp; + + my $path_info = $env->{PATH_INFO}; + my $method = $env->{REQUEST_METHOD}; - my $method = $cgi->method; if ($method eq 'POST' && $path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) { my $path = $2; return (invalid_inbox($self, $ctx, $1) || - serve_git($cgi, $ctx->{git}, $path)); + serve_git($env, $ctx->{git}, $path)); } elsif ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); @@ -68,7 +77,7 @@ sub call { ($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my $path = $2; invalid_inbox($self, $ctx, $1) || - serve_git($cgi, $ctx->{git}, $path); + serve_git($env, $ctx->{git}, $path); } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { serve_mbox_range($self, $ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { @@ -79,11 +88,15 @@ sub call { invalid_inbox_mid($self, $ctx, $1, $2) || get_attach($ctx, $idx, $fn); # in case people leave off the trailing slash: - } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t|R)\z!o) { + } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) { my ($inbox, $mid, $suffix) = ($1, $2, $3); $suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/'; r301($ctx, $inbox, $mid, $suffix); + } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) { + my ($inbox, $mid) = ($1, $2); + r301($ctx, $inbox, $mid, '#R'); + } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); @@ -107,7 +120,7 @@ sub preload { foreach (qw(PublicInbox::Search PublicInbox::SearchView PublicInbox::Mbox IO::Compress::Gzip - PublicInbox::NewsWWW PublicInbox::NewsGroup)) { + PublicInbox::NewsWWW)) { eval "require $_;"; } } @@ -180,7 +193,7 @@ sub get_index { require PublicInbox::Feed; my $srch = searcher($ctx); footer($ctx); - if (defined $ctx->{cgi}->param('q')) { + if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { require PublicInbox::SearchView; PublicInbox::SearchView::sres_top_html($ctx); } else { @@ -218,19 +231,6 @@ sub get_mid_html { [ PublicInbox::View::msg_html($ctx, $mime, $foot) ] ]; } -# /$INBOX/$MESSAGE_ID/R/ -> HTML content (fullquotes) -sub get_reply_html { - my ($ctx) = @_; - my $x = mid2blob($ctx) or return r404($ctx); - - require PublicInbox::View; - my $foot = footer($ctx); - require Email::MIME; - my $hdr = Email::MIME->new($x)->header_obj; - [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], - [ PublicInbox::View::msg_reply($ctx, $hdr, $foot)] ]; -} - # /$INBOX/$MESSAGE_ID/t/ sub get_thread { my ($ctx, $flat) = @_; @@ -252,8 +252,6 @@ sub footer { my ($ctx) = @_; return '' unless $ctx; my $obj = $ctx->{-inbox} or return ''; - my $footer = $obj->footer_html; - return $ctx->{footer} = $footer if $footer; # auto-generate a footer chomp(my $desc = $obj->description); @@ -264,7 +262,7 @@ sub footer { my $cgi = $ctx->{cgi}; my $http = $cgi->base->as_string . $obj->{name}; $seen{$http} or unshift @urls, $http; - my $ssoma_url = PublicInbox::Hval::prurl($cgi->{env}, SSOMA_URL); + my $ssoma_url = PublicInbox::Hval::prurl($ctx->{env}, SSOMA_URL); if (scalar(@urls) == 1) { $urls = "URL for <a\nhref=\"" . $ssoma_url . qq(">ssoma</a> or <b>git clone --mirror $urls[0]</b>); @@ -396,7 +394,7 @@ sub r301 { $obj = $ctx->{-inbox}; } my $url = $obj->base_url($cgi); - my $qs = $cgi->env->{QUERY_STRING}; + my $qs = $ctx->{env}->{QUERY_STRING}; $url .= (uri_escape_utf8($mid) . '/') if (defined $mid); $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; @@ -420,14 +418,12 @@ sub msg_page { # legacy, but no redirect for compatibility: 'f/' eq $e and return get_mid_html($ctx); - - 'R/' eq $e and return get_reply_html($ctx); r404($ctx); } sub serve_git { - my ($cgi, $git, $path) = @_; - PublicInbox::GitHTTPBackend::serve($cgi, $git, $path); + my ($env, $git, $path) = @_; + PublicInbox::GitHTTPBackend::serve($env, $git, $path); } sub serve_mbox_range { |