diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Daemon.pm | 3 | ||||
-rw-r--r-- | lib/PublicInbox/HTTP.pm | 1 | ||||
-rw-r--r-- | lib/PublicInbox/Import.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 47 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 1 | ||||
-rw-r--r-- | lib/PublicInbox/WWW.pm | 12 |
7 files changed, 51 insertions, 26 deletions
diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index 68ba9876..227ba5f9 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -13,6 +13,7 @@ use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); STDOUT->autoflush(1); STDERR->autoflush(1); require PublicInbox::DS; +require PublicInbox::EvCleanup; require POSIX; require PublicInbox::Listener; require PublicInbox::ParentPipe; @@ -463,6 +464,7 @@ sub master_loop { sub daemon_loop ($$) { my ($refresh, $post_accept) = @_; + PublicInbox::EvCleanup::enable(); # early for $refresh my $parent_pipe; if ($worker_processes > 0) { $refresh->(); # preload by default @@ -485,7 +487,6 @@ sub daemon_loop ($$) { @listeners = map { PublicInbox::Listener->new($_, $post_accept) } @listeners; - PublicInbox::EvCleanup::enable(); PublicInbox::DS->EventLoop; $parent_pipe = undef; } diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 11bd241e..10e6d6a4 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -18,6 +18,7 @@ use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); use IO::Handle; +require PublicInbox::EvCleanup; use constant { CHUNK_START => -1, # [a-f0-9]+\r\n CHUNK_END => -2, # \r\n diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 12abf399..81a38fb6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -367,10 +367,14 @@ sub add { my @ct = msg_timestamp($hdr); my $author_time_raw = git_timestamp(@at); my $commit_time_raw = git_timestamp(@ct); + my $subject = $mime->header('Subject'); $subject = '(no subject)' unless defined $subject; - my $path_type = $self->{path_type}; + # Mime decoding can create nulls replace them with spaces to protect git + $subject =~ tr/\0/ /; + utf8::encode($subject); + my $path_type = $self->{path_type}; my $path; if ($path_type eq '2/38') { $path = mid2path(v1_mid0($mime)); @@ -411,9 +415,6 @@ sub add { print $w "reset $ref\n" or wfail; } - # Mime decoding can create nulls replace them with spaces to protect git - $subject =~ tr/\0/ /; - utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", "author $name <$email> $author_time_raw\n", "committer $self->{ident} $commit_time_raw\n" or wfail; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 813ed997..0d86771f 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -7,26 +7,30 @@ use strict; use warnings; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); -use Devel::Peek qw(SvREFCNT); use PublicInbox::MIME; -use POSIX qw(strftime); +# Long-running "git-cat-file --batch" processes won't notice +# unlinked packs, so we need to restart those processes occasionally. +# Xapian and SQLite file handles are mostly stable, but sometimes an +# admin will attempt to replace them atomically after compact/vacuum +# and we need to be prepared for that. my $cleanup_timer; -eval { - $cleanup_timer = 'disabled'; - require PublicInbox::EvCleanup; - $cleanup_timer = undef; # OK if we get here -}; -my $cleanup_broken = $@; - +my $cleanup_avail = -1; # 0, or 1 +my $have_devel_peek; my $CLEANUP = {}; # string(inbox) -> inbox sub cleanup_task () { $cleanup_timer = undef; my $next = {}; for my $ibx (values %$CLEANUP) { my $again; - foreach my $f (qw(mm search over)) { - delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1; + if ($have_devel_peek) { + foreach my $f (qw(mm search over)) { + # we bump refcnt by assigning tmp, here: + my $tmp = $ibx->{$f} or next; + next if Devel::Peek::SvREFCNT($tmp) > 2; + delete $ibx->{$f}; + # refcnt is zero when tmp is out-of-scope + } } my $expire = time - 60; if (my $git = $ibx->{git}) { @@ -37,16 +41,31 @@ sub cleanup_task () { $again = 1 if $git->cleanup($expire); } } - $again ||= !!($ibx->{over} || $ibx->{mm} || $ibx->{search}); + if ($have_devel_peek) { + $again ||= !!($ibx->{over} || $ibx->{mm} || + $ibx->{search}); + } $next->{"$ibx"} = $ibx if $again; } $CLEANUP = $next; } +sub cleanup_possible () { + # no need to require EvCleanup, here, if it were enabled another + # module would've require'd it, already + eval { PublicInbox::EvCleanup::enabled() } or return 0; + + eval { + require Devel::Peek; # needs separate package in Fedora + $have_devel_peek = 1; + }; + 1; +} + sub _cleanup_later ($) { my ($self) = @_; - return if $cleanup_broken; - return unless PublicInbox::EvCleanup::enabled(); + $cleanup_avail = cleanup_possible() if $cleanup_avail < 0; + return if $cleanup_avail != 1; $cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task); $CLEANUP->{"$self"} = $self; } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1e62f4c..eae10d8e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -24,6 +24,10 @@ sub load_xapian () { # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector + # FLAG_PHRASE also seems to cause performance problems + # sometimes. + # TODO: make this an option, maybe? + # or make indexlevel=medium as default FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD(); }; }; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f96f0d03..114420e4 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -19,7 +19,6 @@ use POSIX qw(strftime); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); use PublicInbox::Git qw(git_unquote); -use Compress::Zlib qw(compress); use constant { BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ? diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 8e1b1afe..b6f18f8d 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -59,14 +59,14 @@ sub call { my $ctx = { env => $env, www => $self }; # we don't care about multi-value - my %qp = map { + %{$ctx->{qp}} = map { utf8::decode($_); - my ($k, $v) = split('=', uri_unescape($_), 2); - $v = '' unless defined $v; - $v =~ tr/+/ /; - ($k, $v) + tr/+/ /; + my ($k, $v) = split('=', $_, 2); + $v = uri_unescape($v // ''); + # none of the keys we care about will need escaping + $k => $v; } split(/[&;]+/, $env->{QUERY_STRING}); - $ctx->{qp} = \%qp; # avoiding $env->{PATH_INFO} here since that's already decoded my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env)); |