diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Admin.pm | 13 | ||||
-rw-r--r-- | lib/PublicInbox/Config.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/Import.pm | 31 | ||||
-rw-r--r-- | lib/PublicInbox/Inbox.pm | 27 | ||||
-rw-r--r-- | lib/PublicInbox/InboxWritable.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/Mbox.pm | 7 | ||||
-rw-r--r-- | lib/PublicInbox/NNTP.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/Qspawn.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 28 | ||||
-rw-r--r-- | lib/PublicInbox/SearchThread.pm | 67 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/TestCommon.pm | 26 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 20 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 55 | ||||
-rw-r--r-- | lib/PublicInbox/WatchMaildir.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/WwwAtomStream.pm | 5 | ||||
-rw-r--r-- | lib/PublicInbox/WwwText.pm | 4 |
17 files changed, 188 insertions, 127 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 336b7d4c..62ddbe82 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -71,7 +71,7 @@ sub resolve_inboxes ($;$$) { my ($argv, $opt, $cfg) = @_; $opt ||= {}; - $cfg //= eval { PublicInbox::Config->new }; + $cfg //= PublicInbox::Config->new; if ($opt->{all}) { my $cfgfile = PublicInbox::Config::default_file(); $cfg or die "--all specified, but $cfgfile not readable\n"; @@ -234,4 +234,15 @@ sub progress_prepare ($) { } } +# same unit factors as git: +sub parse_unsigned ($) { + my ($max_size) = @_; + + $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return; + my ($n, $unit_factor) = ($1, $2 // ''); + my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); + $$max_size = $n * ($u{lc($unit_factor)} // 1); + 1; +} + 1; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 917939ca..458f29b2 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -367,7 +367,7 @@ sub _fill { my $ibx = {}; foreach my $k (qw(inboxdir filter newsgroup - watch watchheader httpbackendmax + watch httpbackendmax replyto feedmax nntpserver indexlevel)) { my $v = $self->{"$pfx.$k"}; $ibx->{$k} = $v if defined $v; @@ -388,7 +388,7 @@ sub _fill { # TODO: more arrays, we should support multi-value for # more things to encourage decentralization foreach my $k (qw(address altid nntpmirror coderepo hide listid url - infourl)) { + infourl watchheader)) { if (defined(my $v = $self->{"$pfx.$k"})) { $ibx->{$k} = _array($v); } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index c72c1e92..95d654f6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -440,14 +440,31 @@ sub run_die ($;$$) { $? == 0 or die join(' ', @$cmd) . " failed: $?\n"; } +my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n", + 'description' => <<EOD, +Unnamed repository; edit this file 'description' to name the repository. +EOD + 'config' => <<EOC); +[core] + repositoryFormatVersion = 0 + filemode = true + bare = true +[repack] + writeBitmaps = true +EOC + sub init_bare { - my ($dir) = @_; - my @cmd = (qw(git init --bare -q), $dir); - run_die(\@cmd); - # set a reasonable default: - @cmd = (qw/git config/, "--file=$dir/config", - 'repack.writeBitmaps', 'true'); - run_die(\@cmd); + my ($dir) = @_; # or self + $dir = $dir->{git}->{git_dir} if ref($dir); + require File::Path; + File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]); + for (my $i = 0; $i < @INIT_FILES; $i++) { + my $f = $dir.'/'.$INIT_FILES[$i++]; + next if -f $f; + open my $fh, '>', $f or die "open $f: $!"; + print $fh $INIT_FILES[$i] or die "print $f: $!"; + close $fh or die "close $f: $!"; + } } sub done { diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 95ffd039..186eb420 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -82,7 +82,7 @@ sub _set_uint ($$$) { sub _set_limiter ($$$) { my ($self, $pi_config, $pfx) = @_; my $lkey = "-${pfx}_limiter"; - $self->{$lkey} ||= eval { + $self->{$lkey} ||= do { # full key is: publicinbox.$NAME.httpbackendmax my $mkey = $pfx.'max'; my $val = $self->{$mkey} or return; @@ -130,7 +130,7 @@ sub version { $_[0]->{version} // 1 } sub git_epoch { my ($self, $epoch) = @_; $self->version == 2 or return; - $self->{"$epoch.git"} ||= eval { + $self->{"$epoch.git"} ||= do { my $git_dir = "$self->{inboxdir}/git/$epoch.git"; my $g = PublicInbox::Git->new($git_dir); $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter}; @@ -141,7 +141,7 @@ sub git_epoch { sub git { my ($self) = @_; - $self->{git} ||= eval { + $self->{git} ||= do { my $git_dir = $self->{inboxdir}; $git_dir .= '/all.git' if $self->version == 2; my $g = PublicInbox::Git->new($git_dir); @@ -219,19 +219,22 @@ sub try_cat { sub description { my ($self) = @_; - $self->{description} //= do { + ($self->{description} //= do { my $desc = try_cat("$self->{inboxdir}/description"); local $/ = "\n"; chomp $desc; $desc =~ s/\s+/ /smg; - $desc eq '' ? '($INBOX_DIR/description missing)' : $desc; - }; + $desc eq '' ? undef : $desc; + }) // '($INBOX_DIR/description missing)'; } sub cloneurl { my ($self) = @_; - $self->{cloneurl} //= - [ split(/\s+/s, try_cat("$self->{inboxdir}/cloneurl")) ]; + ($self->{cloneurl} //= do { + my $s = try_cat("$self->{inboxdir}/cloneurl"); + my @urls = split(/\s+/s, $s); + scalar(@urls) ? \@urls : undef + }) // []; } sub base_url { @@ -308,9 +311,7 @@ sub nntp_usable { # for v1 users w/o SQLite only sub msg_by_path ($$;$) { my ($self, $path, $ref) = @_; - my $str = git($self)->cat_file('HEAD:'.$path, $ref); - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; - $str; + git($self)->cat_file('HEAD:'.$path, $ref); } sub msg_by_smsg ($$;$) { @@ -321,9 +322,7 @@ sub msg_by_smsg ($$;$) { return unless defined $smsg; defined(my $blob = $smsg->{blob}) or return; - my $str = git($self)->cat_file($blob, $ref); - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s if $str; - $str; + git($self)->cat_file($blob, $ref); } sub smsg_mime { diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index f2ba21fc..31aa76c6 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -111,7 +111,7 @@ sub is_maildir_path ($) { (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; } -sub maildir_path_load ($) { +sub mime_from_path ($) { my ($path) = @_; if (open my $fh, '<', $path) { local $/; @@ -138,7 +138,7 @@ sub import_maildir { opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; while (defined(my $fn = readdir($dh))) { next unless is_maildir_basename($fn); - my $mime = maildir_path_load("$dir/$fn") or next; + my $mime = mime_from_path("$dir/$fn") or next; if (my $filter = $self->filter($im)) { my $ret = $filter->scrub($mime) or return; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index d5beceaf..9995140c 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -106,8 +106,11 @@ sub msg_hdr ($$;$) { 'List-Post', "<mailto:$ibx->{-primary_address}>", ); my $crlf = $header_obj->crlf; - my $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970\n" . - $header_obj->as_string; + my $buf = $header_obj->as_string; + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + $buf = "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf; + for (my $i = 0; $i < @append; $i += 2) { my $k = $append[$i]; my $v = $append[$i + 1]; diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index d1f75f6f..c79f198b 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -506,6 +506,8 @@ sub set_art { sub msg_hdr_write ($$$) { my ($self, $hdr, $body_follows) = @_; $hdr = $hdr->as_string; + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; utf8::encode($hdr); $hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm index aebcb1f7..c09e8d2c 100644 --- a/lib/PublicInbox/Qspawn.pm +++ b/lib/PublicInbox/Qspawn.pm @@ -281,10 +281,6 @@ sub psgi_return_init_cb { ${$self->{hdr_buf}}, $filter); $wcb->($r); } - - # Workaround a leak under Perl 5.16.3 when combined with - # Plack::Middleware::Deflater: - $wcb = undef; } sub psgi_return_start { # may run later, much later... diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 05689941..25118f43 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -64,6 +64,7 @@ sub new { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); + $self->{index_max_size} = $ibx->{index_max_size}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number @@ -551,13 +552,9 @@ sub unindex_both { sub do_cat_mail { my ($git, $blob, $sizeref) = @_; - my $mime = eval { - my $str = $git->cat_file($blob, $sizeref); - # fixup bugs from import: - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - PublicInbox::MIME->new($str); - }; - $@ ? undef : $mime; + my $str = $git->cat_file($blob, $sizeref) or + die "BUG: $blob not found in $git->{git_dir}"; + PublicInbox::MIME->new($str); } # called by public-inbox-index @@ -576,6 +573,16 @@ sub batch_adjust ($$$$$) { } } +sub too_big ($$$) { + my ($self, $git, $oid) = @_; + my $max_size = $self->{index_max_size} or return; + my (undef, undef, $size) = $git->check($oid); + die "E: bad $oid in $git->{git_dir}\n" if !defined($size); + return if $size <= $max_size; + warn "W: skipping $oid ($size > $max_size)\n"; + 1; +} + # only for v1 sub read_log { my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_; @@ -602,7 +609,8 @@ sub read_log { } next; } - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + next if too_big($self, $git, $blob); + my $mime = do_cat_mail($git, $blob, \$bytes); my $smsg = bless {}, 'PublicInbox::Smsg'; batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); $smsg->{blob} = $blob; @@ -610,7 +618,7 @@ sub read_log { $add_cb->($self, $mime, $smsg); } elsif ($line =~ /$delmsg/o) { my $blob = $1; - $D{$blob} = 1; + $D{$blob} = 1 unless too_big($self, $git, $blob); } elsif ($line =~ /^commit ($h40)/o) { $latest = $1; $newest ||= $latest; @@ -623,7 +631,7 @@ sub read_log { close($log) or die "git log failed: \$?=$?"; # get the leftovers foreach my $blob (keys %D) { - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $mime = do_cat_mail($git, $blob, \$bytes); $del_cb->($self, $mime); } $batch_cb->($nr, $latest, $newest); diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 38d1aa6e..60f692b2 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -24,7 +24,16 @@ use PublicInbox::MID qw($MID_EXTRACT); sub thread { my ($msgs, $ordersub, $ctx) = @_; - my $id_table = {}; + + # A. put all current $msgs (non-ghosts) into %id_table + my %id_table = map {; + # this delete saves around 4K across 1K messages + # TODO: move this to a more appropriate place, breaks tests + # if we do it during psgi_cull + delete $_->{num}; + + $_->{mid} => PublicInbox::SearchThread::Msg::cast($_); + } @$msgs; # Sadly, we sort here anyways since the fill-in-the-blanks References: # can be shakier if somebody used In-Reply-To with multiple, disparate @@ -32,36 +41,21 @@ sub thread { # always determine ordering when somebody uses multiple In-Reply-To. # We'll trust the client Date: header here instead of the Received: # time since this is for display (and not retrieval) - _add_message($id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs; + _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs; my $ibx = $ctx->{-inbox}; my $rootset = [ grep { !delete($_->{parent}) && $_->visible($ibx) - } values %$id_table ]; - $id_table = undef; + } values %id_table ]; $rootset = $ordersub->($rootset); $_->order_children($ordersub, $ctx) for @$rootset; $rootset; } -sub _get_cont_for_id ($$) { - my ($id_table, $mid) = @_; - $id_table->{$mid} ||= PublicInbox::SearchThread::Msg->new($mid); -} - -sub _add_message ($$) { - my ($id_table, $smsg) = @_; - - # A. if id_table... - my $this = _get_cont_for_id($id_table, $smsg->{mid}); - $this->{smsg} = $smsg; - - # saves around 4K across 1K messages - # TODO: move this to a more appropriate place, breaks tests - # if we do it during psgi_cull - delete $smsg->{num}; +sub _set_parent ($$) { + my ($id_table, $this) = @_; # B. For each element in the message's References field: - defined(my $refs = $smsg->{references}) or return; + defined(my $refs = $this->{references}) or return; # This loop exists to help fill in gaps left from missing # messages. It is not needed in a perfect world where @@ -70,7 +64,8 @@ sub _add_message ($$) { my $prev; foreach my $ref ($refs =~ m/$MID_EXTRACT/go) { # Find a Container object for the given Message-ID - my $cont = _get_cont_for_id($id_table, $ref); + my $cont = $id_table->{$ref} //= + PublicInbox::SearchThread::Msg::ghost($ref); # Link the References field's Containers together in # the order implied by the References header @@ -96,22 +91,31 @@ sub _add_message ($$) { } package PublicInbox::SearchThread::Msg; +use base qw(PublicInbox::Smsg); use strict; use warnings; use Carp qw(croak); -sub new { +# declare a ghost smsg (determined by absence of {blob}) +sub ghost { bless { - id => $_[1], + mid => $_[0], children => {}, # becomes an array when sorted by ->order(...) - }, $_[0]; + }, __PACKAGE__; +} + +# give a existing smsg the methods of this class +sub cast { + my ($smsg) = @_; + $smsg->{children} = {}; + bless $smsg, __PACKAGE__; } sub topmost { my ($self) = @_; my @q = ($self); while (my $cont = shift @q) { - return $cont if $cont->{smsg}; + return $cont if $cont->{blob}; push @q, values %{$cont->{children}}; } undef; @@ -122,7 +126,7 @@ sub add_child { croak "Cowardly refusing to become my own parent: $self" if $self == $child; - my $cid = $child->{id}; + my $cid = $child->{mid}; # reparenting: if (defined(my $parent = $child->{parent})) { @@ -148,8 +152,13 @@ sub has_descendent { # being folded/mangled by a MUA, and not a missing message. sub visible ($$) { my ($self, $ibx) = @_; - ($self->{smsg} ||= eval { $ibx->smsg_by_mid($self->{id}) }) || - (scalar values %{$self->{children}}); + return 1 if $self->{blob}; + if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) { + %$self = (%$self, %$by_mid); + 1; + } else { + (scalar values %{$self->{children}}); + } } sub order_children { diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 4fbf59ef..4336e4d9 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -243,8 +243,8 @@ sub search_nav_bot { sub sort_relevance { [ sort { - (eval { $b->topmost->{smsg}->{pct} } // 0) <=> - (eval { $a->topmost->{smsg}->{pct} } // 0) + (eval { $b->topmost->{pct} } // 0) <=> + (eval { $a->topmost->{pct} } // 0) } @{$_[0]} ] } diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index e9efbac7..b50871e8 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -9,7 +9,7 @@ use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek); use POSIX qw(dup2); use IO::Socket::INET; our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods - run_script start_script key2sub); + run_script start_script key2sub xsys xqx); sub tmpdir (;$) { my ($base) = @_; @@ -87,7 +87,7 @@ sub require_mods { sub key2script ($) { my ($key) = @_; - return $key if (index($key, '/') >= 0); + return $key if ($key eq 'git' || index($key, '/') >= 0); # n.b. we may have scripts which don't start with "public-inbox" in # the future: $key =~ s/\A([-\.])/public-inbox$1/; @@ -244,6 +244,28 @@ sub run_script ($;$$) { sub wait_for_tail () { sleep(2) } +# like system() built-in, but uses spawn() for env/rdr + vfork +sub xsys { + my ($cmd, $env, $rdr) = @_; + if (ref($cmd)) { + $rdr ||= {}; + } else { + $cmd = [ @_ ]; + $env = undef; + $rdr = {}; + } + run_script($cmd, $env, { %$rdr, run_mode => 0 }); + $? >> 8 +} + +# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork +sub xqx { + my ($cmd, $env, $rdr) = @_; + $rdr //= {}; + run_script($cmd, $env, { %$rdr, run_mode => 0, 1 => \(my $out) }); + wantarray ? split(/^/m, $out) : $out; +} + sub start_script { my ($cmd, $env, $opt) = @_; my ($key, @argv) = @$cmd; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 1c78ef24..01b8bed6 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -120,6 +120,7 @@ sub new { last_commit => [], # git repo -> commit }; $self->{shards} = count_shards($self) || nproc_shards($creat); + $self->{index_max_size} = $v2ibx->{index_max_size}; bless $self, $class; } @@ -730,9 +731,8 @@ sub fill_alternates ($$) { sub git_init { my ($self, $epoch) = @_; my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git"; - my @cmd = (qw(git init --bare -q), $git_dir); - PublicInbox::Import::run_die(\@cmd); - @cmd = (qw/git config/, "--file=$git_dir/config", + PublicInbox::Import::init_bare($git_dir); + my @cmd = (qw/git config/, "--file=$git_dir/config", 'include.path', '../../all.git/config'); PublicInbox::Import::run_die(\@cmd); fill_alternates($self, $epoch); @@ -868,6 +868,7 @@ sub atfork_child { sub mark_deleted ($$$$) { my ($self, $sync, $git, $oid) = @_; + return if PublicInbox::SearchIdx::too_big($self, $git, $oid); my $msgref = $git->cat_file($oid); my $mime = PublicInbox::MIME->new($$msgref); my $mids = mids($mime->header_obj); @@ -980,18 +981,6 @@ sub check_unindexed ($$$) { } } -# reuse Msgmap to store num => oid mapping (rather than num => mid) -sub multi_mid_q_new () { - my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1); - my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1); - $multi_mid->{dbh}->do('PRAGMA synchronous = OFF'); - # for Msgmap->DESTROY: - $multi_mid->{tmp_name} = $fn; - $multi_mid->{pid} = $$; - close $fh or die "failed to close $fn: $!"; - $multi_mid -} - sub multi_mid_q_push ($$$) { my ($self, $sync, $oid) = @_; my $multi_mid = $sync->{multi_mid} //= PublicInbox::MultiMidQueue->new; @@ -1006,6 +995,7 @@ sub multi_mid_q_push ($$$) { sub reindex_oid ($$$$) { my ($self, $sync, $git, $oid) = @_; + return if PublicInbox::SearchIdx::too_big($self, $git, $oid); my ($num, $mid0, $len); my $msgref = $git->cat_file($oid, \$len); return if $len == 0; # purged diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index b6d7acaf..9b62ed3c 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -297,11 +297,9 @@ sub _th_index_lite { my $nr_c = scalar @$children; my $nr_s = 0; my $siblings; - if (my $smsg = $node->{smsg}) { - # delete saves about 200KB on a 1K message thread - if (my $refs = delete $smsg->{references}) { - ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); - } + # delete saves about 200KB on a 1K message thread + if (my $refs = delete $node->{references}) { + ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); } my $irt_map = $mapping->{$$irt} if defined $$irt; if (defined $irt_map) { @@ -310,12 +308,12 @@ sub _th_index_lite { $rv .= $pad . $irt_map->[0]; if ($idx > 0) { my $prev = $siblings->[$idx - 1]; - my $pmid = $prev->{id}; + my $pmid = $prev->{mid}; if ($idx > 2) { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { - my $ppmid = $siblings->[0]->{id}; + my $ppmid = $siblings->[0]->{mid}; $rv .= $pad . $mapping->{$ppmid}->[0]; } $rv .= $pad . $mapping->{$pmid}->[0]; @@ -328,26 +326,26 @@ sub _th_index_lite { $attr =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self $rv .= "<b>@ $attr"; if ($nr_c) { - my $cmid = $children->[0]->{id}; + my $cmid = $children->[0]->{mid}; $rv .= $pad . $mapping->{$cmid}->[0]; if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); } elsif (my $cn = $children->[1]) { - $rv .= $pad . $mapping->{$cn->{id}}->[0]; + $rv .= $pad . $mapping->{$cn->{mid}}->[0]; } } my $next = $siblings->[$idx+1] if $siblings && $idx >= 0; if ($next) { - my $nmid = $next->{id}; + my $nmid = $next->{mid}; $rv .= $pad . $mapping->{$nmid}->[0]; my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); } elsif (my $nn = $siblings->[$idx + 2]) { - $rv .= $pad . $mapping->{$nn->{id}}->[0]; + $rv .= $pad . $mapping->{$nn->{mid}}->[0]; } } $rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n"; @@ -369,7 +367,7 @@ sub walk_thread ($$$) { sub pre_thread { # walk_thread callback my ($ctx, $level, $node, $idx) = @_; - $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ]; + $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); } @@ -388,8 +386,8 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback my $node = shift @$q or next; my $cl = $level + 1; unshift @$q, map { ($cl, $_) } @{$node->{children}}; - if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) { - return thread_index_entry($ctx, $level, $smsg); + if ($ctx->{-inbox}->smsg_mime($node)) { + return thread_index_entry($ctx, $level, $node); } else { return ghost_index_entry($ctx, $level, $node); } @@ -407,7 +405,7 @@ sub stream_thread ($$) { my $node = shift @q or next; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; - $smsg = $ibx->smsg_mime($node->{smsg}) and last; + $smsg = $ibx->smsg_mime($node) and last; } return missing_thread($ctx) unless $smsg; @@ -825,7 +823,7 @@ sub indent_for { sub find_mid_root { my ($ctx, $level, $node, $idx) = @_; ++$ctx->{root_idx} if $level == 0; - if ($node->{id} eq $ctx->{mid}) { + if ($node->{mid} eq $ctx->{mid}) { $ctx->{found_mid_at} = $ctx->{root_idx}; return 0; } @@ -899,8 +897,8 @@ sub dedupe_subject { } sub skel_dump { # walk_thread callback - my ($ctx, $level, $node) = @_; - my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node); + my ($ctx, $level, $smsg) = @_; + $smsg->{blob} or return _skel_ghost($ctx, $level, $smsg); my $skel = $ctx->{skel}; my $cur = $ctx->{cur}; @@ -983,7 +981,7 @@ sub skel_dump { # walk_thread callback sub _skel_ghost { my ($ctx, $level, $node) = @_; - my $mid = $node->{id}; + my $mid = $node->{mid}; my $d = ' [not found] '; $d .= ' ' if exists $ctx->{searchview}; $d .= indent_for($level) . th_pfx($level); @@ -1006,18 +1004,23 @@ sub _skel_ghost { sub sort_ds { [ sort { - (eval { $a->topmost->{smsg}->{ds} } || 0) <=> - (eval { $b->topmost->{smsg}->{ds} } || 0) + (eval { $a->topmost->{ds} } || 0) <=> + (eval { $b->topmost->{ds} } || 0) } @{$_[0]} ]; } # accumulate recent topics if search is supported # returns 200 if done, 404 if not sub acc_topic { # walk_thread callback - my ($ctx, $level, $node) = @_; - my $mid = $node->{id}; - my $smsg = $node->{smsg} // $ctx->{-inbox}->smsg_by_mid($mid); - if ($smsg) { + my ($ctx, $level, $smsg) = @_; + my $mid = $smsg->{mid}; + my $has_blob = $smsg->{blob} // do { + if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) { + %$smsg = (%$smsg, %$by_mid); + 1; + } + }; + if ($has_blob) { my $subj = subject_normalized($smsg->{subject}); $subj = '(no subject)' if $subj eq ''; my $ds = $smsg->{ds}; @@ -1208,7 +1211,7 @@ sub thread_adj_level { sub ghost_index_entry { my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); - $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id}) + $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{mid}) . '</pre>' . $end; } diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index e2024640..7b9e8915 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -11,7 +11,7 @@ use PublicInbox::InboxWritable; use File::Temp 0.19 (); # 0.19 for ->newdir use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; -*maildir_path_load = *PublicInbox::InboxWritable::maildir_path_load; +*mime_from_path = \&PublicInbox::InboxWritable::mime_from_path; sub new { my ($class, $config) = @_; @@ -59,9 +59,11 @@ sub new { my $watch = $ibx->{watch} or return; if (is_maildir($watch)) { my $watch_hdrs = []; - if (my $wh = $ibx->{watchheader}) { - my ($k, $v) = split(/:/, $wh, 2); - push @$watch_hdrs, [ $k, qr/\Q$v\E/ ]; + if (my $whs = $ibx->{watchheader}) { + for (@$whs) { + my ($k, $v) = split(/:/, $_, 2); + push @$watch_hdrs, [ $k, qr/\Q$v\E/ ]; + } } if (my $list_ids = $ibx->{listid}) { for (@$list_ids) { @@ -123,7 +125,7 @@ sub _remove_spam { my ($self, $path) = @_; # path must be marked as (S)een $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return; - my $mime = maildir_path_load($path) or return; + my $mime = mime_from_path($path) or return; $self->{config}->each_inbox(sub { my ($ibx) = @_; eval { @@ -165,7 +167,7 @@ sub _try_path { $warn_cb->(@_); }; foreach my $ibx (@$inboxes) { - my $mime = maildir_path_load($path) or next; + my $mime = mime_from_path($path) or next; my $im = _importer_for($self, $ibx); # any header match means it's eligible for the inbox: diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index aa917ed8..c3fbb1a7 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -20,9 +20,8 @@ sub close {} sub new { my ($class, $ctx, $cb) = @_; - $ctx->{emit_header} = 1; $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env}); - bless { cb => $cb || \&close, ctx => $ctx }, $class; + bless { cb => $cb || \&close, ctx => $ctx, emit_header => 1 }, $class; } sub response { @@ -130,7 +129,7 @@ sub feed_entry { $email = ascii_html($email); my $s = ''; - if (delete $ctx->{emit_header}) { + if (delete $self->{emit_header}) { $s .= atom_header($ctx, $title); } $s .= "<entry><author><name>$name</name><email>$email</email>" . diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 2008ba09..b23a415e 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -151,7 +151,7 @@ sub inbox_config ($$$) { url = https://example.com/$name/ url = http://example.onion/$name/ EOS - for my $k (qw(address listid infourl)) { + for my $k (qw(address listid infourl watchheader)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } @@ -171,7 +171,7 @@ EOF } } - for my $k (qw(filter newsgroup obfuscate replyto watchheader)) { + for my $k (qw(filter newsgroup obfuscate replyto)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $v\n"; } |