# Copyright (C) 2018-2020 all contributors
# License: AGPL-3.0+

# Extends read-only Inbox for writing
package PublicInbox::InboxWritable;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Inbox Exporter);
use PublicInbox::Import;
use PublicInbox::Filter::Base qw(REJECT);
use Errno qw(ENOENT);
our @EXPORT_OK = qw(eml_from_path warn_ignore_cb);

# Return values of _git_config_perm.  The OLD_* values are the legacy
# numeric spellings (1, 2) which _git_config_perm maps to PERM_GROUP
# and PERM_EVERYBODY, respectively.
use constant {
	PERM_UMASK => 0,
	OLD_PERM_GROUP => 1,
	OLD_PERM_EVERYBODY => 2,
	PERM_GROUP => 0660,
	PERM_EVERYBODY => 0664,
};

# Re-blesses an existing inbox object ($ibx) into this class, in place.
# Returns $ibx untouched if it is already exactly of class $class.
# If $creat_opt is true, it is stored in $self->{-creat_opt} and v1
# inboxes are initialized immediately via init_inbox.
sub new {
	my ($class, $ibx, $creat_opt) = @_;
	return $ibx if ref($ibx) eq $class;
	my $self = bless $ibx, $class;

	# TODO: maybe stop supporting this
	if ($creat_opt) { # for { nproc => $N }
		$self->{-creat_opt} = $creat_opt;
		init_inbox($self) if $self->version == 1;
	}
	$self;
}

# Returns $self->{inboxdir} if it is defined and non-empty,
# dies otherwise.
sub assert_usable_dir {
	my ($self) = @_;
	my $dir = $self->{inboxdir};
	return $dir if defined($dir) && $dir ne '';
	die "no inboxdir defined for $self->{name}\n";
}

# v1 initialization callback (run under with_umask by init_inbox).
# When an indexlevel is configured or $skip_artnum is given, the
# SearchIdx and Msgmap objects are created (and $skip_artnum applied
# to the Msgmap inside its own DB transaction); otherwise only the
# ssoma.lock file is created (opened for append) in the inbox directory.
sub _init_v1 {
	my ($self, $skip_artnum) = @_;
	if (defined($self->{indexlevel}) || defined($skip_artnum)) {
		require PublicInbox::SearchIdx;
		require PublicInbox::Msgmap;
		my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
		$sidx->begin_txn_lazy;
		my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
		if (defined $skip_artnum) {
			$mm->{dbh}->begin_work;
			$mm->skip_artnum($skip_artnum);
			$mm->{dbh}->commit;
		}
		undef $mm; # ->created_at set
		$sidx->commit_txn_lazy;
	} else {
		open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
			die "$self->{inboxdir}/ssoma.lock: $!\n";
	}
}

# Initializes the on-disk inbox.  For v1, creates the bare git
# repository in inboxdir and runs _init_v1 under the inbox umask.
# For other versions, delegates to the importer object's ->init_inbox
# with ($shards, $skip_epoch, $skip_artnum).
sub init_inbox {
	my ($self, $shards, $skip_epoch, $skip_artnum) = @_;
	if ($self->version == 1) {
		my $dir = assert_usable_dir($self);
		PublicInbox::Import::init_bare($dir);
		$self->with_umask(\&_init_v1, $self, $skip_artnum);
	} else {
		my $v2w = importer($self);
		$v2w->init_inbox($shards, $skip_epoch, $skip_artnum);
	}
}

# Returns a new importer object for this inbox:
# a PublicInbox::V2Writable for v2 (with ->{parallel} set when
# $parallel is defined) or a PublicInbox::Import for v1.
# Dies (with $! set to 78) on any other version, and dies if
# PublicInbox::V2Writable cannot be loaded for a v2 inbox.
sub importer {
	my ($self, $parallel) = @_;
	my $v = $self->version;
	if ($v == 2) {
		eval { require PublicInbox::V2Writable };
		die "v2 not supported: $@\n" if $@;
		my $opt = $self->{-creat_opt};
		my $v2w = PublicInbox::V2Writable->new($self, $opt);
		$v2w->{parallel} = $parallel if defined $parallel;
		$v2w;
	} elsif ($v == 1) {
		my @arg = (undef, undef, undef, $self);
		PublicInbox::Import->new(@arg);
	} else {
		$! = 78; # EX_CONFIG 5.3.5 local configuration error
		die "unsupported inbox version: $v\n";
	}
}

# Instantiates the filter named by $self->{filter}, if it looks like
# a Perl package name (contains "::").  Returns the filter object, or
# undef when no such filter is set or the package fails to load (the
# load error is emitted via warn).  $im is an optional importer which
# gets ->done called on it first for v2+ inboxes with altid set.
sub filter {
	my ($self, $im) = @_;
	my $f = $self->{filter};
	if ($f && $f =~ /::/) {
		# v2 keeps msgmap open, which causes conflicts for filters
		# such as PublicInbox::Filter::RubyLang which overload msgmap
		# for a predictable serial number.
		if ($im && $self->version >= 2 && $self->{altid}) {
			$im->done;
		}

		my @args = (ibx => $self);
		# basic line splitting, only
		# Perhaps we can have proper quote splitting one day...
		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;

		# NOTE(review): string eval of the {filter} value; presumably
		# this comes from trusted local configuration -- confirm.
		eval "require $f";
		if ($@) {
			warn $@;
		} else {
			# e.g: PublicInbox::Filter::Vger->new(@args)
			return $f->new(@args);
		}
	}
	undef;
}

# Returns 1 if $bn is an acceptable Maildir message basename,
# 0 otherwise.  Names carrying a ":2," flags suffix that contains
# "D" or "T" are refused.
sub is_maildir_basename ($) {
	my ($bn) = @_;
	return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
	if ($bn =~ /:2,([A-Z]+)\z/i) {
		my $flags = $1;
		return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
	}
	1;
}

# Returns 1 if the last component of $path passes is_maildir_basename
# and $path is a plain file, 0 otherwise.
sub is_maildir_path ($) {
	my ($path) = @_;
	my @p = split(m!/+!, $path);
	(is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
}

# Slurps $path and returns a PublicInbox::Eml built from its contents.
# Returns a false value if the file cannot be opened or its contents
# evaluate to false (e.g. an empty file); warns on open failure
# unless the error is ENOENT.
sub eml_from_path ($) {
	my ($path) = @_;
	if (open my $fh, '<', $path) {
		my $str = do { local $/; <$fh> } or return;
		PublicInbox::Eml->new(\$str);
	} else { # ENOENT is common with Maildir
		warn "failed to open $path: $!\n" if $! != ENOENT;
		undef;
	}
}

# Imports every message found in the "cur" and "new" subdirectories of
# the Maildir at $dir.  Dies if $dir lacks any of cur/new/tmp or if a
# subdirectory cannot be opened.  Messages which the configured filter
# drops (false return or REJECT) are skipped.
sub import_maildir {
	my ($self, $dir) = @_;
	my $im = $self->importer(1);

	foreach my $sub (qw(cur new tmp)) {
		-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
	}
	foreach my $sub (qw(cur new)) {
		opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
		while (defined(my $fn = readdir($dh))) {
			next unless is_maildir_basename($fn);
			# readdir yields bare names relative to "$dir/$sub",
			# so the subdirectory must be part of the path
			my $mime = eml_from_path("$dir/$sub/$fn") or next;

			if (my $filter = $self->filter($im)) {
				# skip only this message when filtered out (same
				# as mb_add), so ->done below is always reached
				my $ret = $filter->scrub($mime) or next;
				next if $ret == REJECT();
				$mime = $ret;
			}
			$im->add($mime);
		}
	}
	$im->done;
}

# asctime: From example@example.com Fri Jun 23 02:56:55 2000
my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/;

# Adds one mbox message to importer $im.  $msg is a reference to the
# raw message text and is modified in place: trailing newlines are
# collapsed to one and ">From " escaping is undone according to
# $variant ('mboxrd' or 'mboxo').  If $filter is given, the message is
# dropped (nothing added) when the filter returns false or REJECT.
sub mb_add ($$$$) {
	my ($im, $variant, $filter, $msg) = @_;
	$$msg =~ s/(\r?\n)+\z/$1/s;
	if ($variant eq 'mboxrd') {
		$$msg =~ s/^>(>*From )/$1/gms;
	} elsif ($variant eq 'mboxo') {
		$$msg =~ s/^>From /From /gms;
	}
	my $mime = PublicInbox::Eml->new($msg);
	if ($filter) {
		my $ret = $filter->scrub($mime) or return;
		return if $ret == REJECT();
		$mime = $ret;
	}
	$im->add($mime)
}

# Imports all messages read from the mbox filehandle $fh.
# $variant must be 'mboxrd' or 'mboxo', otherwise this dies.
# A line matching $from_strict starts a new message only when it is
# the first such line or follows a blank line; otherwise a warning
# is emitted and the line is kept as part of the current message.
sub import_mbox {
	my ($self, $fh, $variant) = @_;
	if ($variant !~ /\A(?:mboxrd|mboxo)\z/) {
		die "variant must be 'mboxrd' or 'mboxo'\n";
	}
	my $im = $self->importer(1);
	my $prev = undef;
	my $msg = '';
	my $filter = $self->filter;
	while (defined(my $l = <$fh>)) {
		if ($l =~ /$from_strict/o) {
			if (!defined($prev) || $prev =~ /^\r?$/) {
				mb_add($im, $variant, $filter, \$msg) if $msg;
				$msg = '';
				$prev = $l;
				next;
			}
			warn "W[$.] $l\n";
		}
		$prev = $l;
		$msg .= $l;
	}
	mb_add($im, $variant, $filter, \$msg) if $msg;
	$im->done;
}

# Returns the chomp-ed output of `git config core.sharedRepository`
# as run through $self->git->qx.
sub _read_git_config_perm {
	my ($self) = @_;
	chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
	$perm;
}

# Maps a core.sharedRepository value to a permission constant or mode.
# The value is taken from the optional second argument if one is
# passed, otherwise read via _read_git_config_perm.  Returns PERM_UMASK,
# PERM_GROUP, PERM_EVERYBODY, or ($mode & 0666) for an explicit octal
# mode.  Dies if an explicit mode lacks owner read+write (0600).
sub _git_config_perm {
	my $self = shift;
	my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
	return PERM_UMASK if (!defined($perm) || $perm eq '');
	return PERM_UMASK if ($perm eq 'umask');
	return PERM_GROUP if ($perm eq 'group');
	if ($perm =~ /\A(?:all|world|everybody)\z/) {
		return PERM_EVERYBODY;
	}
	return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
	return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);

	my $i = oct($perm);
	return PERM_UMASK if ($i == PERM_UMASK);
	return PERM_GROUP if ($i == OLD_PERM_GROUP);
	return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY);

	if (($i & 0600) != 0600) {
		die "core.sharedRepository mode invalid: ".
		    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
	}
	($i & 0666);
}

# Converts a _git_config_perm return value into a umask.
# A value of 0 yields the current process umask unchanged.
sub _umask_for {
	my ($perm) = @_; # _git_config_perm return value
	my $rv = $perm;
	return umask if $rv == 0;

	# set +x bit if +r or +w were set
	$rv |= 0100 if ($rv & 0600);
	$rv |= 0010 if ($rv & 0060);
	$rv |= 0001 if ($rv & 0006);
	(~$rv & 0777);
}

# Calls $cb->(@arg) with the process umask set to $self->{umask}
# (computed and cached via umask_prepare on first use).  The previous
# umask is restored afterwards, even if $cb dies; a true $@ from $cb
# is re-thrown after the restore.  Returns the scalar result of $cb.
sub with_umask {
	my ($self, $cb, @arg) = @_;
	my $old = umask($self->{umask} //= umask_prepare($self));
	my $rv = eval { $cb->(@arg) };
	my $err = $@;
	umask $old;
	die $err if $err;
	$rv;
}

# Returns the umask derived from this inbox's git configuration.
sub umask_prepare {
	my ($self) = @_;
	my $perm = _git_config_perm($self);
	_umask_for($perm);
}

# Deletes the over, mm, git and search fields from the inbox object.
sub cleanup ($) {
	delete @{$_[0]}{qw(over mm git search)};
}

# warnings to ignore when handling spam mailboxes and maybe other places
# Returns true if the joined warning text matches a known pattern.
sub warn_ignore {
	my $s = "@_";
	# Email::Address::XS warnings
	$s =~ /^Argument contains empty address at /
	|| $s =~ /^Element at index [0-9]+ contains /
	# PublicInbox::MsgTime
	|| $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
	|| $s =~ /^bad Date: .+? in /
	# Encode::Unicode::UTF7
	|| $s =~ /^Bad UTF7 data escape at /
}

# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
# Returns a handler which drops warnings matched by warn_ignore and
# forwards everything else to the handler that was installed when
# warn_ignore_cb was called (or CORE::warn if there was none).
sub warn_ignore_cb {
	my $cb = $SIG{__WARN__} // \&CORE::warn;
	sub {
		return if warn_ignore(@_);
		$cb->(@_);
	}
}

# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
# Stores ->max_git_epoch into the scalar referenced by $max and returns
# the path of that epoch's git directory, or undef if there is no epoch.
sub git_dir_latest {
	my ($self, $max) = @_;
	defined($$max = $self->max_git_epoch) ?
		"$self->{inboxdir}/git/$$max.git" : undef;
}

1;