# NOTE(review): this copy of the test script looks damaged and will not run
# as-is -- restore it from a pristine checkout before relying on it:
#   * statements are collapsed onto a few physical lines, so everything
#     after the first "#" on a line (including the whole first line) is
#     parsed as a comment;
#   * here-document openers and text between "<" and ">" appear to have
#     been stripped (e.g. `write_file '>>', $pi_config, <header('From')`
#     and `my $in = < To: You`);
#   * 'Eléanor' is compared against encode(..., Encode::HTMLCREF) output;
#     presumably the expected value was an HTML character reference
#     (&#233;) that got decoded -- TODO confirm.
#
# Purpose (from the visible code): drives the `-mda` and `-learn` scripts
# through run_script() against a bare git repository in a temporary
# directory and checks the results with $git->qx / $git->cat_file.
# Sections, in order:
#   - $fail_bad_header: feeds one message to -mda, then asserts HEAD still
#     equals $good_rev and exactly one file exists under $faildir
#   - setup of $home, $pi_home, $maindir and $pi_config
#   - --help / --bogus switches, missing ORIGINAL_RECIPIENT (exit code 67)
#   - successful delivery, then delivery with the spamc spam mock first in
#     PATH (nothing new committed, one file under $faildir/new)
#   - rejected messages: bad recipient, duplicate Message-ID, missing From,
#     short subject, no date, bad date
#   - `-learn spam` and `-learn ham` (each run twice), and t/mda-mime.eml
#   - List-Id based delivery, --no-precheck, multiple List-ID headers,
#     `-learn rm` after the inbox address is changed, List-Unsubscribe
#   - `-index -L medium`, then `-learn ham` before and after the Xapian
#     files are removed
# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use warnings; use Cwd qw(getcwd); use PublicInbox::MID qw(mid2path); use PublicInbox::Git; use PublicInbox::InboxWritable; use PublicInbox::TestCommon; use PublicInbox::Import; use PublicInbox::IO qw(write_file); use File::Path qw(remove_tree); my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $pi_home = "$home/.public-inbox"; my $pi_config = "$pi_home/config"; my $maindir = "$tmpdir/main.git"; my $main_bin = getcwd()."/t/main-bin"; my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock my $fail_bin = getcwd()."/t/fail-bin"; my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my $faildir = "$home/faildir/"; my $git = PublicInbox::Git->new($maindir); my $fail_bad_header = sub ($$$) { my ($good_rev, $msg, $in) = @_; my @f = glob("$faildir/*/*"); unlink @f if @f; my ($out, $err) = ("", ""); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; local $ENV{PATH} = $main_path; ok(run_script(['-mda'], undef, $opt), "no error on undeliverable ($msg)"); my $rev = $git->qx(qw(rev-list HEAD)); chomp $rev; is($rev, $good_rev, "bad revision not committed ($msg)"); @f = glob("$faildir/*/*"); is(scalar @f, 1, "faildir written to"); [ $in, $out, $err ]; }; { ok(-x "$main_bin/spamc", "spamc ham mock found (run in top of source tree"); ok(-x "$fail_bin/spamc", "spamc mock found (run in top of source tree"); is(1, mkdir($home, 0755), "setup ~/ for testing"); is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); PublicInbox::Import::init_bare($maindir); write_file '>>', $pi_config, <header('From'); my ($author) = PublicInbox::Address::names($from); my ($email) = PublicInbox::Address::emails($from); my $date = $msg->header('Date'); is('Eléanor', encode('us-ascii', my $tmp = $author, Encode::HTMLCREF), 'HTML conversion is correct'); is($email, 'e@example.com', 'email parsed correctly'); is($date, 'Thu, 01 Jan 1970 
00:00:00 +0000', 'message date parsed correctly'); $author; }; die $@ if $@; { my $good_rev; local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; ok(run_script([qw(-mda --help)], undef, { 1 => \my $out, 2 => \my $err }), '-mda --help'); like $out, qr/usage:/, 'usage shown w/ --help'; ok(!run_script([qw(-mda --bogus)], undef, { 1 => \$out, 2 => \$err }), '-mda --bogus fails'); like $err, qr/usage:/, 'usage shown on bogus switch'; my $in = < To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF { local $ENV{PATH} = $main_path; ok(!run_script(['-mda'], { ORIGINAL_RECIPIENT => undef }, { 0 => \$in, 2 => \$err }), 'missing ORIGINAL_RECIPIENT fails'); is($? >> 8, 67, 'got EX_NOUSER'); like $err, qr/\bORIGINAL_RECIPIENT\b/, 'ORIGINAL_RECIPIENT noted in stderr'; is unlink(glob("$faildir/*/*")), 1, 'unlinked failed message'; } # ensure successful message delivery { local $ENV{PATH} = $main_path; ok(run_script(['-mda'], undef, { 0 => \$in })); my $rev = $git->qx(qw(rev-list HEAD)); like($rev, qr/\A[a-f0-9]{40,64}/, "good revision committed"); chomp $rev; my $cmt = $git->cat_file($rev); like($$cmt, qr/^author Me 0 \+0000\n/m, "author info set correctly"); like($$cmt, qr/^committer test /m, "committer info set correctly"); $good_rev = $rev; } # ensure failures work, fail with bad spamc { my @prev = <$faildir/new/*>; is(scalar @prev, 0 , "nothing in PI_EMERGENCY before"); local $ENV{PATH} = $fail_path; ok(run_script(['-mda'], undef, { 0 => \$in })); my @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 1, "bad revision not committed"); my @new = <$faildir/new/*>; is(scalar @new, 1, "PI_EMERGENCY is written to"); } $fail_bad_header->($good_rev, "bad recipient", <<""); From: Me To: You Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 my $fail = $fail_bad_header->($good_rev, "duplicate Message-ID", <<""); From: Me To: You Cc: $addr Message-ID: Subject: hihi Date: Thu, 01 Jan 1970 
00:00:00 +0000 like($fail->[2], qr/CONFLICT/, "duplicate Message-ID message"); $fail_bad_header->($good_rev, "missing From:", <<""); To: $addr Message-ID: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 $fail_bad_header->($good_rev, "short subject:", <<""); To: $addr From: cat\@example.com Message-ID: Subject: a Date: Thu, 01 Jan 1970 00:00:00 +0000 $fail_bad_header->($good_rev, "no date", <<""); To: $addr From: u\@example.com Message-ID: Subject: hihi $fail_bad_header->($good_rev, "bad date", <<""); To: $addr From: u\@example.com Message-ID: Subject: hihi Date: deadbeef } # spam training { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; local $ENV{PATH} = $main_path; my $mid = 'spam-train@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF { # deliver the spam message, first ok(run_script(['-mda'], undef, { 0 => \$in })); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$mid\E/, "message delivered"); # now train it local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_NAME}; delete $ENV{GIT_COMMITTER_NAME}; ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }), "no failure from learning spam"); ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }), "no failure from learning spam idempotently"); } } # train ham message { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; local $ENV{PATH} = $main_path; my $mid = 'ham-train@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF # now train it # these should be overridden local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com'; 
ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham without failure"); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$mid\E/, "ham message delivered"); ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham idempotently "); # ensure trained email is filtered, too my $mime = eml_load 't/mda-mime.eml'; ($mid) = ($mime->header_raw('message-id') =~ /<([^>]+)>/); { $in = $mime->as_string; ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham without failure"); my $path = mid2path($mid); $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/<\Q$mid\E>/, "ham message delivered"); unlike($$msg, qr//i, ' filtered'); } } # List-ID based delivery { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = undef; delete $ENV{ORIGINAL_RECIPIENT}; local $ENV{PATH} = $main_path; my $list_id = 'foo.example.com'; my $mid = 'list-id-delivery@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> List-Id: <$list_id> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", uc $list_id); $? 
== 0 or die "failed to set listid $?"; ok(run_script(['-mda'], undef, { 0 => \$in }), 'mda OK with List-Id match'); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$list_id\E/, 'delivered message w/ List-ID matches'); # try a message w/o precheck $in = < List-Id: <$list_id> this message would not be accepted without --no-precheck EOF my ($out, $err) = ('', ''); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; ok(run_script(['-mda', '--no-precheck'], undef, $rdr), 'mda OK with List-Id match and --no-precheck'); my $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/this message would not be accepted without --no-precheck/, '--no-precheck delivered message anyways'); # try a message with multiple List-ID headers $in = < List-ID: <$list_id> Message-ID: <2lids\@example> Subject: two List-IDs From: user To: $addr Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF ($out, $err) = ('', ''); ok(run_script(['-mda'], undef, $rdr), 'mda OK with multiple List-Id matches'); $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/^\+Message-ID: <2lids\@example>/sm, 'multi List-ID match delivered'); like($err, qr/multiple List-ID/, 'warned about multiple List-ID'); # ensure -learn rm works after inbox address is updated ($out, $err) = ('', ''); xsys(qw(git config --file), $pi_config, "$cfgpfx.address", $addr = 'updated-address@example.com'); ok(run_script(['-learn', 'rm'], undef, $rdr), 'rm-ed via -learn'); $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git'); # ensure we can strip List-Unsubscribe $in = < List-Id: <$list_id> Message-ID: Subject: unsubscribe-1 From: user To: $addr Date: Fri, 02 Oct 1993 00:00:00 +0000 List-Unsubscribe: List-Unsubscribe-Post: List-Unsubscribe=One-Click List-Unsubscribe should be stripped EOF write_file '>>', $pi_config, <qx(qw(diff HEAD~1..HEAD)))); like $cur, qr/Message-ID: qx(qw(diff HEAD~1..HEAD)))); like $cur, qr/Message-ID: \(my $out = ''), 2 => \(my $err = '') }; 
ok(run_script([qw(-index -L medium), $maindir], undef, $rdr), 'index inbox'); my $in = <<'EOM'; From: a@example.com To: updated-address@example.com Subject: this is a ham message for learn Date: Fri, 02 Oct 1993 00:00:00 +0000 Message-ID: yum EOM $rdr->{0} = \$in; ok(run_script([qw(-learn ham)], undef, $rdr), 'learn medium ham'); is($err, '', 'nothing in stderr after medium -learn'); my $msg = $git->cat_file('HEAD:'.mid2path('medium-ham@example')); like($$msg, qr/medium-ham/, 'medium ham added via -learn'); my @xap = grep(!m!/over\.sqlite3!, glob("$maindir/public-inbox/xapian*/*")); ok(remove_tree(@xap), 'rm Xapian files to convert to indexlevel=basic'); $in =~ s/medium-ham/basic-ham/g or xbail 'BUG: no s//'; ok(run_script([qw(-learn ham)], undef, $rdr), 'learn basic ham'); is($err, '', 'nothing in stderr after basic -learn'); $msg = $git->cat_file('HEAD:'.mid2path('basic-ham@example')); like($$msg, qr/basic-ham/, 'basic ham added via -learn'); @xap = grep(!m!/over\.sqlite3!, glob("$maindir/public-inbox/xapian*/*")); is_deeply(\@xap, [], 'no Xapian files created by -learn'); }; done_testing();