From fe3d2447badd595df784541556311137b920b0a2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 25 Apr 2016 09:50:01 +0000 Subject: remove ssoma dependency By converting to using our git-fast-import-based Import module. This should allow us to be more easily installed. --- Documentation/dc-dlvr-spam-flow.txt | 6 +--- INSTALL | 1 - lib/PublicInbox/Filter.pm | 7 ---- lib/PublicInbox/Import.pm | 14 ++++++++ lib/PublicInbox/MDA.pm | 12 ------- script/public-inbox-learn | 31 ++++++----------- script/public-inbox-mda | 24 +++++++------ t/feed.t | 67 ++++++++++++++++++++++++++++--------- t/html_index.t | 23 +++++++------ t/mda.t | 18 +++++++--- 10 files changed, 118 insertions(+), 85 deletions(-) diff --git a/Documentation/dc-dlvr-spam-flow.txt b/Documentation/dc-dlvr-spam-flow.txt index 978763ad..5a7e2902 100644 --- a/Documentation/dc-dlvr-spam-flow.txt +++ b/Documentation/dc-dlvr-spam-flow.txt @@ -35,8 +35,7 @@ script delivers to a second recipient for training, the "pi" user: public-inbox-learn public-inbox-learn will then internally handle the "spamc -> spamd" -delivery path as well as calling ssoma-rm on falsely trained - +delivery path as well as removing the message from the git tree. 
* incron - run commands based on filesystem events: http://incron.aiken.cz/ @@ -47,6 +46,3 @@ delivery path as well as calling ssoma-rm on falsely trained * report-spam / dc-dlvr - distributed with public-inbox in the scripts/ directory: git clone git://80x24.org/public-inbox - -* ssoma-rm - part of ssoma: some sort of mail archiver, a dependency of - public-inbox: git clone git://80x24.org/ssoma diff --git a/INSTALL b/INSTALL index 013e8d15..e75c4e27 100644 --- a/INSTALL +++ b/INSTALL @@ -23,7 +23,6 @@ Requirements (server MDA) ------------------------- * git -* ssoma - http://ssoma.public-inbox.org/INSTALL.html * SpamAssassin (spamc/spamd) * MTA - postfix is recommended * lynx (for converting HTML messages to text) diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm index 10173ad1..8b78a441 100644 --- a/lib/PublicInbox/Filter.pm +++ b/lib/PublicInbox/Filter.pm @@ -28,13 +28,6 @@ sub run { my $content_type = $mime->header('Content-Type') || 'text/plain'; - # kill potentially bad/confusing headers - # Note: ssoma already does this, but since we mangle the message, - # we should do this before it gets to ssoma. - foreach my $d (qw(status lines content-length)) { - $mime->header_set($d); - } - if ($content_type =~ m!\btext/plain\b!i) { return 1; # yay, nothing to do } elsif ($content_type =~ $MIME_HTML) { diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 8a40fb53..8dd11d03 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -195,6 +195,20 @@ sub done { my $pid = delete $self->{pid} or die 'BUG: missing {pid} when done'; waitpid($pid, 0) == $pid or die 'fast-import did not finish'; $? == 0 or die "fast-import failed: $?"; + + # for compatibility with existing ssoma installations + # we can probably remove this entirely by 2020 + my $git_dir = $self->{git}->{git_dir}; + my $index = "$git_dir/ssoma.index"; + # XXX: change the following scope to: if (-e $index) # in 2018 or so.. 
+ unless ($ENV{FAST}) { + local $ENV{GIT_INDEX_FILE} = $index; + system('git', "--git-dir=$git_dir", qw(read-tree -m -v -i), + $self->{ref}) == 0 or + die "failed to update $git_dir/ssoma.index: $?\n"; + } + + my $lockfh = delete $self->{lockfh} or die "BUG: not locked: $!"; flock($lockfh, LOCK_UN) or die "unlock failed: $!"; close $lockfh or die "close lock failed: $!"; diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm index 2d3b9bd8..b1471022 100644 --- a/lib/PublicInbox/MDA.pm +++ b/lib/PublicInbox/MDA.pm @@ -10,7 +10,6 @@ use Email::Address; use Date::Parse qw(strptime); use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian -use constant cmd => qw/ssoma-mda -1/; # drop plus addressing for matching sub __drop_plus { @@ -84,15 +83,4 @@ sub set_list_headers { } } -# returns a 3-element array: name, email, date -sub author_info { - my ($class, $mime) = @_; - - my $from = $mime->header('From'); - my @from = Email::Address->parse($from); - my $name = $from[0]->name; - my $email = $from[0]->address; - ($name, $email, $mime->header('Date')); -} - 1; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 81675d02..0af1e94c 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -8,6 +8,8 @@ my $usage = "$0 (spam|ham) < /path/to/message"; use strict; use warnings; use PublicInbox::Config; +use PublicInbox::Git; +use PublicInbox::Import; use Email::MIME; use Email::Address; use IPC::Run qw/run/; @@ -32,16 +34,12 @@ foreach my $h (qw(Cc To)) { } } -my ($name, $email, $date); - if ($train eq "ham") { require PublicInbox::MDA; require PublicInbox::Filter; PublicInbox::Filter->run($mime); - ($name, $email, $date) = PublicInbox::MDA->author_info($mime); } -my $in = $mime->as_string; my $err = 0; my @output = qw(> /dev/null > /dev/null); @@ -50,37 +48,30 @@ foreach my $recipient (keys %dests) { my $dst = $pi_config->lookup($recipient) or next; 
my $git_dir = $dst->{mainrepo} or next; my ($out, $err) = ("", ""); - + my $git = PublicInbox::Git->new($git_dir); # We do not touch GIT_COMMITTER_* env here so we can track # who trained the message. - # We will not touch GIT_AUTHOR_* when learning spam messages, either + my $name = $ENV{GIT_COMMITTER_NAME} || $dst->{listname}; + my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient; + my $im = PublicInbox::Import->new($git, $name, $email); + if ($train eq "spam") { # This needs to be idempotent, as my inotify trainer # may train for each cross-posted message, and this # script already learns for every list in # ~/.public-inbox/config - if (!run(["ssoma-rm", $git_dir], \$in, \$out, \$err)) { - if ($err !~ /^git cat-file .+ failed: 32768$/) { - $err = 1; - } - } + $im->remove($mime); } else { # $train eq "ham" # no checking for spam here, we assume the message has # been reviewed by a human at this point: PublicInbox::MDA->set_list_headers($mime, $dst); - my $s = $mime->as_string; - - local $ENV{GIT_AUTHOR_NAME} = $name; - local $ENV{GIT_AUTHOR_EMAIL} = $email; - local $ENV{GIT_AUTHOR_DATE} = $date; # Ham messages are trained when they're marked into # a SEEN state, so this is idempotent: - run([PublicInbox::MDA->cmd, $git_dir], \$s, \$out, \$err); - if ($err !~ /CONFLICT/) { - $err = 1; - } + $im->add($mime); } + $im->done; + my $in = $mime->as_string; if (!run([qw(spamc -L), $train], \$in, @output)) { $err = 1; } diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 24feeb81..6c76734c 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -15,6 +15,8 @@ use IPC::Run qw(run); use PublicInbox::MDA; use PublicInbox::Filter; use PublicInbox::Config; +use PublicInbox::Import; +use PublicInbox::Git; # n.b: hopefully we can setup the emergency path without bailing due to # user error, we really want to setup the emergency destination ASAP @@ -58,20 +60,20 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) && 
PublicInbox::MDA->set_list_headers($msg, $dst); $filter->simple($msg); - my ($name, $email, $date) = - PublicInbox::MDA->author_info($msg); - END { index_sync($main_repo) if ($? == 0); }; - - local $ENV{GIT_AUTHOR_NAME} = $name; - local $ENV{GIT_AUTHOR_EMAIL} = $email; - local $ENV{GIT_AUTHOR_DATE} = $date; - local $ENV{GIT_COMMITTER_EMAIL} = $recipient; - local $ENV{GIT_COMMITTER_NAME} = $dst->{listname}; - - $filter->pipe(PublicInbox::MDA->cmd, $main_repo); + my $git = PublicInbox::Git->new($main_repo); + my $im = PublicInbox::Import->new($git, + $dst->{listname}, $recipient); + if (defined $im->add($msg)) { + $im->done; + $filter->ignore; # exits + } + # this message is similar to what ssoma-mda shows: + print STDERR "CONFLICT: Message-ID: ", + $msg->header_obj->header_raw('Message-ID'), + " exists\n"; } } } else { diff --git a/t/feed.t b/t/feed.t index 2096b73e..906552e5 100644 --- a/t/feed.t +++ b/t/feed.t @@ -3,8 +3,10 @@ use strict; use warnings; use Test::More; -use Email::Simple; +use Email::MIME; use PublicInbox::Feed; +use PublicInbox::Git; +use PublicInbox::Import; use PublicInbox::Config; use IPC::Run qw/run/; use File::Temp qw/tempdir/; @@ -15,15 +17,36 @@ sub string_feed { stream_to_string(PublicInbox::Feed::generate($_[0])); } +# ensure we are compatible with existing ssoma installations which +# do not use fast-import. 
We can probably remove this in 2018 +my %SSOMA; +sub rand_use ($) { + return 0 if $ENV{FAST}; + my $cmd = $_[0]; + my $x = $SSOMA{$cmd}; + unless ($x) { + $x = -1; + foreach my $p (split(':', $ENV{PATH})) { + -x "$p/$cmd" or next; + $x = 1; + last; + } + $SSOMA{$cmd} = $x; + } + (($x > 0 && (int(rand(10)) % 2) == 1) || $x < 0); +} + my $tmpdir = tempdir('pi-feed-XXXXXX', TMPDIR => 1, CLEANUP => 1); my $git_dir = "$tmpdir/gittest"; +my $git = PublicInbox::Git->new($git_dir); +my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); { is(0, system(qw(git init -q --bare), $git_dir), "git init"); local $ENV{GIT_DIR} = $git_dir; foreach my $i (1..6) { - my $simple = Email::Simple->new(<new(< To: U Message-Id: <$i\@example.com> @@ -53,10 +76,16 @@ msg $i keep me EOF - my $str = $simple->as_string; - run(['ssoma-mda', $git_dir], \$str) or - die "mda failed: $?\n"; + if (rand_use('ssoma-mda')) { + $im->done; + my $str = $mime->as_string; + run(['ssoma-mda', $git_dir], \$str) or + die "mda failed: $?\n"; + } else { + like($im->add($mime), qr/\A:\d+/, 'added'); + } } + $im->done; } # spam check @@ -84,13 +113,7 @@ EOF # add a new spam message my $spam; { - my $pid = open(my $pipe, "|-"); - defined $pid or die "fork/pipe failed: $!\n"; - if ($pid == 0) { - exec("ssoma-mda", $git_dir); - } - - $spam = Email::Simple->new(<new(< To: U Message-Id: @@ -98,8 +121,19 @@ Subject: SPAM!!!!!!!! 
Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF - print $pipe $spam->as_string or die "print failed: $!\n"; - close $pipe or die "close pipe failed: $!\n"; + if (rand_use('ssoma-mda')) { + my $pid = open(my $pipe, "|-"); + defined $pid or die "fork/pipe failed: $!"; + if ($pid == 0) { + exec("ssoma-mda", $git_dir); + } + + print $pipe $spam->as_string or die "print failed: $!"; + close $pipe or die "close pipe failed: $!"; + } else { + $im->add($spam); + $im->done; + } } # check spam shows up @@ -118,10 +152,13 @@ EOF } # nuke spam - { + if (rand_use('ssoma-rm')) { my $spam_str = $spam->as_string; run(["ssoma-rm", $git_dir], \$spam_str) or die "ssoma-rm failed: $?\n"; + } else { + $im->remove($spam); + $im->done; } # spam no longer shows up diff --git a/t/html_index.t b/t/html_index.t index adbadaf4..6896eb41 100644 --- a/t/html_index.t +++ b/t/html_index.t @@ -3,11 +3,15 @@ use strict; use warnings; use Test::More; -use Email::Simple; +use Email::MIME; use PublicInbox::Feed; +use PublicInbox::Git; +use PublicInbox::Import; use File::Temp qw/tempdir/; my $tmpdir = tempdir('pi-http-XXXXXX', TMPDIR => 1, CLEANUP => 1); my $git_dir = "$tmpdir/gittest"; +my $git = PublicInbox::Git->new($git_dir); +my $im = PublicInbox::Import->new($git, 'tester', 'test@example'); # setup { @@ -15,19 +19,18 @@ my $git_dir = "$tmpdir/gittest"; my $prev = ""; foreach my $i (1..6) { - local $ENV{GIT_DIR} = $git_dir; - my $pid = open(my $pipe, "|-"); - defined $pid or die "fork/pipe failed: $!\n"; - if ($pid == 0) { - exec("ssoma-mda", $git_dir); - } + # my $pid = open(my $pipe, "|-"); + # defined $pid or die "fork/pipe failed: $!\n"; + # if ($pid == 0) { + # exec("ssoma-mda", $git_dir); + # } my $mid = "<$i\@example.com>"; my $mid_line = "Message-ID: $mid"; if ($prev) { $mid_line .= "In-Reply-To: $prev"; } $prev = $mid; - my $simple = Email::Simple->new(<new(< To: U $mid_line @@ -43,9 +46,9 @@ msg $i keep me EOF - print $pipe $simple->as_string or die "print failed: $!\n"; - close $pipe or die 
"close pipe failed: $!\n"; + like($im->add($mime), qr/\A:\d+\z/, 'inserted message'); } + $im->done; } # check HTML index diff --git a/t/mda.t b/t/mda.t index 03bad871..6b7527d4 100644 --- a/t/mda.t +++ b/t/mda.t @@ -8,6 +8,7 @@ use Email::Filter; use File::Temp qw/tempdir/; use Cwd; use IPC::Run qw(run); +use PublicInbox::MID qw(mid2path); my $mda = "blib/script/public-inbox-mda"; my $learn = "blib/script/public-inbox-learn"; @@ -54,7 +55,13 @@ local $ENV{GIT_COMMITTER_NAME} = eval { close $fh; my $msg = Email::Filter->new(data => $str); $msg = Email::MIME->new($msg->simple->as_string); - my ($author, $email, $date) = PublicInbox::MDA->author_info($msg); + + my $from = $msg->header('From'); + my @from = Email::Address->parse($from); + my $author = $from[0]->name; + my $email = $from[0]->address; + my $date = $msg ->header('Date'); + is('Eléanor', encode('us-ascii', my $tmp = $author, Encode::HTMLCREF), 'HTML conversion is correct'); @@ -174,7 +181,8 @@ EOF { # deliver the spam message, first run([$mda], \$in); - my $msg = `ssoma cat $mid $maindir`; + my $path = mid2path($mid); + my $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`; like($msg, qr/\Q$mid\E/, "message delivered"); # now train it @@ -212,7 +220,8 @@ EOF run([$learn, "ham"], \$in); is($?, 0, "learned ham without failure"); - my $msg = `ssoma cat $mid $maindir`; + my $path = mid2path($mid); + my $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`; like($msg, qr/\Q$mid\E/, "ham message delivered"); run([$learn, "ham"], \$in); is($?, 0, "learned ham idempotently "); @@ -251,7 +260,8 @@ EOF $in = $mime->as_string; run([$learn, "ham"], \$in); is($?, 0, "learned ham without failure"); - $msg = `ssoma cat $mid $maindir`; + my $path = mid2path($mid); + $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`; like($msg, qr/<\Q$mid\E>/, "ham message delivered"); unlike($msg, qr//i, ' filtered'); } -- cgit v1.2.3-24-ge0c7