about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- Documentation/dc-dlvr-spam-flow.txt 6
-rw-r--r-- INSTALL 1
-rw-r--r-- lib/PublicInbox/Filter.pm 7
-rw-r--r-- lib/PublicInbox/Import.pm 14
-rw-r--r-- lib/PublicInbox/MDA.pm 12
-rwxr-xr-x script/public-inbox-learn 31
-rwxr-xr-x script/public-inbox-mda 24
-rw-r--r-- t/feed.t 67
-rw-r--r-- t/html_index.t 23
-rw-r--r-- t/mda.t 18
10 files changed, 118 insertions, 85 deletions
diff --git a/Documentation/dc-dlvr-spam-flow.txt b/Documentation/dc-dlvr-spam-flow.txt
index 978763ad..5a7e2902 100644
--- a/Documentation/dc-dlvr-spam-flow.txt
+++ b/Documentation/dc-dlvr-spam-flow.txt
@@ -35,8 +35,7 @@ script delivers to a second recipient for training, the "pi" user:
                                            public-inbox-learn
 
 public-inbox-learn will then internally handle the "spamc -> spamd"
-delivery path as well as calling ssoma-rm on falsely trained
-
+delivery path as well as removing the message from the git tree.
 
 * incron - run commands based on filesystem events: http://incron.aiken.cz/
 
@@ -47,6 +46,3 @@ delivery path as well as calling ssoma-rm on falsely trained
 
 * report-spam / dc-dlvr - distributed with public-inbox in the scripts/
   directory: git clone git://80x24.org/public-inbox
-
-* ssoma-rm - part of ssoma: some sort of mail archiver, a dependency of
-  public-inbox: git clone git://80x24.org/ssoma
diff --git a/INSTALL b/INSTALL
index 013e8d15..e75c4e27 100644
--- a/INSTALL
+++ b/INSTALL
@@ -23,7 +23,6 @@ Requirements (server MDA)
 -------------------------
 
 * git
-* ssoma - http://ssoma.public-inbox.org/INSTALL.html
 * SpamAssassin (spamc/spamd)
 * MTA - postfix is recommended
 * lynx (for converting HTML messages to text)
diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index 10173ad1..8b78a441 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -28,13 +28,6 @@ sub run {
 
         my $content_type = $mime->header('Content-Type') || 'text/plain';
 
-        # kill potentially bad/confusing headers
-        # Note: ssoma already does this, but since we mangle the message,
-        # we should do this before it gets to ssoma.
-        foreach my $d (qw(status lines content-length)) {
-                $mime->header_set($d);
-        }
-
         if ($content_type =~ m!\btext/plain\b!i) {
                 return 1; # yay, nothing to do
         } elsif ($content_type =~ $MIME_HTML) {
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 8a40fb53..8dd11d03 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -195,6 +195,20 @@ sub done {
         my $pid = delete $self->{pid} or die 'BUG: missing {pid} when done';
         waitpid($pid, 0) == $pid or die 'fast-import did not finish';
         $? == 0 or die "fast-import failed: $?";
+
+        # for compatibility with existing ssoma installations
+        # we can probably remove this entirely by 2020
+        my $git_dir = $self->{git}->{git_dir};
+        my $index = "$git_dir/ssoma.index";
+        # XXX: change the following scope to: if (-e $index) # in 2018 or so..
+        unless ($ENV{FAST}) {
+                local $ENV{GIT_INDEX_FILE} = $index;
+                system('git', "--git-dir=$git_dir", qw(read-tree -m -v -i),
+                        $self->{ref}) == 0 or
+                        die "failed to update $git_dir/ssoma.index: $?\n";
+        }
+
+
         my $lockfh = delete $self->{lockfh} or die "BUG: not locked: $!";
         flock($lockfh, LOCK_UN) or die "unlock failed: $!";
         close $lockfh or die "close lock failed: $!";
diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index 2d3b9bd8..b1471022 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -10,7 +10,6 @@ use Email::Address;
 use Date::Parse qw(strptime);
 use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable
 use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
-use constant cmd => qw/ssoma-mda -1/;
 
 # drop plus addressing for matching
 sub __drop_plus {
@@ -84,15 +83,4 @@ sub set_list_headers {
         }
 }
 
-# returns a 3-element array: name, email, date
-sub author_info {
-        my ($class, $mime) = @_;
-
-        my $from = $mime->header('From');
-        my @from = Email::Address->parse($from);
-        my $name = $from[0]->name;
-        my $email = $from[0]->address;
-        ($name, $email, $mime->header('Date'));
-}
-
 1;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 81675d02..0af1e94c 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -8,6 +8,8 @@ my $usage = "$0 (spam|ham) < /path/to/message";
 use strict;
 use warnings;
 use PublicInbox::Config;
+use PublicInbox::Git;
+use PublicInbox::Import;
 use Email::MIME;
 use Email::Address;
 use IPC::Run qw/run/;
@@ -32,16 +34,12 @@ foreach my $h (qw(Cc To)) {
         }
 }
 
-my ($name, $email, $date);
-
 if ($train eq "ham") {
         require PublicInbox::MDA;
         require PublicInbox::Filter;
         PublicInbox::Filter->run($mime);
-        ($name, $email, $date) = PublicInbox::MDA->author_info($mime);
 }
 
-my $in = $mime->as_string;
 my $err = 0;
 my @output = qw(> /dev/null > /dev/null);
 
@@ -50,37 +48,30 @@ foreach my $recipient (keys %dests) {
         my $dst = $pi_config->lookup($recipient) or next;
         my $git_dir = $dst->{mainrepo} or next;
         my ($out, $err) = ("", "");
-
+        my $git = PublicInbox::Git->new($git_dir);
         # We do not touch GIT_COMMITTER_* env here so we can track
         # who trained the message.
-        # We will not touch GIT_AUTHOR_* when learning spam messages, either
+        my $name = $ENV{GIT_COMMITTER_NAME} || $dst->{listname};
+        my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient;
+        my $im = PublicInbox::Import->new($git, $name, $email);
+
         if ($train eq "spam") {
                 # This needs to be idempotent, as my inotify trainer
                 # may train for each cross-posted message, and this
                 # script already learns for every list in
                 # ~/.public-inbox/config
-                if (!run(["ssoma-rm", $git_dir], \$in, \$out, \$err)) {
-                        if ($err !~ /^git cat-file .+ failed: 32768$/) {
-                                $err = 1;
-                        }
-                }
+                $im->remove($mime);
         } else { # $train eq "ham"
                 # no checking for spam here, we assume the message has
                 # been reviewed by a human at this point:
                 PublicInbox::MDA->set_list_headers($mime, $dst);
-                my $s  = $mime->as_string;
-
-                local $ENV{GIT_AUTHOR_NAME} = $name;
-                local $ENV{GIT_AUTHOR_EMAIL} = $email;
-                local $ENV{GIT_AUTHOR_DATE} = $date;
 
                 # Ham messages are trained when they're marked into
                 # a SEEN state, so this is idempotent:
-                run([PublicInbox::MDA->cmd, $git_dir], \$s, \$out, \$err);
-                if ($err !~ /CONFLICT/) {
-                        $err = 1;
-                }
+                $im->add($mime);
         }
+        $im->done;
+        my $in = $mime->as_string;
         if (!run([qw(spamc -L), $train], \$in, @output)) {
                 $err = 1;
         }
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 24feeb81..6c76734c 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -15,6 +15,8 @@ use IPC::Run qw(run);
 use PublicInbox::MDA;
 use PublicInbox::Filter;
 use PublicInbox::Config;
+use PublicInbox::Import;
+use PublicInbox::Git;
 
 # n.b: hopefully we can setup the emergency path without bailing due to
 # user error, we really want to setup the emergency destination ASAP
@@ -58,20 +60,20 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
                         PublicInbox::MDA->set_list_headers($msg, $dst);
                         $filter->simple($msg);
 
-                        my ($name, $email, $date) =
-                                        PublicInbox::MDA->author_info($msg);
-
                         END {
                                 index_sync($main_repo) if ($? == 0);
                         };
-
-                        local $ENV{GIT_AUTHOR_NAME} = $name;
-                        local $ENV{GIT_AUTHOR_EMAIL} = $email;
-                        local $ENV{GIT_AUTHOR_DATE} = $date;
-                        local $ENV{GIT_COMMITTER_EMAIL} = $recipient;
-                        local $ENV{GIT_COMMITTER_NAME} = $dst->{listname};
-
-                        $filter->pipe(PublicInbox::MDA->cmd, $main_repo);
+                        my $git = PublicInbox::Git->new($main_repo);
+                        my $im = PublicInbox::Import->new($git,
+                                                $dst->{listname}, $recipient);
+                        if (defined $im->add($msg)) {
+                                $im->done;
+                                $filter->ignore; # exits
+                        }
+                        # this message is similar to what ssoma-mda shows:
+                        print STDERR "CONFLICT: Message-ID: ",
+                                $msg->header_obj->header_raw('Message-ID'),
+                                " exists\n";
                 }
         }
 } else {
diff --git a/t/feed.t b/t/feed.t
index 2096b73e..906552e5 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -3,8 +3,10 @@
 use strict;
 use warnings;
 use Test::More;
-use Email::Simple;
+use Email::MIME;
 use PublicInbox::Feed;
+use PublicInbox::Git;
+use PublicInbox::Import;
 use PublicInbox::Config;
 use IPC::Run qw/run/;
 use File::Temp qw/tempdir/;
@@ -15,15 +17,36 @@ sub string_feed {
         stream_to_string(PublicInbox::Feed::generate($_[0]));
 }
 
+# ensure we are compatible with existing ssoma installations which
+# do not use fast-import.  We can probably remove this in 2018
+my %SSOMA;
+sub rand_use ($) {
+        return 0 if $ENV{FAST};
+        my $cmd = $_[0];
+        my $x = $SSOMA{$cmd};
+        unless ($x) {
+                $x = -1;
+                foreach my $p (split(':', $ENV{PATH})) {
+                        -x "$p/$cmd" or next;
+                        $x = 1;
+                        last;
+                }
+                $SSOMA{$cmd} = $x;
+        }
+        (($x > 0 && (int(rand(10)) % 2) == 1) || $x < 0);
+}
+
 my $tmpdir = tempdir('pi-feed-XXXXXX', TMPDIR => 1, CLEANUP => 1);
 my $git_dir = "$tmpdir/gittest";
+my $git = PublicInbox::Git->new($git_dir);
+my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
 
 {
         is(0, system(qw(git init -q --bare), $git_dir), "git init");
         local $ENV{GIT_DIR} = $git_dir;
 
         foreach my $i (1..6) {
-                my $simple = Email::Simple->new(<<EOF);
+                my $mime = Email::MIME->new(<<EOF);
 From: ME <me\@example.com>
 To: U <u\@example.com>
 Message-Id: <$i\@example.com>
@@ -53,10 +76,16 @@ msg $i
 
 keep me
 EOF
-                my $str = $simple->as_string;
-                run(['ssoma-mda', $git_dir], \$str) or
-                        die "mda failed: $?\n";
+                if (rand_use('ssoma-mda')) {
+                        $im->done;
+                        my $str = $mime->as_string;
+                        run(['ssoma-mda', $git_dir], \$str) or
+                                die "mda failed: $?\n";
+                } else {
+                        like($im->add($mime), qr/\A:\d+/, 'added');
+                }
         }
+        $im->done;
 }
 
 # spam check
@@ -84,13 +113,7 @@ EOF
         # add a new spam message
         my $spam;
         {
-                my $pid = open(my $pipe, "|-");
-                defined $pid or die "fork/pipe failed: $!\n";
-                if ($pid == 0) {
-                        exec("ssoma-mda", $git_dir);
-                }
-
-                $spam = Email::Simple->new(<<EOF);
+                $spam = Email::MIME->new(<<EOF);
 From: SPAMMER <spammer\@example.com>
 To: U <u\@example.com>
 Message-Id: <this-is-spam\@example.com>
@@ -98,8 +121,19 @@ Subject: SPAM!!!!!!!!
 Date: Thu, 01 Jan 1970 00:00:00 +0000
 
 EOF
-                print $pipe $spam->as_string or die "print failed: $!\n";
-                close $pipe or die "close pipe failed: $!\n";
+                if (rand_use('ssoma-mda')) {
+                        my $pid = open(my $pipe, "|-");
+                        defined $pid or die "fork/pipe failed: $!";
+                        if ($pid == 0) {
+                                exec("ssoma-mda", $git_dir);
+                        }
+
+                        print $pipe $spam->as_string or die "print failed: $!";
+                        close $pipe or die "close pipe failed: $!";
+                } else {
+                        $im->add($spam);
+                        $im->done;
+                }
         }
 
         # check spam shows up
@@ -118,10 +152,13 @@ EOF
         }
 
         # nuke spam
-        {
+        if (rand_use('ssoma-rm')) {
                 my $spam_str = $spam->as_string;
                 run(["ssoma-rm", $git_dir], \$spam_str) or
                                 die "ssoma-rm failed: $?\n";
+        } else {
+                $im->remove($spam);
+                $im->done;
         }
 
         # spam no longer shows up
diff --git a/t/html_index.t b/t/html_index.t
index adbadaf4..6896eb41 100644
--- a/t/html_index.t
+++ b/t/html_index.t
@@ -3,11 +3,15 @@
 use strict;
 use warnings;
 use Test::More;
-use Email::Simple;
+use Email::MIME;
 use PublicInbox::Feed;
+use PublicInbox::Git;
+use PublicInbox::Import;
 use File::Temp qw/tempdir/;
 my $tmpdir = tempdir('pi-http-XXXXXX', TMPDIR => 1, CLEANUP => 1);
 my $git_dir = "$tmpdir/gittest";
+my $git = PublicInbox::Git->new($git_dir);
+my $im = PublicInbox::Import->new($git, 'tester', 'test@example');
 
 # setup
 {
@@ -15,19 +19,18 @@ my $git_dir = "$tmpdir/gittest";
         my $prev = "";
 
         foreach my $i (1..6) {
-                local $ENV{GIT_DIR} = $git_dir;
-                my $pid = open(my $pipe, "|-");
-                defined $pid or die "fork/pipe failed: $!\n";
-                if ($pid == 0) {
-                        exec("ssoma-mda", $git_dir);
-                }
+                # my $pid = open(my $pipe, "|-");
+                # defined $pid or die "fork/pipe failed: $!\n";
+                # if ($pid == 0) {
+                        # exec("ssoma-mda", $git_dir);
+                # }
                 my $mid = "<$i\@example.com>";
                 my $mid_line = "Message-ID: $mid";
                 if ($prev) {
                         $mid_line .= "In-Reply-To: $prev";
                 }
                 $prev = $mid;
-                my $simple = Email::Simple->new(<<EOF);
+                my $mime = Email::MIME->new(<<EOF);
 From: ME <me\@example.com>
 To: U <u\@example.com>
 $mid_line
@@ -43,9 +46,9 @@ msg $i
 
 keep me
 EOF
-                print $pipe $simple->as_string or die "print failed: $!\n";
-                close $pipe or die "close pipe failed: $!\n";
+                like($im->add($mime), qr/\A:\d+\z/, 'inserted message');
         }
+        $im->done;
 }
 
 # check HTML index
diff --git a/t/mda.t b/t/mda.t
index 03bad871..6b7527d4 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -8,6 +8,7 @@ use Email::Filter;
 use File::Temp qw/tempdir/;
 use Cwd;
 use IPC::Run qw(run);
+use PublicInbox::MID qw(mid2path);
 
 my $mda = "blib/script/public-inbox-mda";
 my $learn = "blib/script/public-inbox-learn";
@@ -54,7 +55,13 @@ local $ENV{GIT_COMMITTER_NAME} = eval {
         close $fh;
         my $msg = Email::Filter->new(data => $str);
         $msg = Email::MIME->new($msg->simple->as_string);
-        my ($author, $email, $date) = PublicInbox::MDA->author_info($msg);
+
+        my $from = $msg->header('From');
+        my @from = Email::Address->parse($from);
+        my $author = $from[0]->name;
+        my $email = $from[0]->address;
+        my $date = $msg ->header('Date');
+
         is('El&#233;anor',
                 encode('us-ascii', my $tmp = $author, Encode::HTMLCREF),
                 'HTML conversion is correct');
@@ -174,7 +181,8 @@ EOF
         {
                 # deliver the spam message, first
                 run([$mda], \$in);
-                my $msg = `ssoma cat $mid $maindir`;
+                my $path = mid2path($mid);
+                my $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`;
                 like($msg, qr/\Q$mid\E/, "message delivered");
 
                 # now train it
@@ -212,7 +220,8 @@ EOF
 
         run([$learn, "ham"], \$in);
         is($?, 0, "learned ham without failure");
-        my $msg = `ssoma cat $mid $maindir`;
+        my $path = mid2path($mid);
+        my $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`;
         like($msg, qr/\Q$mid\E/, "ham message delivered");
         run([$learn, "ham"], \$in);
         is($?, 0, "learned ham idempotently ");
@@ -251,7 +260,8 @@ EOF
                 $in = $mime->as_string;
                 run([$learn, "ham"], \$in);
                 is($?, 0, "learned ham without failure");
-                $msg = `ssoma cat $mid $maindir`;
+                my $path = mid2path($mid);
+                $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`;
                 like($msg, qr/<\Q$mid\E>/, "ham message delivered");
                 unlike($msg, qr/<html>/i, '<html> filtered');
         }