From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.6 required=3.0 tests=AWL,BAYES_00, RCVD_IN_DNSWL_LOW,RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE, SPF_PASS shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from out02.mta.xmission.com (out02.mta.xmission.com [166.70.13.232]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dcvr.yhbt.net (Postfix) with ESMTPS id 87A041F55B; Fri, 15 May 2020 21:05:40 +0000 (UTC) Received: from in01.mta.xmission.com ([166.70.13.51]) by out02.mta.xmission.com with esmtps (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128) (Exim 4.90_1) (envelope-from ) id 1jZhWV-0001QP-Pm; Fri, 15 May 2020 15:05:39 -0600 Received: from ip68-227-160-95.om.om.cox.net ([68.227.160.95] helo=x220.xmission.com) by in01.mta.xmission.com with esmtpsa (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.87) (envelope-from ) id 1jZhWQ-0002MC-S4; Fri, 15 May 2020 15:05:39 -0600 From: ebiederm@xmission.com (Eric W. Biederman) To: Eric Wong Cc: meta@public-inbox.org References: <87eeyvmx74.fsf@x220.int.ebiederm.org> <20200513193144.GA9299@dcvr> <87ftc3mrq6.fsf@x220.int.ebiederm.org> <20200513221715.GA11718@dcvr> <877dxelmsr.fsf@x220.int.ebiederm.org> <87ftc0c3r4.fsf_-_@x220.int.ebiederm.org> Date: Fri, 15 May 2020 16:02:00 -0500 In-Reply-To: <87ftc0c3r4.fsf_-_@x220.int.ebiederm.org> (Eric W. Biederman's message of "Fri, 15 May 2020 16:00:47 -0500") Message-ID: <87a728c3p3.fsf_-_@x220.int.ebiederm.org> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.1 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-XM-SPF: eid=1jZhWQ-0002MC-S4;;;mid=<87a728c3p3.fsf_-_@x220.int.ebiederm.org>;;;hst=in01.mta.xmission.com;;;ip=68.227.160.95;;;frm=ebiederm@xmission.com;;;spf=neutral X-XM-AID: U2FsdGVkX18fnAHJNEGorOC71WzvAXy4hkW7WUPCuUQ= X-SA-Exim-Connect-IP: 68.227.160.95 X-SA-Exim-Mail-From: ebiederm@xmission.com Subject: [PATCH 2/2] imap_fetch: Add a command to continuously fetch from an imap mailbox X-SA-Exim-Version: 4.2.1 (built Thu, 05 May 2016 13:38:54 -0600) X-SA-Exim-Scanned: Yes (on in01.mta.xmission.com) List-Id: The command imap_fetch connects to the specified imap mailbox and fetches any unfetch messages than waits with imap idle until there are more messages to fetch. By default messages are placed in the specified public inbox mailbox. The value of List-ID is consulted and if it is present used to select an alternate public-inbox mailbox to place the messages in. The email messages are placed without modification into the public inbox repository so minimize changes of corruption or of loosing valuable information. I use the command imap_fetch for all of my email and not just a mailling list mirror so I don't want automation to accidentally cause something important to be lost. No email messages are deleted from the server instead IMAPTracker is used to remember which messages were downloaded. Signed-off-by: "Eric W. Biederman" --- scripts/imap_fetch | 336 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100755 scripts/imap_fetch diff --git a/scripts/imap_fetch b/scripts/imap_fetch new file mode 100755 index 000000000000..007f78a71b52 --- /dev/null +++ b/scripts/imap_fetch @@ -0,0 +1,336 @@ +#!/usr/bin/perl -w +# Script to fetch IMAP messages and put then into a public-inbox +=begin usage + ./imap_fetch imap://username@hostname/mailbox inbox +=cut +use strict; +use warnings; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use Mail::IMAPClient; +use IO::Socket; +use IO::Socket::SSL; +use File::Sync qw(sync); +use Term::ReadKey; +use PublicInbox::IMAPTracker; +use PublicInbox::InboxWritable; +use POSIX qw(strftime); +sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) } +my $verify_ssl = 1; +my %opts = ( + '--verify-ssl!' => \$verify_ssl, +); +GetOptions(%opts) or die usage(); + +my $mail_url = shift @ARGV or die usage(); +my $inbox_name = shift @ARGV or die usage(); +my $mail_hostname; +my $mail_username; +my $mail_password; +my $mailbox; +if ($mail_url =~ m$\Aimap://([^@]+)[@]([^@]+)/(.+)\z$) { + $mail_username = $1; + $mail_hostname = $2; + $mailbox = $3; +} else { + die usage(); +} + +my $url = 'imap://' . $mail_username . '@' . $mail_hostname . '/' . $mailbox ; + +sub list_hdr_ibx($$) +{ + my ($config, $list_hdr) = @_; + my $list_id; + if ($list_hdr =~ m/\0/) { + warn("Bad List-ID: $list_hdr contains a null\n"); + return undef; + } elsif ($list_hdr =~ m/\A[^<>]*<(\S*)>\s*\z/) { + $list_id = $1; + } else { + warn("Bad List-ID: $list_hdr\n"); + return undef; + } + my $ibx = $config->lookup_list_id($list_id); + if (!defined($ibx)) { + warn("Cound not find inbox for List-ID: $list_id\n"); + } + + print(" List-ID: $list_id\n"); + $ibx; +} + +sub email_dest($$) +{ + my ($config, $mime) = @_; + my %ibxs; + my $hdr = $mime->header_obj; + my @list_hdrs = $hdr->header_raw('List-ID'); + for my $list_hdr (@list_hdrs) { + my $ibx = list_hdr_ibx($config, $list_hdr); + if (defined($ibx)) { + $ibxs{$ibx->{name}} = $ibx; + } + } + my @ibxs = values %ibxs; + return @ibxs; +} + +if (-t STDIN) { + print("Enter your imap password: "); + ReadMode('noecho'); + $mail_password = ReadLine(0); + ReadMode('normal'); + print("\n"); +} else { + print("Not a tty\n"); + $mail_password = readline(); +} +die("No password") unless defined($mail_password); +chomp($mail_password); + +sub imap_ssl_client() +{ + my %ca = eval { IO::Socket::SSL::default_ca(); }; + my $socket = IO::Socket::SSL->new( + PeerAddr => $mail_hostname, + PeerPort => 993, + Timeout => 5, + SSL_verify_mode => $verify_ssl ? SSL_VERIFY_PEER : SSL_VERIFY_NONE, + %ca, + ); + if (!defined($socket)) { + warn("Could not open socket to mailserver: $@\n"); + return undef; + } + my $client = Mail::IMAPClient->new( + Socket => $socket, + User => $mail_username, + Password => $mail_password, + Timeout => 5, + ); + if (!defined($client)) { + warn("Could not initialize imap client $@\n"); + $socket->close(); + return undef; + } + if (!$client->IsConnected()) { + warn("LastIMAPCommand: " . $client->LastIMAPCommand . "\n"); + warn("LastError: " . $client->LastError . "\n"); + warn("Could not connect to the IMAP server: $@\n"); + $client = undef; + $socket->close(); + return undef; + } + if (!$client->IsAuthenticated()) { + warn("LastIMAPCommand: " . $client->LastIMAPCommand . "\n"); + warn("LastError: " . $client->LastError . "\n"); + warn("Could not authenticate against IMAP: $@\n"); + $client->logout(); + $client = undef; + $socket->close(); + return undef; + } + + return $client; +} + +sub setup_mailbox($$) +{ + my ($client, $mailbox) = @_; + + $client->Peek(1); + $client->Uid(1); + + $client->select($mailbox); + my @results = $client->Results(); + my $validity = undef; + foreach (@results) { + if ($_ =~ /^\* OK \[UIDVALIDITY ([0-9]+)\].*$/) { + $validity = $1; + last; + } + } + if (!defined($validity) && $client->IsConnected()) { + $validity = $client->uidvalidity($mailbox); + } + die("No uid validity for $mailbox") unless $validity; + + return ($validity); +} + +sub fetch_mailbox ($$$$$$) +{ + my ($config, $tracker, $client, $mailbox, $validity, $default_ibx) = @_; + my $now = time(); + print("mailbox: $mailbox @ " . + strftime("%Y-%m-%d %H:%M:%S %z", localtime(time())) + . "\n"); + + my %importers; + my ($last_validity, $last_uid) = $tracker->get_last(); + + if (defined($last_validity) and ($validity ne $last_validity)) { + die ("Unexpected uid validity $validity expected $last_validity"); + } + + my $search_str="ALL"; + if (defined($last_uid)) { + # Find the last seen and all higher articles + $search_str = "UID $last_uid:*"; + } + my $uids = $client->search($search_str); + if (!defined($uids) || (scalar(@$uids) == 0)) { + print("$mailbox: No uids found for '$search_str'! $@\n"); + return 0; + } + + my $last = undef; + my @sorted_uids = sort { $a <=> $b } @$uids; + # Cap the number of uids to process at once + my $more = 0; + my $uid_count = scalar(@sorted_uids); + if ($uid_count > 100) { + @sorted_uids = @sorted_uids[0..99]; + $more = $uid_count - 100; + } + for my $uid (@sorted_uids) { + last unless $client->IsConnected(); + + print("$mailbox UID: $validity $uid\n"); + if (defined($last_uid)) { + if ($uid == $last_uid) { + next; + } + if ($uid < $last_uid) { + print("$mailbox: uid $uid not below last $last_uid, skipping.\n"); + next; + } + } + my $email_str = $client->message_string($uid) or die "Could not message_string $@\n"; + my $email_len = length($email_str); + my $mime = Email::MIME->new($email_str); + $mime->{-public_inbox_raw} = $email_str; + + my @dests = email_dest($config, $mime); + if (scalar(@dests) == 0) { + push(@dests, $default_ibx); + } + die ("no destination for the email") unless scalar(@dests) > 0; + #printf("$mailbox dests: %d\n", scalar(@dests)); + for my $ibx (@dests) { + my $name = $ibx->{name}; + my $im; + if (exists($importers{$name})) { + $im = $importers{$name}->[0]; + } else { + my $wibx = PublicInbox::InboxWritable->new($ibx); + die "no wibx" unless defined($wibx); + $im = $wibx->importer(1); + die "no im" unless defined($im); + my @arr = ( $im, $ibx ); + $importers{$name} = \@arr; + } + $im->add($mime); + } + $last = $uid; + } + + if ($last) { + die ("no ibx's for $tracker->{url}") unless scalar(keys %importers) > 0; + for my $name (keys %importers) { + my $ref = delete $importers{$name}; + my ($im, $ibx) = @$ref; + $im->done(); + } + print("updating tracker for $tracker->{url}...\n"); + $tracker->update_last($validity, $last); + } + + return $more; +} + + +sub fetch_mailbox_loop($) +{ + my ($mailbox) = @_; + my $config = eval { PublicInbox::Config->new }; + die("No public inbox config found!") unless $config; + + my $ibx = $config->lookup_name($inbox_name); + die("Public inbox $inbox_name not found!") unless defined($ibx); + + my $tracker = PublicInbox::IMAPTracker->new($url); + my $client = imap_ssl_client() || die("No imap connection"); + my $validity = setup_mailbox($client, $mailbox); + + for (;;) { + return unless $client->IsConnected(); + my $more; + do { + $more = fetch_mailbox($config, $tracker, $client, $mailbox, $validity, $ibx); + return unless $client->IsConnected(); + } while ($more > 0); + + my @untagged; + do { + my $max_idle = 15; + $client->idle() or die("idle failed!\n"); + my @results = $client->idle_data($max_idle*60); + return unless $client->IsConnected(); + my @ret = $client->done(); + push(@results, @ret); + for my $line (@results) { + next if (!defined($line)); + if ($line =~ m/^[*].*$/) { + push(@untagged, $line); + } + } + } while (scalar(@untagged) == 0); + print("$mailbox: untagged: '@untagged'\n"); + } + + $client->close($mailbox); + $client->logout(); + +} + +sub handle_mailbox($) +{ + # Run fetch_mailbox_loop in it's own separate process so + # that if something goes wrong the process exits + # and everything cleans up properly. + # + # Running fetch_mailbox_loop in an eval block is not enough + # to prevent leaks of locks and other resources. + my ($mailbox) = @_; + + for (;;) { + my $child = fork(); + if (!defined($child)) { + warn("fork failed: for $mailbox\n"); + continue; + } + elsif ($child == 0) { + # in the child + fetch_mailbox_loop($mailbox); + exit(0); + } + else { + my $sleep = 5; + warn("------------------------- CHILD: $child $mailbox -------------------------\n"); + my $pid = waitpid($child, 0); + if ($pid != $child) { + exit(1); + } + print("$mailbox done\n"); + sync(); + print("\n$mailbox: Sleeping for $sleep minutes\n\n"); + sleep($sleep*60); + } + } + exit(2); +} + +handle_mailbox($mailbox); + +1; -- 2.20.1