From f5ca5437df27c558b1f2672014ecf43adf3ca2f0 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Tue, 13 Feb 2018 18:39:40 +0000 Subject: scripts/import_vger_from_mbox: support --dry-run option This can be useful for getting a baseline of the performance of just Email::MIME and Date: header parsing. We'll need to do some Date: header parsing for LKML since there are some wonky date formats which cause the git RFC822 parser to choke. --- scripts/import_vger_from_mbox | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox index 9b3afc88..3fa5c778 100644 --- a/scripts/import_vger_from_mbox +++ b/scripts/import_vger_from_mbox @@ -3,16 +3,21 @@ # License: AGPL-3.0+ use strict; use warnings; +use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; +use Date::Parse qw/str2time/; use Email::MIME; $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect use PublicInbox::Git; use PublicInbox::Import; my $usage = "usage: $0 NAME EMAIL \$dry_run ); +GetOptions(%opts) or die $usage; chomp(my $git_dir = `git rev-parse --git-dir`); my $git = PublicInbox::Git->new($git_dir); my $name = shift or die $usage; # git my $email = shift or die $usage; # git@vger.kernel.org -my $im = PublicInbox::Import->new($git, $name, $email); +my $im = $dry_run ? 
undef : PublicInbox::Import->new($git, $name, $email); binmode STDIN; my $msg = ''; use PublicInbox::Filter::Vger; @@ -22,9 +27,27 @@ sub do_add ($$) { $$msg =~ s/(\r?\n)+\z/$1/s; $msg = Email::MIME->new($$msg); $msg = $vger->scrub($msg); + my $hdr = $msg->header_obj; + my $date = $hdr->header_raw('Date'); + if ($date) { + eval { str2time($date) }; + if ($@) { + warn "bad Date: $date in ", + $hdr->header_raw('Message-ID'), ": $@\n"; + } + } else { + warn "missing Date: $date in ", + $hdr->header_raw('Message-ID'), ": $@\n"; + my $n = 0; + foreach my $r ($hdr->header_raw('Received')) { + warn "$n Received: $r\n"; + } + warn(('-' x 72), "\n"); + } + return unless $im; $im->add($msg) or warn "duplicate: ", - $msg->header_obj->header_raw('Message-ID'), "\n"; + $hdr->header_raw('Message-ID'), "\n"; } # asctime: From example@example.com Fri Jun 23 02:56:55 2000 @@ -44,4 +67,4 @@ while (defined(my $l = )) { $msg .= $l; } do_add($im, \$msg) if $msg; -$im->done; +$im->done if $im; -- cgit v1.2.3-24-ge0c7