#!/usr/bin/perl -w
# Copyright (C) 2016 all contributors
# License: AGPL-3.0+
#
# scripts/import_vger_from_mbox
#
# Reads an mbox stream on STDIN, splits it into individual messages on
# asctime-style "From " separator lines, runs each message through
# PublicInbox::Filter::Vger and adds it via PublicInbox::Import.
#
# Provenance (posted as a patch to meta@public-inbox.org):
#   From: Eric Wong
#   Subject: [PATCH] add script used for importing git from download.gmane.org
#   Date: Thu, 28 Jul 2016 21:03:02 +0000
#   Message-Id: <20160728210302.25130-1-e@80x24.org>
#   "In case others want to use it..."
use strict;
use warnings;
use Email::MIME;
$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::Filter::Vger;

my $usage = "usage: $0 NAME EMAIL <MBOX\n";

# Validate the command line before touching git so that a bare invocation
# always prints the usage text.
my $name = shift or die $usage; # git
my $email = shift or die $usage; # git@vger.kernel.org

# NOTE(review): the statement that originally initialised $git_dir was lost
# when this text was extracted (only "new($git_dir)" survived).  Asking git
# for the current repository is a reconstruction -- confirm against the
# upstream script.
my $git_dir = `git rev-parse --git-dir`;
if (!defined($git_dir) || $? != 0) {
    die "git rev-parse --git-dir failed (\$?=$?)\n";
}
chomp $git_dir;
die "git rev-parse --git-dir returned nothing\n" if $git_dir eq '';

my $git = PublicInbox::Git->new($git_dir);
my $im = PublicInbox::Import->new($git, $name, $email);
my $vger = PublicInbox::Filter::Vger->new;

# Read raw bytes; messages are handed to Email::MIME unmodified.
binmode STDIN;

# do_add($im, \$raw)
#
# Parse one raw message and add it to the importer.
#   $im  - PublicInbox::Import object to add the message to
#   $raw - reference to the raw message text; trailing newlines in the
#          referenced string are collapsed to a single one in place
# A false return from $im->add is reported on STDERR as a duplicate.
sub do_add {
    my ($im, $raw) = @_;

    # Collapse any run of trailing line endings to the last one.
    $$raw =~ s/(\r?\n)+\z/$1/s;

    my $mime = Email::MIME->new($$raw);
    $mime = $vger->scrub($mime);
    return if $im->add($mime);

    my $mid = $mime->header_obj->header_raw('Message-ID');
    warn "duplicate: ", (defined($mid) ? $mid : '(no Message-ID)'), "\n";
    return;
}

# asctime: From example@example.com Fri Jun 23 02:56:55 2000
my $from_strict = qr/^From \S+ \S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/;

my $msg = '';   # text of the message currently being accumulated
my $prev;       # previous input line; undef until the first line is read

while (defined(my $line = <STDIN>)) {
    if ($line =~ $from_strict) {
        # A separator only counts at the very start of input or directly
        # after a blank line; anything else is kept as message content.
        if (!defined($prev) || $prev =~ /^\r?$/) {
            do_add($im, \$msg) if $msg ne '';
            $msg = '';
            $prev = $line;
            next;
        }
        warn "W[$.] $line\n";
    }
    $prev = $line;
    $msg .= $line;
}

# Flush the final message; no separator line follows it.
do_add($im, \$msg) if $msg ne '';
$im->done;