From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 368BC1FAF6 for ; Mon, 19 Mar 2018 08:15:03 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 15/27] import: switch to URL-safe Base64 for Message-IDs Date: Mon, 19 Mar 2018 08:14:47 +0000 Message-Id: <20180319081459.10645-16-e@80x24.org> In-Reply-To: <20180319081459.10645-1-e@80x24.org> References: <20180319081459.10645-1-e@80x24.org> List-Id: Hexdigests are too long and shorter Message-IDs are easier to deal with. --- lib/PublicInbox/Import.pm | 11 ++++++++++- t/v2writable.t | 10 ++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 4c007b6..77e74c1 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -401,7 +401,16 @@ sub atfork_child { sub digest2mid ($) { my ($dig) = @_; - $dig->clone->hexdigest . '@localhost'; + my $b64 = $dig->clone->b64digest; + # Make our own URLs nicer: + # See "Base 64 Encoding with URL and Filename Safe Alphabet" in RFC4648 + $b64 =~ tr!+/=!-_!d; + + # We can make this more meaningful with a date prefix or other things, + # but this is only needed for crap that fails to generate a Message-ID + # or reuses one. In other words, it's usually spammers who hit this + # so they don't deserve nice Message-IDs :P + $b64 . '@localhost'; } 1; diff --git a/t/v2writable.t b/t/v2writable.t index c6bcefd..bbe6d14 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -68,6 +68,7 @@ if ('ensure git configs are correct') { [ $mime->header_obj->header_raw('Message-Id') ], 'no new Message-Id added'); + my $sane_mid = qr/\A<[\w\-]+\@localhost>\z/; @warn = (); $mime->header_set('Message-Id', ''); $mime->body_set('different'); @@ -75,13 +76,14 @@ if ('ensure git configs are correct') { like(join(' ', @warn), qr/reused/, 'warned about reused MID'); my @mids = $mime->header_obj->header_raw('Message-Id'); is($mids[1], '', 'original mid not changed'); - like($mids[0], qr/\A<\w+\@localhost>\z/, 'new MID added'); + like($mids[0], $sane_mid, 'new MID added'); is(scalar(@mids), 2, 'only one new MID added'); @warn = (); $mime->header_set('Message-Id', ''); $mime->body_set('this one needs a random mid'); - my $gen = content_digest($mime)->hexdigest . '@localhost'; + my $gen = PublicInbox::Import::digest2mid(content_digest($mime)); + unlike($gen, qr![\+/=]!, 'no URL-unfriendly chars in Message-Id'); my $fake = PublicInbox::MIME->new($mime->as_string); $fake->header_set('Message-Id', $gen); ok($im->add($fake), 'fake added easily'); @@ -90,14 +92,14 @@ if ('ensure git configs are correct') { like(join(' ', @warn), qr/using random/, 'warned about using random'); @mids = $mime->header_obj->header_raw('Message-Id'); is($mids[1], '', 'original mid not changed'); - like($mids[0], qr/\A<\w+\@localhost>\z/, 'new MID added'); + like($mids[0], $sane_mid, 'new MID added'); is(scalar(@mids), 2, 'only one new MID added'); @warn = (); $mime->header_set('Message-Id'); ok($im->add($mime), 'random MID made for MID free message'); @mids = $mime->header_obj->header_raw('Message-Id'); - like($mids[0], qr/\A<\w+\@localhost>\z/, 'mid was generated'); + like($mids[0], $sane_mid, 'mid was generated'); is(scalar(@mids), 1, 'new generated'); } -- EW