From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net
X-Spam-Level:
X-Spam-ASN:
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00
	shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id 6F68B1FA04
	for ; Sun, 10 May 2020 22:37:15 +0000 (UTC)
From: Eric Wong
To: meta@public-inbox.org
Subject: [PATCH 1/5] xt/eml_check_limits: check limits against an inbox
Date: Sun, 10 May 2020 22:37:11 +0000
Message-Id: <20200510223715.19254-2-e@yhbt.net>
In-Reply-To: <20200510223715.19254-1-e@yhbt.net>
References: <20200510223715.19254-1-e@yhbt.net>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id:

This allows maintainers to easily check limits against the
contents of existing inboxes. This script covers most of the
new limits enforced by PublicInbox::Eml.

Usage is similar to most xt/*.t scripts:

	GIANT_INBOX_DIR=/path/to/inbox prove -bvw xt/eml_check_limits.t

Setting `TEST_CLASS=PublicInbox::MIME' allows us to check
performance and memory use against the old subclass of
Email::MIME.
---
 MANIFEST              |  1 +
 xt/eml_check_limits.t | 96 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 xt/eml_check_limits.t

diff --git a/MANIFEST b/MANIFEST
index 9c804a0780e..b1512c7a919 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -333,6 +333,7 @@ t/x-unknown-alpine.eml
 t/xcpdb-reshard.t
 xt/cmp-msgstr.t
 xt/cmp-msgview.t
+xt/eml_check_limits.t
 xt/git-http-backend.t
 xt/git_async_cmp.t
 xt/mem-msgview.t
diff --git a/xt/eml_check_limits.t b/xt/eml_check_limits.t
new file mode 100644
index 00000000000..39de047645b
--- /dev/null
+++ b/xt/eml_check_limits.t
@@ -0,0 +1,96 @@
+#!perl -w
+# Copyright (C) 2020 all contributors
+# License: AGPL-3.0+
+#
+# Walks every blob in GIANT_INBOX_DIR and reports the largest MIME nesting
+# depth, part index, part count, blob size and header length seen, so the
+# limits in PublicInbox::Eml can be compared against real-world messages.
+# TEST_CLASS selects an alternate parser class (default: PublicInbox::Eml).
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use PublicInbox::Inbox;
+use List::Util qw(max);
+use Benchmark qw(:all :hireswallclock);
+use PublicInbox::Spawn qw(popen_rd);
+use Carp ();
+require_git(2.19); # for --unordered
+require_mods(qw(BSD::Resource));
+BSD::Resource->import(qw(getrusage));
+my $cls = $ENV{TEST_CLASS};
+require_mods($cls) if $cls;
+$cls //= 'PublicInbox::Eml';
+my $inboxdir = $ENV{GIANT_INBOX_DIR};
+plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
+
+# raise the Eml limits to their maximum so this scan records what the inbox
+# actually contains (presumably instead of stopping at the defaults)
+local $PublicInbox::Eml::mime_nesting_limit = 0x7fffffff;
+local $PublicInbox::Eml::mime_parts_limit = 0x7fffffff;
+local $PublicInbox::Eml::header_size_limit = 0x7fffffff;
+my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'x' });
+my $git = $ibx->git;
+my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
+my $fh = $git->popen(@cat);
+
+# start at zero so an inbox without blobs compares 0 to 0 instead of
+# interpolating undef into the diag output below
+my ($m, $n) = (0, 0); # $m: blobs parsed, $n: blobs queued
+my $max_nest = [ 0, '' ]; # every $max_* is [ largest value, blob oid ]
+my $max_idx = [ 0, '' ];
+my $max_parts = [ 0, '' ];
+my $max_size = [ 0, '' ];
+my $max_hdr = [ 0, '' ];
+my $info = [ 0, '' ]; # [ parts seen, blob oid ] for the current message
+
+# each_part callback: updates the deepest nesting and the largest single
+# component of the dotted part index, and counts the part in $info
+my $each_part_cb = sub {
+	my ($p) = @_;
+	my (undef, $depth, $idx) = @$p; # the part object itself is unused
+	$max_nest = [ $depth, $info->[1] ] if $depth > $max_nest->[0];
+	my $max = max(split(/\./, $idx));
+	$max_idx = [ $max, $info->[1] ] if $max > $max_idx->[0];
+	++$info->[0];
+};
+
+my ($bref, $oid, $size);
+# tag warnings with the inbox and the blob being parsed; $oid is still
+# undef if something warns before the first blob arrives
+local $SIG{__WARN__} = sub { diag "$inboxdir ", $oid // '', ' ', @_ };
+
+# cat_async callback: parses one blob and updates the per-message maximums
+my $cat_cb = sub {
+	($bref, $oid, undef, $size) = @_;
+	++$m;
+	$info = [ 0, $oid ];
+	my $eml = $cls->new($bref);
+	my $hdr_len = length($eml->header_obj->as_string);
+	$max_hdr = [ $hdr_len, $oid ] if $hdr_len > $max_hdr->[0];
+	$eml->each_part($each_part_cb, $info, 1);
+	$max_parts = $info if $info->[0] > $max_parts->[0];
+	$max_size = [ $size, $oid ] if $size > $max_size->[0];
+};
+
+my $t = timeit(1, sub {
+	$git->cat_async_begin;
+	# a named lexical keeps the input line safe from anything that
+	# touches $_ while cat_async runs
+	while (my $line = <$fh>) {
+		my ($blob, $type) = split(/ /, $line);
+		next if $type ne 'blob';
+		++$n;
+		$git->cat_async($blob, $cat_cb);
+	}
+	$git->cat_async_wait;
+});
+is($m, $n, 'scanned all messages');
+diag "$$ $inboxdir took ".timestr($t)." for $n <=> $m messages";
+diag "$$ max_nest $max_nest->[0] @ $max_nest->[1]";
+diag "$$ max_idx $max_idx->[0] @ $max_idx->[1]";
+diag "$$ max_parts $max_parts->[0] @ $max_parts->[1]";
+diag "$$ max_size $max_size->[0] @ $max_size->[1]";
+diag "$$ max_hdr $max_hdr->[0] @ $max_hdr->[1]";
+diag "$$ RSS ".getrusage()->maxrss. ' k';
+done_testing;