about summary refs log tree commit homepage
path: root/xt
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2022-09-04 04:27:49 +0000
committer Eric Wong <e@80x24.org> 2022-09-04 14:41:41 +0000
commit d20753c2a9d63e60b90942176684370c9dce97fb (patch)
tree f229502e0b44bf4e2e0a181b424b30b3cfc6c32c /xt
parent f521bf1ca5f779c3a18ff44f308ba318366ec7f5 (diff)
download public-inbox-d20753c2a9d63e60b90942176684370c9dce97fb.tar.gz
There'll be a number of upcoming changes to the HTML rendering
of messages, which will hopefully reduce memory usage and improve
speed by writing out to the gzip buffer earlier.

Update the tests now so it'll be easier to compare
before-and-after results.
Diffstat (limited to 'xt')
-rw-r--r-- xt/cmp-msgview.t 94
-rw-r--r-- xt/perf-msgview.t 24
-rw-r--r-- xt/perf-obfuscate.t 26
3 files changed, 27 insertions, 117 deletions
diff --git a/xt/cmp-msgview.t b/xt/cmp-msgview.t
deleted file mode 100644
index 9b06f88d..00000000
--- a/xt/cmp-msgview.t
+++ /dev/null
@@ -1,94 +0,0 @@
-#!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use Test::More;
-use Benchmark qw(:all);
-use PublicInbox::Inbox;
-use PublicInbox::View;
-use PublicInbox::TestCommon;
-use PublicInbox::Eml;
-use Digest::MD5;
-require_git(2.19);
-require_mods qw(Data::Dumper Email::MIME Plack::Util);
-Data::Dumper->import('Dumper');
-require PublicInbox::MIME;
-my ($tmpdir, $for_destroy) = tmpdir();
-my $inboxdir = $ENV{GIANT_INBOX_DIR};
-plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
-my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
-my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' });
-my $git = $ibx->git;
-my $fh = $git->popen(@cat);
-vec(my $vec = '', fileno($fh), 1) = 1;
-select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
-my $mime_ctx = {
-        env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
-        ibx => $ibx,
-        www => Plack::Util::inline_object(style => sub {''}),
-        obuf => \(my $mime_buf = ''),
-        mhref => '../',
-};
-my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') };
-my $n = 0;
-my $m = 0;
-my $ndiff_html = 0;
-my $dig_cls = 'Digest::MD5';
-my $digest_attach = sub { # ensure ->body (not ->body_raw) matches
-        my ($p, $cmp_arg) = @_;
-        my $part = shift @$p;
-        my $dig = $cmp_arg->[0] //= $dig_cls->new;
-        $dig->add($part->body_raw);
-        push @$cmp_arg, join(', ', @$p);
-};
-
-my $git_cb = sub {
-        my ($bref, $oid) = @_;
-        local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
-        ++$m;
-        my $mime = PublicInbox::MIME->new($$bref);
-        PublicInbox::View::multipart_text_as_html($mime, $mime_ctx);
-        my $eml = PublicInbox::Eml->new($$bref);
-        PublicInbox::View::multipart_text_as_html($eml, $eml_ctx);
-        if ($eml_buf ne $mime_buf) {
-                ++$ndiff_html;
-                open my $fh, '>', "$tmpdir/mime" or die $!;
-                print $fh $mime_buf or die $!;
-                close $fh or die $!;
-                open $fh, '>', "$tmpdir/eml" or die $!;
-                print $fh $eml_buf or die $!;
-                close $fh or die $!;
-                # using `git diff', diff(1) may not be installed
-                diag "$inboxdir $oid differs";
-                diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]);
-        }
-        $eml_buf = $mime_buf = '';
-
-        # don't tolerate differences in attachment downloads
-        $mime = PublicInbox::MIME->new($$bref);
-        $mime->each_part($digest_attach, my $mime_cmp = [], 1);
-        $eml = PublicInbox::Eml->new($$bref);
-        $eml->each_part($digest_attach, my $eml_cmp = [], 1);
-        $mime_cmp->[0] = $mime_cmp->[0]->hexdigest;
-        $eml_cmp->[0] = $eml_cmp->[0]->hexdigest;
-        # don't have millions of "ok" lines
-        if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) {
-                diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]);
-                is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match");
-        }
-};
-my $t = timeit(1, sub {
-        while (<$fh>) {
-                my ($oid, $type) = split / /;
-                next if $type ne 'blob';
-                ++$n;
-                $git->cat_async($oid, $git_cb);
-        }
-        $git->async_wait_all;
-});
-is($m, $n, 'rendered all messages');
-
-# we'll tolerate minor differences in HTML rendering
-diag "$ndiff_html HTML differences";
-
-done_testing();
diff --git a/xt/perf-msgview.t b/xt/perf-msgview.t
index cf550c1a..7f92ce85 100644
--- a/xt/perf-msgview.t
+++ b/xt/perf-msgview.t
@@ -7,7 +7,7 @@ use PublicInbox::TestCommon;
 use Benchmark qw(:all);
 use PublicInbox::Inbox;
 use PublicInbox::View;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::WwwStream;
 
 my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR};
 my $blob = $ENV{TEST_BLOB};
@@ -31,26 +31,28 @@ if ($fh) {
                 die "timed out waiting for --batch-check";
 }
 
-my $ctx = {
+my $ctx = bless {
         env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
         ibx => $ibx,
         www => Plack::Util::inline_object(style => sub {''}),
-};
-my ($mime, $res, $oid, $type);
+        gz => PublicInbox::GzipFilter::gzip_or_die(),
+}, 'PublicInbox::WwwStream';
+my ($eml, $res, $oid, $type);
 my $n = 0;
-my $obuf = '';
 my $m = 0;
+${$ctx->{obuf}} = '';
+$ctx->{mhref} = '../';
 
 my $cb = sub {
-        $mime = PublicInbox::Eml->new(shift);
-        PublicInbox::View::multipart_text_as_html($mime, $ctx);
+        $eml = PublicInbox::Eml->new(shift);
+        $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
+        $ctx->zflush;
         ++$m;
-        $obuf = '';
+        delete $ctx->{zbuf};
+        ${$ctx->{obuf}} = '';
 };
 
 my $t = timeit(1, sub {
-        $ctx->{obuf} = \$obuf;
-        $ctx->{mhref} = '../';
         if (defined $blob) {
                 my $nr = $ENV{NR} // 10000;
                 for (1..$nr) {
@@ -67,6 +69,6 @@ my $t = timeit(1, sub {
         }
         $git->async_wait_all;
 });
-diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages";
+diag 'add_text_body took '.timestr($t)." for $n <=> $m messages";
 is($m, $n, 'rendered all messages');
 done_testing();
diff --git a/xt/perf-obfuscate.t b/xt/perf-obfuscate.t
index 640309d2..4da36124 100644
--- a/xt/perf-obfuscate.t
+++ b/xt/perf-obfuscate.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -7,6 +7,7 @@ use PublicInbox::TestCommon;
 use Benchmark qw(:all);
 use PublicInbox::Inbox;
 use PublicInbox::View;
+use PublicInbox::WwwStream;
 
 my $inboxdir = $ENV{GIANT_INBOX_DIR};
 plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
@@ -22,7 +23,6 @@ if (require_git(2.19, 1)) {
 "git <2.19, cat-file lacks --unordered, locality suffers\n";
 }
 require_mods qw(Plack::Util);
-use_ok 'Plack::Util';
 my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' ,
                                     obfuscate => $obfuscate});
 my $git = $ibx->git;
@@ -31,26 +31,28 @@ my $vec = '';
 vec($vec, fileno($fh), 1) = 1;
 select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
 
-my $ctx = {
+my $ctx = bless {
         env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
         ibx => $ibx,
         www => Plack::Util::inline_object(style => sub {''}),
-};
-my ($mime, $res, $oid, $type);
+        gz => PublicInbox::GzipFilter::gzip_or_die(),
+}, 'PublicInbox::WwwStream';
+my ($eml, $res, $oid, $type);
 my $n = 0;
-my $obuf = '';
 my $m = 0;
+${$ctx->{obuf}} = '';
+$ctx->{mhref} = '../';
 
 my $cb = sub {
-        $mime = PublicInbox::Eml->new(shift);
-        PublicInbox::View::multipart_text_as_html($mime, $ctx);
+        $eml = PublicInbox::Eml->new(shift);
+        $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
+        $ctx->zflush;
         ++$m;
-        $obuf = '';
+        delete $ctx->{zbuf};
+        ${$ctx->{obuf}} = '';
 };
 
 my $t = timeit(1, sub {
-        $ctx->{obuf} = \$obuf;
-        $ctx->{mhref} = '../';
         while (<$fh>) {
                 ($oid, $type) = split / /;
                 next if $type ne 'blob';
@@ -59,6 +61,6 @@ my $t = timeit(1, sub {
         }
         $git->async_wait_all;
 });
-diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages";
+diag 'add_text_body took '.timestr($t)." for $n <=> $m messages";
 is($m, $n, 'rendered all messages');
 done_testing();