user/dev discussion of public-inbox itself
 help / color / mirror / Atom feed
cf780c77babaac401b72b6a38eb4925a1bc660c2 blob 2555 bytes (raw)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
 
#!perl -w
# Copyright (C) 2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
use PublicInbox::Inbox;
use List::Util qw(max);
use Benchmark qw(:all :hireswallclock);
use PublicInbox::Spawn qw(popen_rd);
use Carp ();
require_git(2.19); # for --unordered
require_mods(qw(BSD::Resource));
BSD::Resource->import(qw(getrusage));

# Class whose ->new is used to parse every blob.  TEST_CLASS may name an
# alternative; it is loaded via require_mods before use.  Any false value
# (unset, empty string, "0") falls back to PublicInbox::Eml, so an empty
# TEST_CLASS= in the environment cannot leave us with an unusable
# empty class name.
my $cls = $ENV{TEST_CLASS};
if ($cls) {
	diag "TEST_CLASS=$cls";
	require_mods($cls);
} else {
	$cls = 'PublicInbox::Eml';
}
# The inbox to scan comes from the environment; without it there is
# nothing to measure, so the whole test is skipped.
my $inboxdir = $ENV{GIANT_INBOX_DIR};
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;

# Set the PublicInbox::Eml limit variables to 0x7fffffff for the rest of
# this file.  NOTE(review): presumably this keeps the parser from capping
# nesting/parts/header size while we measure -- confirm in PublicInbox::Eml.
local $PublicInbox::Eml::mime_nesting_limit = 0x7fffffff;
local $PublicInbox::Eml::mime_parts_limit = 0x7fffffff;
local $PublicInbox::Eml::header_size_limit = 0x7fffffff;

my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'x' });
my $git = $ibx->git;

# Ask git for information on every object in the repository; the read loop
# below keeps only the lines whose type field is "blob".
my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
my $fh = $git->popen(@cat);

# $n counts blobs queued for parsing, $m counts blobs actually parsed.
# Both start at 0 so that an inbox without any blobs still reports
# "0 <=> 0" instead of interpolating undef into the summary.
my ($m, $n) = (0, 0);

# Running maxima.  Each tracker is [ value, blob oid ], where the value is:
my $max_nest = [ 0, '' ]; # deepest part depth
my $max_idx = [ 0, '' ]; # largest component of a dotted part index
my $max_parts = [ 0, '' ]; # most parts in one message
my $max_size = [ 0, '' ]; # largest blob size
my $max_hdr = [ 0, '' ]; # longest header string
my $info = [ 0, '' ]; # [ part count, blob oid ] of the blob being parsed
# Callback handed to ->each_part.  Its first argument is an array ref that
# is unpacked as (part, depth, index); the index is split on "." and its
# largest component is tracked.  Updates $max_nest and $max_idx for the
# blob named in $info->[1] and counts the part in $info->[0].
my $each_part_cb = sub {
	my (undef, $depth, $idx) = @{$_[0]};
	if ($depth > $max_nest->[0]) {
		$max_nest = [ $depth, $info->[1] ];
	}
	my $widest = max(split(/\./, $idx));
	if ($widest > $max_idx->[0]) {
		$max_idx = [ $widest, $info->[1] ];
	}
	$info->[0] += 1; # one more part seen for the current blob
};

# Shared with the warning handler so a warning can name the blob that
# was most recently handed to $cat_cb.
my ($bref, $oid, $size);
local $SIG{__WARN__} = sub {
	diag("$inboxdir $oid ", @_);
};

# Callback given to $git->cat_async for every blob: parse it with $cls,
# then fold its header length, part count and size into the running maxima.
my $cat_cb = sub {
	($bref, $oid, undef, $size) = @_;
	$m += 1;
	$info = [ 0, $oid ]; # fresh part counter for this blob
	my $eml = $cls->new($bref);

	my $hdr_len = length($eml->header_obj->as_string);
	if ($hdr_len > $max_hdr->[0]) {
		$max_hdr = [ $hdr_len, $oid ];
	}

	# $each_part_cb increments $info->[0] once per part it is given
	$eml->each_part($each_part_cb, $info, 1);
	if ($info->[0] > $max_parts->[0]) {
		$max_parts = $info;
	}

	if ($size > $max_size->[0]) {
		$max_size = [ $size, $oid ];
	}
};

# Time one full pass over the inbox: every line read from $fh is split on
# single spaces into an object ID and a type, and each blob is queued for
# $cat_cb.  The pass only ends once all queued requests have completed.
my $t = timeit(1, sub {
	while (<$fh>) {
		my ($blob, $type) = split / /;
		if ($type eq 'blob') {
			++$n;
			$git->cat_async($blob, $cat_cb);
		}
	}
	$git->cat_async_wait;
});
# Every blob that was queued must also have been parsed.
is($m, $n, 'scanned all messages');

# Summary, each line prefixed with our PID.
diag "$$ $inboxdir took ".timestr($t)." for $n <=> $m messages";
my @maxima = (
	[ max_nest => $max_nest ],
	[ max_idx => $max_idx ],
	[ max_parts => $max_parts ],
	[ max_size => $max_size ],
	[ max_hdr => $max_hdr ],
);
for my $pair (@maxima) {
	my ($label, $rec) = @$pair;
	diag "$$ $label $rec->[0] @ $rec->[1]";
}
my $rss = getrusage()->maxrss;
diag "$$ RSS $rss k";
done_testing;
debug log:

solving cf780c77 ...
found cf780c77 in https://80x24.org/public-inbox.git

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git