From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path:
X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net
X-Spam-Level:
X-Spam-ASN:
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00
	shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id C703E1FC97
	for ; Sun, 19 Sep 2021 12:50:36 +0000 (UTC)
From: Eric Wong
To: meta@public-inbox.org
Subject: [PATCH 11/16] xt: add fsck script over over.sqlite3
Date: Sun, 19 Sep 2021 12:50:30 +0000
Message-Id: <20210919125035.6331-12-e@80x24.org>
In-Reply-To: <20210919125035.6331-1-e@80x24.org>
References: <20210919125035.6331-1-e@80x24.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id:

I'm not sure what caused it, but I've noticed two missing messages
that failed from "lei up" on an https:// external; and I've also
seen some duplicates in the past (which I think I fixed...).
---
 MANIFEST          |  1 +
 xt/over-fsck.perl | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 xt/over-fsck.perl

diff --git a/MANIFEST b/MANIFEST
index 218e20e9..2df743f8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -568,6 +568,7 @@ xt/msgtime_cmp.t
 xt/net_nntp_socks.t
 xt/net_writer-imap.t
 xt/nntpd-validate.t
+xt/over-fsck.perl
 xt/perf-msgview.t
 xt/perf-nntpd.t
 xt/perf-obfuscate.t
diff --git a/xt/over-fsck.perl b/xt/over-fsck.perl
new file mode 100644
index 00000000..053204fe
--- /dev/null
+++ b/xt/over-fsck.perl
@@ -0,0 +1,44 @@
+#!perl -w
+# Copyright (C) all contributors
+# License: AGPL-3.0+
+# unstable dev script, chasing a bug which may be in LeiSavedSearch->is_dup
+use v5.12;
+use Data::Dumper;
+use PublicInbox::OverIdx;
+@ARGV == 1 or die "Usage: $0 /path/to/over.sqlite3\n";
+my $over = PublicInbox::OverIdx->new($ARGV[0]);
+my $dbh = $over->dbh;
+$dbh->do('PRAGMA mmap_size = '.(2 ** 48));
+my $num = 0;
+my ($err, $none, $nr, $ids);
+$Data::Dumper::Useqq = $Data::Dumper::Sortkeys = 1;
+do {
+	$ids = $over->ids_after(\$num);
+	$nr += @$ids;
+	for my $n (@$ids) {
+		my $smsg = $over->get_art($n);
+		if (!$smsg) {
+			warn "#$n article missing\n";
+			++$err;
+			next;
+		}
+		my $exp = $smsg->{blob};
+		if ($exp eq '') {
+			++$none if $smsg->{bytes};
+			next;
+		}
+		my $xr3 = $over->get_xref3($n, 1);
+		my $found;
+		for my $r (@$xr3) {
+			$r->[2] = unpack('H*', $r->[2]);
+			$found = 1 if $r->[2] eq $exp;
+		}
+		if (!$found) {
+			warn Dumper([$smsg, $xr3 ]);
+			++$err;
+		}
+	}
+} while (@$ids);
+warn "$none/$nr had no blob (external?)\n" if $none;
+warn "$err errors\n" if $err;
+exit($err ? 1 : 0);