From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] learn: respect indexlevel for v1 inboxes
Date: Sun, 15 Oct 2023 08:16:28 +0000 [thread overview]
Message-ID: <20231015081628.956814-1-e@80x24.org> (raw)
v2 inboxes apparently never suffered from this bug, but -learn
was unable to handle indexlevel=basic (and did not respect `medium')
for v1 inboxes. I only noticed this bug because I converted
some ancient v1 inboxes to `basic' to save space.
---
script/public-inbox-learn | 1 +
t/mda.t | 37 ++++++++++++++++++++++++++++++++++++-
t/v2mda.t | 27 ++++++++++++++++++++++++++-
3 files changed, 63 insertions(+), 2 deletions(-)
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 6e1978a7..54d31cb6 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -64,6 +64,7 @@ sub remove_or_add ($$$$) {
$ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name};
$ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr;
$ibx = PublicInbox::InboxWritable->new($ibx);
+ $ibx->{indexlevel} = $ibx->detect_indexlevel;
my $im = $ibx->importer(0);
if ($train eq "rm") {
diff --git a/t/mda.t b/t/mda.t
index e3c5cdff..83b0b33a 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -2,13 +2,13 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use warnings;
-use Test::More;
use Cwd qw(getcwd);
use PublicInbox::MID qw(mid2path);
use PublicInbox::Git;
use PublicInbox::InboxWritable;
use PublicInbox::TestCommon;
use PublicInbox::Import;
+use File::Path qw(remove_tree);
my ($tmpdir, $for_destroy) = tmpdir();
my $home = "$tmpdir/pi-home";
my $pi_home = "$home/.public-inbox";
@@ -312,4 +312,39 @@ EOF
like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git');
}
+SKIP: {
+ require_mods(qw(DBD::SQLite Xapian), 1);
+ local $ENV{PI_EMERGENCY} = $faildir;
+ local $ENV{HOME} = $home;
+ local $ENV{PATH} = $main_path;
+ my $rdr = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+ ok(run_script([qw(-index -L medium), $maindir], undef, $rdr),
+ 'index inbox');
+ my $in = <<'EOM';
+From: a@example.com
+To: updated-address@example.com
+Subject: this is a ham message for learn
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Message-ID: <medium-ham@example>
+
+yum
+EOM
+ $rdr->{0} = \$in;
+ ok(run_script([qw(-learn ham)], undef, $rdr), 'learn medium ham');
+ is($err, '', 'nothing in stderr after medium -learn');
+ my $msg = $git->cat_file('HEAD:'.mid2path('medium-ham@example'));
+ like($$msg, qr/medium-ham/, 'medium ham added via -learn');
+ my @xap = grep(!m!/over\.sqlite3!,
+ glob("$maindir/public-inbox/xapian*/*"));
+ ok(remove_tree(@xap), 'rm Xapian files to convert to indexlevel=basic');
+ $in =~ s/medium-ham/basic-ham/g or xbail 'BUG: no s//';
+ ok(run_script([qw(-learn ham)], undef, $rdr), 'learn basic ham');
+ is($err, '', 'nothing in stderr after basic -learn');
+ $msg = $git->cat_file('HEAD:'.mid2path('basic-ham@example'));
+ like($$msg, qr/basic-ham/, 'basic ham added via -learn');
+ @xap = grep(!m!/over\.sqlite3!,
+ glob("$maindir/public-inbox/xapian*/*"));
+ is_deeply(\@xap, [], 'no Xapian files created by -learn');
+};
+
done_testing();
diff --git a/t/v2mda.t b/t/v2mda.t
index a49eeb6d..b7d177b2 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -3,11 +3,11 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use v5.10.1;
use strict;
-use Test::More;
use Fcntl qw(SEEK_SET);
use Cwd;
use PublicInbox::TestCommon;
use PublicInbox::Eml;
+use File::Path qw(remove_tree);
require_git(2.6);
my $V = 2;
@@ -96,4 +96,29 @@ is($eml->as_string, $mime->as_string, 'injected message');
is($mset->size, 1, 'patchid search works');
}
+{
+ my @shards = grep(m!/[0-9]+\z!, glob("$ibx->{inboxdir}/xap*/*"));
+ ok(remove_tree(@shards), 'rm shards to convert to indexlevel=basic');
+ $ibx->do_cleanup;
+ $rdr->{2} = \(my $err = '');
+ $rdr->{0} = \<<'EOM';
+From: a@example.com
+To: test@example.com
+Subject: this is a ham message for learn
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Message-ID: <ham@example>
+
+yum
+EOM
+ my ($id, $prev);
+ is($ibx->over->next_by_mid('ham@example', \$id, \$prev), undef,
+ 'no ham@example, yet');
+ ok(run_script([qw(-learn ham)], undef, $rdr), '-learn runs on basic')
+ or diag $err;
+ my $smsg = $ibx->over->next_by_mid('ham@example', \$id, \$prev);
+ ok($smsg, 'ham message learned w/ indexlevel=basic');
+ @shards = grep(m!/[0-9]+\z!, glob("$ibx->{inboxdir}/xap*/*"));
+ is_deeply(\@shards, [], 'not converted to medium/full after learn');
+}
+
done_testing();
reply | other threads: [~2023-10-15 8:16 UTC | newest]
Thread overview: [no followups] | expand [flat | nested] | mbox.gz | Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231015081628.956814-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).