user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] learn: respect indexlevel for v1 inboxes
Date: Sun, 15 Oct 2023 08:16:28 +0000	[thread overview]
Message-ID: <20231015081628.956814-1-e@80x24.org> (raw)

v2 never suffered from this bug, apparently, but -learn didn't
seem able to handle indexlevel=basic (or to respect `medium')
for v1 inboxes.  I only noticed this bug because I converted
some ancient v1 inboxes to `basic' to save space.
---
 script/public-inbox-learn |  1 +
 t/mda.t                   | 37 ++++++++++++++++++++++++++++++++++++-
 t/v2mda.t                 | 27 ++++++++++++++++++++++++++-
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 6e1978a7..54d31cb6 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -64,6 +64,7 @@ sub remove_or_add ($$$$) {
 	$ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name};
 	$ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr;
 	$ibx = PublicInbox::InboxWritable->new($ibx);
+	$ibx->{indexlevel} = $ibx->detect_indexlevel;
 	my $im = $ibx->importer(0);
 
 	if ($train eq "rm") {
diff --git a/t/mda.t b/t/mda.t
index e3c5cdff..83b0b33a 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -2,13 +2,13 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use warnings;
-use Test::More;
 use Cwd qw(getcwd);
 use PublicInbox::MID qw(mid2path);
 use PublicInbox::Git;
 use PublicInbox::InboxWritable;
 use PublicInbox::TestCommon;
 use PublicInbox::Import;
+use File::Path qw(remove_tree);
 my ($tmpdir, $for_destroy) = tmpdir();
 my $home = "$tmpdir/pi-home";
 my $pi_home = "$home/.public-inbox";
@@ -312,4 +312,39 @@ EOF
 	like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git');
 }
 
+SKIP: {
+	require_mods(qw(DBD::SQLite Xapian), 1);
+	local $ENV{PI_EMERGENCY} = $faildir;
+	local $ENV{HOME} = $home;
+	local $ENV{PATH} = $main_path;
+	my $rdr = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+	ok(run_script([qw(-index -L medium), $maindir], undef, $rdr),
+		'index inbox');
+	my $in = <<'EOM';
+From: a@example.com
+To: updated-address@example.com
+Subject: this is a ham message for learn
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Message-ID: <medium-ham@example>
+
+yum
+EOM
+	$rdr->{0} = \$in;
+	ok(run_script([qw(-learn ham)], undef, $rdr), 'learn medium ham');
+	is($err, '', 'nothing in stderr after medium -learn');
+	my $msg = $git->cat_file('HEAD:'.mid2path('medium-ham@example'));
+	like($$msg, qr/medium-ham/, 'medium ham added via -learn');
+	my @xap = grep(!m!/over\.sqlite3!,
+			glob("$maindir/public-inbox/xapian*/*"));
+	ok(remove_tree(@xap), 'rm Xapian files to convert to indexlevel=basic');
+	$in =~ s/medium-ham/basic-ham/g or xbail 'BUG: no s//';
+	ok(run_script([qw(-learn ham)], undef, $rdr), 'learn basic ham');
+	is($err, '', 'nothing in stderr after basic -learn');
+	$msg = $git->cat_file('HEAD:'.mid2path('basic-ham@example'));
+	like($$msg, qr/basic-ham/, 'basic ham added via -learn');
+	@xap = grep(!m!/over\.sqlite3!,
+			glob("$maindir/public-inbox/xapian*/*"));
+	is_deeply(\@xap, [], 'no Xapian files created by -learn');
+};
+
 done_testing();
diff --git a/t/v2mda.t b/t/v2mda.t
index a49eeb6d..b7d177b2 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -3,11 +3,11 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use v5.10.1;
 use strict;
-use Test::More;
 use Fcntl qw(SEEK_SET);
 use Cwd;
 use PublicInbox::TestCommon;
 use PublicInbox::Eml;
+use File::Path qw(remove_tree);
 require_git(2.6);
 
 my $V = 2;
@@ -96,4 +96,29 @@ is($eml->as_string, $mime->as_string, 'injected message');
 	is($mset->size, 1, 'patchid search works');
 }
 
+{
+	my @shards = grep(m!/[0-9]+\z!, glob("$ibx->{inboxdir}/xap*/*"));
+	ok(remove_tree(@shards), 'rm shards to convert to indexlevel=basic');
+	$ibx->do_cleanup;
+	$rdr->{2} = \(my $err = '');
+	$rdr->{0} = \<<'EOM';
+From: a@example.com
+To: test@example.com
+Subject: this is a ham message for learn
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Message-ID: <ham@example>
+
+yum
+EOM
+	my ($id, $prev);
+	is($ibx->over->next_by_mid('ham@example', \$id, \$prev), undef,
+		'no ham@example, yet');
+	ok(run_script([qw(-learn ham)], undef, $rdr), '-learn runs on basic')
+		or diag $err;
+	my $smsg = $ibx->over->next_by_mid('ham@example', \$id, \$prev);
+	ok($smsg, 'ham message learned w/ indexlevel=basic');
+	@shards = grep(m!/[0-9]+\z!, glob("$ibx->{inboxdir}/xap*/*"));
+	is_deeply(\@shards, [], 'not converted to medium/full after learn');
+}
+
 done_testing();

                 reply	other threads:[~2023-10-15  8:16 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231015081628.956814-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).