user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
 Warning: Initial query:
 %22public-inbox 1.3.0%22
 returned no results, used:
 "public-inbox 1.3.0"
 instead

Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 01/20] index: support --rethread switch to fix old indices
  @ 2020-07-24  5:55  3% ` Eric Wong
  0 siblings, 0 replies; 7+ results
From: Eric Wong @ 2020-07-24  5:55 UTC (permalink / raw)
  To: meta

Older versions of public-inbox < 1.3.0 had subtly
different semantics around threading in some corner
cases.  This switch (when combined with --reindex)
allows us to fix them by regenerating associations.
---
 Documentation/public-inbox-index.pod | 23 +++++++--
 lib/PublicInbox/OverIdx.pm           | 76 ++++++++++++++++++++++++++--
 lib/PublicInbox/SearchIdx.pm         |  7 ++-
 lib/PublicInbox/V2Writable.pm        |  4 +-
 script/public-inbox-index            |  2 +-
 t/v1reindex.t                        | 34 +++++++++++++
 t/v2reindex.t                        | 45 ++++++++++++++++
 7 files changed, 177 insertions(+), 14 deletions(-)

diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod
index ff2e54867..08f2fbf45 100644
--- a/Documentation/public-inbox-index.pod
+++ b/Documentation/public-inbox-index.pod
@@ -68,12 +68,25 @@ Xapian database.  Using this with C<--compact> or running
 L<public-inbox-compact(1)> afterwards is recommended to
 release free space.
 
-public-inbox protects writes to various indices with L<flock(2)>,
-so it is safe to reindex while L<public-inbox-watch(1)>,
-L<public-inbox-mda(1)> or L<public-inbox-learn(1)> run.
+public-inbox protects writes to various indices with
+L<flock(2)>, so it is safe to reindex (and rethread) while
+L<public-inbox-watch(1)>, L<public-inbox-mda(1)> or
+L<public-inbox-learn(1)> run.
 
-This does not touch the NNTP article number database or
-affect threading.
+This does not touch the NNTP article number database.
+It does not affect threading unless C<--rethread> is
+used.
+
+=item --rethread
+
+Regenerate internal THREADID and message thread associations
+when reindexing.
+
+This fixes some bugs in older versions of public-inbox.  While
+it is possible to use this without C<--reindex>, it makes little
+sense to do so.
+
+Available in public-inbox 1.6.0 (PENDING).
 
 =item --prune
 
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 5601e602c..c57be7243 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -17,6 +17,7 @@ use PublicInbox::MID qw/id_compress mids_for_index references/;
 use PublicInbox::Smsg qw(subject_normalized);
 use Compress::Zlib qw(compress);
 use PublicInbox::Search;
+use Carp qw(croak);
 
 sub dbh_new {
 	my ($self) = @_;
@@ -37,6 +38,13 @@ sub dbh_new {
 	$dbh;
 }
 
+sub new {
+	my ($class, $f) = @_;
+	my $self = $class->SUPER::new($f);
+	$self->{min_tid} = 0;
+	$self;
+}
+
 sub get_counter ($$) {
 	my ($dbh, $key) = @_;
 	my $sth = $dbh->prepare_cached(<<'', undef, 1);
@@ -164,8 +172,12 @@ sub _resolve_mid_to_tid {
 	my $cur_tid = $smsg->{tid};
 	if (defined $$tid) {
 		merge_threads($self, $$tid, $cur_tid);
-	} else {
+	} elsif ($cur_tid > $self->{min_tid}) {
 		$$tid = $cur_tid;
+	} else { # rethreading, queue up dead ghosts
+		$$tid = next_tid($self);
+		my $num = $smsg->{num};
+		push(@{$self->{-ghosts_to_delete}}, $num) if $num < 0;
 	}
 	1;
 }
@@ -175,7 +187,10 @@ sub resolve_mid_to_tid {
 	my ($self, $mid) = @_;
 	my $tid;
 	each_by_mid($self, $mid, ['tid'], \&_resolve_mid_to_tid, \$tid);
-	defined $tid ? $tid : create_ghost($self, $mid);
+	if (my $del = delete $self->{-ghosts_to_delete}) {
+		delete_by_num($self, $_) for @$del;
+	}
+	$tid // create_ghost($self, $mid);
 }
 
 sub create_ghost {
@@ -221,7 +236,7 @@ sub link_refs {
 			merge_threads($self, $tid, $ptid);
 		}
 	} else {
-		$tid = defined $old_tid ? $old_tid : next_tid($self);
+		$tid = $old_tid // next_tid($self);
 	}
 	$tid;
 }
@@ -278,10 +293,17 @@ sub _add_over {
 	my $cur_tid = $smsg->{tid};
 	my $n = $smsg->{num};
 	die "num must not be zero for $mid" if !$n;
-	$$old_tid = $cur_tid unless defined $$old_tid;
+	my $cur_valid = $cur_tid > $self->{min_tid};
+
 	if ($n > 0) { # regular mail
-		merge_threads($self, $$old_tid, $cur_tid);
+		if ($cur_valid) {
+			$$old_tid //= $cur_tid;
+			merge_threads($self, $$old_tid, $cur_tid);
+		} else {
+			$$old_tid //= next_tid($self);
+		}
 	} elsif ($n < 0) { # ghost
+		$$old_tid //= $cur_valid ? $cur_tid : next_tid($self);
 		link_refs($self, $refs, $$old_tid);
 		delete_by_num($self, $n);
 		$$v++;
@@ -297,6 +319,7 @@ sub add_over {
 
 	begin_lazy($self);
 	delete_by_num($self, $num, \$old_tid);
+	$old_tid = undef if ($old_tid // 0) <= $self->{min_tid};
 	foreach my $mid (@$mids) {
 		my $v = 0;
 		each_by_mid($self, $mid, ['tid'], \&_add_over,
@@ -456,4 +479,47 @@ sub create {
 	$self->disconnect;
 }
 
+sub rethread_prepare {
+	my ($self, $opt) = @_;
+	return unless $opt->{rethread};
+	begin_lazy($self);
+	my $min = $self->{min_tid} = get_counter($self->{dbh}, 'thread') // 0;
+	my $pr = $opt->{-progress};
+	$pr->("rethread min THREADID ".($min + 1)."\n") if $pr && $min;
+}
+
+sub rethread_done {
+	my ($self, $opt) = @_;
+	return unless $opt->{rethread} && $self->{txn};
+	defined(my $min = $self->{min_tid}) or croak('BUG: no min_tid');
+	my $dbh = $self->{dbh} or croak('BUG: no dbh');
+	my $rows = $dbh->selectall_arrayref(<<'', { Slice => {} }, $min);
+SELECT num,tid FROM over WHERE num < 0 AND tid < ?
+
+	my $show_id = $dbh->prepare('SELECT id FROM id2num WHERE num = ?');
+	my $show_mid = $dbh->prepare('SELECT mid FROM msgid WHERE id = ?');
+	my $pr = $opt->{-progress};
+	my $total = 0;
+	for my $r (@$rows) {
+		my $exp = 0;
+		$show_id->execute($r->{num});
+		while (defined(my $id = $show_id->fetchrow_array)) {
+			++$exp;
+			$show_mid->execute($id);
+			my $mid = $show_mid->fetchrow_array;
+			if (!defined($mid)) {
+				warn <<EOF;
+E: ghost NUM=$r->{num} ID=$id THREADID=$r->{tid} has no Message-ID
+EOF
+				next;
+			}
+			$pr->(<<EOM) if $pr;
+I: ghost $r->{num} <$mid> THREADID=$r->{tid} culled
+EOM
+		}
+		delete_by_num($self, $r->{num});
+	}
+	$pr->("I: rethread culled $total ghosts\n") if $pr && $total;
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 831625090..e641ffd43 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -723,6 +723,7 @@ sub _index_sync {
 	my $pr = $opts->{-progress};
 
 	my $xdb = $self->begin_txn_lazy;
+	$self->{over}->rethread_prepare($opts);
 	my $mm = _msgmap_init($self);
 	do {
 		$xlog = undef; # stop previous git-log via SIGPIPE
@@ -761,12 +762,14 @@ sub _index_sync {
 				$xdb->set_metadata('last_commit', $newest);
 			}
 		}
+
+		$self->{over}->rethread_done($opts) if $newest; # all done
 		$self->commit_txn_lazy;
 		$git->cleanup;
 		$xdb = _xdb_release($self, $nr);
-		# let another process do some work... <
+		# let another process do some work...
 		$pr->("indexed $nr/$self->{ntodo}\n") if $pr && $nr;
-		if (!$newest) {
+		if (!$newest) { # more to come
 			$xdb = $self->begin_txn_lazy;
 			$dbh->begin_work if $dbh;
 		}
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 0582dd5e3..16556ddc2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1308,6 +1308,7 @@ sub index_sync {
 	my $latest = git_dir_latest($self, \$epoch_max);
 	return unless defined $latest;
 	$self->idx_init($opt); # acquire lock
+	$self->{over}->rethread_prepare($opt);
 	my $sync = {
 		D => {}, # "$mid\0$chash" => $oid
 		unindex_range => {}, # EPOCH => oid_old..oid_new
@@ -1370,12 +1371,13 @@ sub index_sync {
 		my $pr = $sync->{-opt}->{-progress};
 		$pr->('all.git '.sprintf($sync->{-regen_fmt}, $nr)) if $pr;
 	}
+	$self->{over}->rethread_done($opt);
 
 	# reindex does not pick up new changes, so we rerun w/o it:
 	if ($opt->{reindex}) {
 		my %again = %$opt;
 		$sync = undef;
-		delete @again{qw(reindex -skip_lock)};
+		delete @again{qw(rethread reindex -skip_lock)};
 		index_sync($self, \%again);
 	}
 }
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 6217fb86c..2e1934b08 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -15,7 +15,7 @@ use PublicInbox::Xapcmd;
 
 my $compact_opt;
 my $opt = { quiet => -1, compact => 0, maxsize => undef };
-GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune
+GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
 		indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s))
 	or die "bad command-line args\n$usage";
 die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
diff --git a/t/v1reindex.t b/t/v1reindex.t
index 9f23ef01e..8cb751881 100644
--- a/t/v1reindex.t
+++ b/t/v1reindex.t
@@ -11,6 +11,7 @@ require_git(2.6);
 require_mods(qw(DBD::SQLite Search::Xapian));
 use_ok 'PublicInbox::SearchIdx';
 use_ok 'PublicInbox::Import';
+use_ok 'PublicInbox::OverIdx';
 my ($inboxdir, $for_destroy) = tmpdir();
 my $ibx_config = {
 	inboxdir => $inboxdir,
@@ -427,5 +428,38 @@ ok(!-d $xap, 'Xapian directories removed again');
 		  ], 'msgmap as expected' );
 }
 
+{
+	my @warn;
+	local $SIG{__WARN__} = sub { push @warn, @_ };
+	my $ibx = PublicInbox::Inbox->new({ %$ibx_config });
+	my $f = $ibx->over->{dbh}->sqlite_db_filename;
+	my $over = PublicInbox::OverIdx->new($f);
+	my $dbh = $over->connect;
+	my $non_ghost_tids = sub {
+		$dbh->selectall_arrayref(<<'');
+SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC
+
+	};
+	my $before = $non_ghost_tids->();
+
+	# mess up threading:
+	my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread');
+	my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid);
+
+	my $rw = PublicInbox::SearchIdx->new($ibx, 1);
+	my @pr;
+	my $pr = sub { push @pr, @_ };
+	$rw->index_sync({reindex => 1, rethread => 1, -progress => $pr });
+	my @n = $dbh->selectrow_array(<<EOS, undef, $tid);
+SELECT COUNT(*) FROM over WHERE tid <= ?
+EOS
+	is_deeply(\@n, [ 0 ], 'rethread dropped old threadids');
+	my $after = $non_ghost_tids->();
+	ok($after->[0]->[0] > $before->[-1]->[0],
+		'all tids greater than before');
+	is(scalar @$after, scalar @$before, 'thread count unchanged');
+	is_deeply([], \@warn, 'no warnings');
+	# diag "@pr"; # XXX do we care?
+}
 
 done_testing();
diff --git a/t/v2reindex.t b/t/v2reindex.t
index 77deffb4b..ea2b24e59 100644
--- a/t/v2reindex.t
+++ b/t/v2reindex.t
@@ -10,6 +10,7 @@ use PublicInbox::TestCommon;
 require_git(2.6);
 require_mods(qw(DBD::SQLite Search::Xapian));
 use_ok 'PublicInbox::V2Writable';
+use_ok 'PublicInbox::OverIdx';
 my ($inboxdir, $for_destroy) = tmpdir();
 my $ibx_config = {
 	inboxdir => $inboxdir,
@@ -423,6 +424,46 @@ ok(!-d $xap, 'Xapian directories removed again');
 		  ], 'msgmap as expected' );
 }
 
+my $check_rethread = sub {
+	my ($desc) = @_;
+	my @warn;
+	local $SIG{__WARN__} = sub { push @warn, @_ };
+	my %config = %$ibx_config;
+	my $ibx = PublicInbox::Inbox->new(\%config);
+	my $f = $ibx->over->{dbh}->sqlite_db_filename;
+	my $over = PublicInbox::OverIdx->new($f);
+	my $dbh = $over->connect;
+	my $non_ghost_tids = sub {
+		$dbh->selectall_arrayref(<<'');
+SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC
+
+	};
+	my $before = $non_ghost_tids->();
+
+	# mess up threading:
+	my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread');
+	my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid);
+	diag "messing up all threads with tid=$tid";
+
+	my $v2w = PublicInbox::V2Writable->new($ibx);
+	my @pr;
+	my $pr = sub { push @pr, @_ };
+	$v2w->index_sync({reindex => 1, rethread => 1, -progress => $pr});
+	# diag "@pr"; # nobody cares
+	is_deeply(\@warn, [], 'no warnings on reindex + rethread');
+
+	my @n = $dbh->selectrow_array(<<EOS, undef, $tid);
+SELECT COUNT(*) FROM over WHERE tid <= ?
+EOS
+	is_deeply(\@n, [ 0 ], 'rethread dropped old threadids');
+	my $after = $non_ghost_tids->();
+	ok($after->[0]->[0] > $before->[-1]->[0],
+		'all tids greater than before');
+	is(scalar @$after, scalar @$before, 'thread count unchanged');
+};
+
+$check_rethread->('no-monster');
+
 # A real example from linux-renesas-soc on lore where a 3-headed monster
 # of a message has 3 sets of common headers.  Another normal message
 # previously existed with a single Message-ID that conflicts with one
@@ -497,4 +538,8 @@ EOF
 	is_deeply([values %uniq], [3], 'search on different subjects');
 }
 
+# XXX: not deterministic when dealing with ambiguous messages, oh well
+$check_rethread->('3-headed-monster once');
+$check_rethread->('3-headed-monster twice');
+
 done_testing();

^ permalink raw reply related	[relevance 3%]

* [PATCH] import: drop '<' and '>' characters in addresses
  2020-02-25  9:28  0% ` weird From: lines [was: Two small issues when importing old archives] Eric Wong
@ 2020-02-26 10:21  0%   ` Eric Wong
  0 siblings, 0 replies; 7+ results
From: Eric Wong @ 2020-02-26 10:21 UTC (permalink / raw)
  To: Leah Neukirchen; +Cc: meta

Eric Wong <e@yhbt.net> wrote:
> Leah Neukirchen <leah@vuxu.org> wrote:
> > 2) Weird From: lines crash the whole import
> > 
> > From: "=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@jochen-kuepper.de
> > 
> > This funny line broke import_maildir:
> > 
> > fatal: Missing > in ident string: =?iso-8859-1?Q?Jochen_K=FCpper?= usenet <"=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@jochen-kuepper.de> 1101853296 +0100
> > fast-import: dumping crash report to /var/lib/public-inbox/repositories/ding.git/fast_import_crash_31402
> > EOF from fast-import:  at /usr/share/perl5/vendor_perl/PublicInbox/Import.pm line 96, <$r> line 54681.
> > 
> > I fixed it manually.  (But I think it's actually a valid mail address,
> > even in this botched state.)  I'm not sure what added the ">", it's
> > not in the original mail.
> > 
> > (I use public-inbox-1.3.0/git-2.25.0 on Void Linux.)
> 
> Gah, this looks like it's because Email::Address::XS leaves a
> "<" in the name...   Perhaps Import should delete all [<>]
> characters unconditionally? (or swap in appropriate Unicode
> homographs and assume users have the necessary glyphs...)

So we already do `$name =~ tr/<>//d', so I think doing the same
with `$email' is appropriate for fast-import.  The "correct"
address featuring '<' will still be indexed in Xapian, at least.

-------------8<-------------
Subject: [PATCH] import: drop '<' and '>' characters in addresses

Some strange "From:" lines will cause Email::Address::XS to
leave '<' (and presumably '>') in the address which
git-fast-import won't accept even if quoted.  Work around this
problem by deleting '<' and '>' the same way we delete them for
the ident name.

Reported-by: Leah Neukirchen <leah@vuxu.org>
Link: https://public-inbox.org/meta/87h7zfemur.fsf@vuxu.org/
---
 lib/PublicInbox/Import.pm | 4 ++++
 t/import.t                | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index d8dc49b8..68dc0c7e 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -293,6 +293,10 @@ sub extract_cmt_info ($) {
 		}
 	}
 	if (defined $email) {
+		# Email::Address::XS may leave quoted '<' in addresses,
+		# which git-fast-import doesn't like
+		$email =~ tr/<>//d;
+
 		# quiet down wide character warnings with utf8::encode
 		utf8::encode($email);
 	} else {
diff --git a/t/import.t b/t/import.t
index e71dd714..b88d308e 100644
--- a/t/import.t
+++ b/t/import.t
@@ -55,6 +55,8 @@ $im->done;
 my @revs = $git->qx(qw(rev-list HEAD));
 is(scalar @revs, 1, 'one revision created');
 
+my $odd = '"=?iso-8859-1?Q?J_K=FCpper?= <usenet"@example.de';
+$mime->header_set('From', $odd);
 $mime->header_set('Message-ID', '<b@example.com>');
 $mime->header_set('Subject', 'msg2');
 like($im->add($mime, sub { $mime }), qr/\A:\d+\z/, 'added 2nd message');

^ permalink raw reply related	[relevance 0%]

* weird From: lines [was: Two small issues when importing old archives]
  2020-02-24 20:45  4% Two small issues when importing old archives Leah Neukirchen
@ 2020-02-25  9:28  0% ` Eric Wong
  2020-02-26 10:21  0%   ` [PATCH] import: drop '<' and '>' characters in addresses Eric Wong
  0 siblings, 1 reply; 7+ results
From: Eric Wong @ 2020-02-25  9:28 UTC (permalink / raw)
  To: Leah Neukirchen; +Cc: meta

Leah Neukirchen <leah@vuxu.org> wrote:
> 2) Weird From: lines crash the whole import
> 
> From: "=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@jochen-kuepper.de
> 
> This funny line broke import_maildir:
> 
> fatal: Missing > in ident string: =?iso-8859-1?Q?Jochen_K=FCpper?= usenet <"=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@jochen-kuepper.de> 1101853296 +0100
> fast-import: dumping crash report to /var/lib/public-inbox/repositories/ding.git/fast_import_crash_31402
> EOF from fast-import:  at /usr/share/perl5/vendor_perl/PublicInbox/Import.pm line 96, <$r> line 54681.
> 
> I fixed it manually.  (But I think it's actually a valid mail address,
> even in this botched state.)  I'm not sure what added the ">", it's
> not in the original mail.
> 
> (I use public-inbox-1.3.0/git-2.25.0 on Void Linux.)

Gah, this looks like it's because Email::Address::XS leaves a
"<" in the name...   Perhaps Import should delete all [<>]
characters unconditionally? (or swap in appropriate Unicode
homographs and assume users have the necessary glyphs...)

---------8<----------
Subject: [RFC] t/address.t: dump failing case

"PublicInbox::Address" (w/o "PP") is Email::Address::XS 1.04
from Debian 10:

PublicInbox::Address names: $VAR1 = [
          '=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet'
        ];
PublicInbox::Address emails: $VAR1 = [
          '"=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@example.de'
        ];
PublicInbox::AddressPP names: $VAR1 = [
          '=?iso-8859-1?Q?Jochen_K=FCpper?='
        ];
PublicInbox::AddressPP emails: $VAR1 = [
          'usenet"@example.de'
        ];
---
 t/address.t | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/t/address.t b/t/address.t
index 6f4bff6c..8c39f04b 100644
--- a/t/address.t
+++ b/t/address.t
@@ -14,6 +14,11 @@ sub test_pkg {
 		[$emails->('User <e@example.com>, e@example.org')],
 		'address extraction works as expected');
 
+	my $odd = '"=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@example.de';
+	use Data::Dumper;
+	diag "$pkg names: " . Dumper([$names->($odd)]);
+	diag "$pkg emails: " . Dumper([$emails->($odd)]);
+
 	is_deeply(['user@example.com'],
 		[$emails->('<user@example.com (Comment)>')],
 		'comment after domain accepted before >');

^ permalink raw reply related	[relevance 0%]

* Two small issues when importing old archives
@ 2020-02-24 20:45  4% Leah Neukirchen
  2020-02-25  9:28  0% ` weird From: lines [was: Two small issues when importing old archives] Eric Wong
  0 siblings, 1 reply; 7+ results
From: Leah Neukirchen @ 2020-02-24 20:45 UTC (permalink / raw)
  To: meta

Hi,

I've recently imported some sizable archives (~100k messages) of old
mailing lists and noticed some slight inconveniences:

1) RFC5322/822 invalid Date: headers should be parsed more gracefully

Some old mails had Date: headers without time zones, e.g.
Date: Sat, 27 Sep 1997 10:02:32

This results in public-inbox asserting this is the current date.
But this assumption makes no sense (literally every other guess
would be more likely), and also results in these messages showing up
on the first page of the archive.  Furthermore, sorting is then not
stable, pressing F5 makes the threads jump around.  I'd recommend
falling back to +0000 instead.

2) Weird From: lines crash the whole import

From: "=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@jochen-kuepper.de

This funny line broke import_maildir:

fatal: Missing > in ident string: =?iso-8859-1?Q?Jochen_K=FCpper?= usenet <"=?iso-8859-1?Q?Jochen_K=FCpper?= <usenet"@jochen-kuepper.de> 1101853296 +0100
fast-import: dumping crash report to /var/lib/public-inbox/repositories/ding.git/fast_import_crash_31402
EOF from fast-import:  at /usr/share/perl5/vendor_perl/PublicInbox/Import.pm line 96, <$r> line 54681.

I fixed it manually.  (But I think it's actually a valid mail address,
even in this botched state.)  I'm not sure what added the ">", it's
not in the original mail.

(I use public-inbox-1.3.0/git-2.25.0 on Void Linux.)

thx,
-- 
Leah Neukirchen  <leah@vuxu.org>  https://leahneukirchen.org/

^ permalink raw reply	[relevance 4%]

* [ANNOUNCE] public-inbox 1.3.0
@ 2020-02-10  5:52 21% Eric Wong
  0 siblings, 0 replies; 7+ results
From: Eric Wong @ 2020-02-10  5:52 UTC (permalink / raw)
  To: meta

Many internal improvements to improve the developer experience,
long-term maintainability, ease-of-installation and compatibility.
There are also several bugfixes.

Some of the internal improvements involve avoiding Perl startup
time in tests.  "make check" now runs about 50% faster than
before, and the new "make check-run" can be around 30% faster
than "make check" after being primed by "make check".

Most closures (anonymous subroutines) are purged from the
-nntpd, -httpd and WWW code paths to make checking for memory
leaks easier.

* documentation now builds on BSD make

* Date::Parse (TimeDate CPAN distribution) is now optional, allowing
  installation from OpenBSD systems via "pkg".

* the work-in-progress Xapian.pm SWIG bindings are now supported
  in addition to the traditional Search::Xapian XS bindings.
  Only the SWIG bindings are packaged for OpenBSD.

* Plack is optional for users who wish to avoid web-related components

* Filesys::Notify::Simple is optional for non-watch users
  (but Plack will still pull it in)

* improved internal error checking and reporting in numerous places

* fixed Perl 5.10.1 compatibility (tested with Devel::PatchPerl)

* IPC::Run and XML::Feed are no longer used in tests,
  though XML::TreePP becomes an optional test dependency.

* Email::Address::XS used if available (newer Email::MIME
  requires it), it should handle more corner cases.

* PublicInbox::WWW:
  - "nested" search results page now shows relevancy percentages
  - many solver bugs fixed
  - solver works on "-U0" patches using "git apply --unidiff-zero"
  - solver now compatible with git < v1.8.5 (but >= v1.8.0)
  - raw HTML no longer shown inline in multipart/alternative messages
    (v1.2.0 regression)
  - reduced memory usage for displaying multipart messages
  - static file responses support Last-Modified/If-Modified-Since
  - avoid trailing underlines in diffstat linkification
  - more consistent handling of messages without Subjects

* public-inbox-httpd / public-inbox-nntpd:
  - MSG_MORE used consistently in long responses
  - fixed IO::KQueue usage on *BSDs
  - listen sockets are closed immediately on graceful shutdown
  - missed signals avoided with signalfd or EVFILT_SIGNAL
  - Linux x32 ABI support

* public-inbox-nntpd:
  - Y2020 workaround for Time::Local

* public-inbox-watch:
  - avoid memory leak from cyclic reference on SIGHUP
  - fix documentation of publicinboxwatch.watchspam

* public-inbox-convert:
  - avoid article number jumps when converting indexed v1 inboxes

* public-inbox-compact / public-inbox-xcpdb:
  - concurrent invocations of -compact and -xcpdb commands,
    not just -mda, -watch, -learn, -purge

* examples/unsubscribe.milter:
  - support unique mailto: unsubscribe

Release tarball available for download at:

https://public-inbox.org/public-inbox.git/snapshot/public-inbox-1.3.0.tar.gz

Please report bugs via plain-text mail to: meta@public-inbox.org

See archives at https://public-inbox.org/meta/ for all history.
See https://public-inbox.org/TODO for what the future holds.

^ permalink raw reply	[relevance 21%]

* [PATCH] doc: more 1.3.0 release notes updates
@ 2020-01-31 23:45  6% Eric Wong
  0 siblings, 0 replies; 7+ results
From: Eric Wong @ 2020-01-31 23:45 UTC (permalink / raw)
  To: meta

Some updates with recent bugfixes and a few wording/formatting
improvements.
---
 I'm thinking it's time for a release, soon; before new features
 creep in...

 Documentation/RelNotes/v1.3.0.eml | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/Documentation/RelNotes/v1.3.0.eml b/Documentation/RelNotes/v1.3.0.eml
index 9000ccaf..cbf7438b 100644
--- a/Documentation/RelNotes/v1.3.0.eml
+++ b/Documentation/RelNotes/v1.3.0.eml
@@ -3,8 +3,9 @@ To: meta@public-inbox.org
 Subject: [WIP] public-inbox 1.3.0
 Content-Type: text/plain; charset=utf-8
 
-Many internal improvements to improve the developer experience
-and long-term maintainability.
+Many internal improvements to improve the developer experience,
+long-term maintainability, ease-of-installation and compatibility.
+There are also several bugfixes.
 
 Some of the internal improvements involve avoiding Perl startup
 time in tests.  "make check" now runs about 50% faster than
@@ -27,16 +28,18 @@ leaks easier.
 * Plack is optional for users who wish to avoid web-related components
 
 * Filesys::Notify::Simple is optional for non-watch users
-  (but Plack will pull it in)
+  (but Plack will still pull it in)
 
 * improved internal error checking and reporting in numerous places
 
+* fixed Perl 5.10.1 compatibility (tested with Devel::PatchPerl)
+
 * IPC::Run is no longer used in tests
 
 * Email::Address::XS used if available (newer Email::MIME
   requires it), it should handle more corner cases.
 
-* PublicInbox::WWW
+* PublicInbox::WWW:
   - "nested" search results page now shows relevancy percentages
   - many solver bugs fixed
   - solver works on "-U0" patches using "git apply --unidiff-zero"
@@ -46,6 +49,7 @@ leaks easier.
   - reduced memory usage for displaying multipart messages
   - static file responses support Last-Modified/If-Modified-Since
   - avoid trailing underlines in diffstat linkification
+  - more consistent handling of messages without Subjects
 
 * public-inbox-httpd / public-inbox-nntpd:
   - MSG_MORE used consistently in long responses
@@ -54,15 +58,27 @@ leaks easier.
   - missed signals avoided with signalfd or EVFILT_SIGNAL
 
 * public-inbox-nntpd:
-  Y2020 workaround for Time::Local
+  - Y2020 workaround for Time::Local
 
-* public-inbox-watch
+* public-inbox-watch:
   - avoid memory leak from cyclic reference on SIGHUP
   - fix documentation of publicinboxwatch.watchspam
 
+* public-inbox-convert:
+  - avoid article number jumps when converting indexed v1 inboxes
+
+* public-inbox-compact / public-inbox-xcpdb:
+  - concurrent invocations of -compact and -xcpdb commands,
+    not just -mda, -watch, -learn, -purge
+
+* examples/unsubscribe.milter:
+  - support unique mailto: unsubscribe
+
 Release tarballs will be available for download at
 
 	https://public-inbox.org/public-inbox.git
 
+Please report bugs via plain-text mail to: meta@public-inbox.org
+
 See archives at https://public-inbox.org/meta/ for all history.
 See https://public-inbox.org/TODO for what the future holds.

^ permalink raw reply related	[relevance 6%]

* [PATCH 3/6] doc: release notes: set Date for 1.2.0, start 1.3.0
  @ 2020-01-01  9:57  6% ` Eric Wong
  0 siblings, 0 replies; 7+ results
From: Eric Wong @ 2020-01-01  9:57 UTC (permalink / raw)
  To: meta

Seems like a lot's happened since 1.2, but it's mostly
internal stuff...
---
 Documentation/RelNotes/v1.2.0.eml |  9 ++++++
 Documentation/RelNotes/v1.3.0.eml | 50 +++++++++++++++++++++++++++++++
 MANIFEST                          |  1 +
 3 files changed, 60 insertions(+)
 create mode 100644 Documentation/RelNotes/v1.3.0.eml

diff --git a/Documentation/RelNotes/v1.2.0.eml b/Documentation/RelNotes/v1.2.0.eml
index 2eeb0de0..d8b8d2b6 100644
--- a/Documentation/RelNotes/v1.2.0.eml
+++ b/Documentation/RelNotes/v1.2.0.eml
@@ -1,3 +1,5 @@
+From e@80x24.org Sun Nov  3 03:12:41 2019
+Date: Sun, 3 Nov 2019 03:12:41 +0000
 From: Eric Wong <e@80x24.org>
 To: meta@public-inbox.org
 Subject: [ANNOUNCE] public-inbox 1.2.0
@@ -73,5 +75,12 @@ for their sponsorship and support over the past two years.
 
 https://public-inbox.org/releases/public-inbox-1.2.0.tar.gz
 
+SHA256: dabc735a5cfe396f457ac721559de26ae38abbaaa74612eb786e9e2e1ca94269
+
+  Chances are: You don't know me and never will.  Everybody else
+  can verify the tarball and sign a reply saying they've
+  verified it, instead.  The more who do this, the better, but
+  don't trust the BOFH :P
+
 See archives at https://public-inbox.org/meta/ for all history.
 See https://public-inbox.org/TODO for what the future holds.
diff --git a/Documentation/RelNotes/v1.3.0.eml b/Documentation/RelNotes/v1.3.0.eml
new file mode 100644
index 00000000..11806ccd
--- /dev/null
+++ b/Documentation/RelNotes/v1.3.0.eml
@@ -0,0 +1,50 @@
+From: Eric Wong <e@80x24.org>
+To: meta@public-inbox.org
+Subject: [WIP] public-inbox 1.3.0
+Content-Type: text/plain; charset=utf-8
+
+Many internal improvements to improve the developer experience
+and long-term maintainability.
+
+Many of the internal improvements focused on being able to avoid
+Perl startup time in tests.  "make check" now runs about 50%
+faster than before, and the new "make check-run" can be around
+30% faster after being primed by "make check".
+
+Most closures (anonymous subroutines) are purged from the
+-nntpd, -httpd and WWW code paths to make checking for memory
+leaks easier.
+
+* documentation now builds on BSD make
+
+* Date::Parse (TimeDate CPAN distribution) is now optional, allowing
+  installation from OpenBSD systems via "pkg".
+
+* the work-in-progress Xapian.pm SWIG bindings are now supported
+  in addition to the traditional Search::Xapian XS bindings.
+  Only SWIG bindings are packaged for OpenBSD.
+
+* IPC::Run is no longer used in tests
+
+* improved internal error checking and reporting in numerous places
+  
+* PublicInbox::WWW
+  - "nested" search results page now shows relevancy percentages
+  - solver works on "-U0" patches using "git apply --unidiff-zero"
+  - raw HTML no longer shown inline in multipart/alternative messages
+    (v1.2.0 regression)
+
+* public-inbox-httpd / public-inbox-nntpd:
+  - MSG_MORE used consistently in long responses
+  - fixed IO::KQueue usage on *BSDs
+
+* public-inbox-watch
+  - avoid memory leak from cyclic reference on SIGHUP
+  - fix documentation of publicinboxwatch.watchspam
+
+Release tarballs will be available for download at
+
+	https://public-inbox.org/public-inbox.git
+
+See archives at https://public-inbox.org/meta/ for all history.
+See https://public-inbox.org/TODO for what the future holds.
diff --git a/MANIFEST b/MANIFEST
index f649bbef..59716adf 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -6,6 +6,7 @@ Documentation/.gitignore
 Documentation/RelNotes/v1.0.0.eml
 Documentation/RelNotes/v1.1.0-pre1.eml
 Documentation/RelNotes/v1.2.0.eml
+Documentation/RelNotes/v1.3.0.eml
 Documentation/dc-dlvr-spam-flow.txt
 Documentation/design_notes.txt
 Documentation/design_www.txt

^ permalink raw reply related	[relevance 6%]

Results 1-7 of 7 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-01-01  9:57     [PATCH 0/6] doc updates and such Eric Wong
2020-01-01  9:57  6% ` [PATCH 3/6] doc: release notes: set Date for 1.2.0, start 1.3.0 Eric Wong
2020-01-31 23:45  6% [PATCH] doc: more 1.3.0 release notes updates Eric Wong
2020-02-10  5:52 21% [ANNOUNCE] public-inbox 1.3.0 Eric Wong
2020-02-24 20:45  4% Two small issues when importing old archives Leah Neukirchen
2020-02-25  9:28  0% ` weird From: lines [was: Two small issues when importing old archives] Eric Wong
2020-02-26 10:21  0%   ` [PATCH] import: drop '<' and '>' characters in addresses Eric Wong
2020-07-24  5:55     [PATCH 00/20] indexing changes and new features Eric Wong
2020-07-24  5:55  3% ` [PATCH 01/20] index: support --rethread switch to fix old indices Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).