user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 11/11] view: depend on SearchMsg for Message-ID
  2018-03-27 11:11  6% [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11  3% ` Eric Wong (Contractor, The Linux Foundation)
  0 siblings, 0 replies; 6+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
  To: meta

Since we need to handle messages with multiple and duplicate
Message-ID headers, our thread skeleton display must account
for that.

Since we have a "preferred" Message-ID in case of conflicts,
use it as the UUID in an Atom feed so readers do not get
confused by conflicts.
---
 lib/PublicInbox/Feed.pm          | 36 ++++++++--------
 lib/PublicInbox/Inbox.pm         |  9 ++++
 lib/PublicInbox/SearchMsg.pm     |  6 +--
 lib/PublicInbox/SearchView.pm    | 14 +++---
 lib/PublicInbox/View.pm          | 93 ++++++++++++++++++----------------------
 lib/PublicInbox/WWW.pm           |  1 +
 lib/PublicInbox/WwwAtomStream.pm |  9 ++--
 t/psgi_v2.t                      | 26 +++++++++++
 8 files changed, 109 insertions(+), 85 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 74d0bbd..f2285a6 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,18 +8,18 @@ use warnings;
 use PublicInbox::MIME;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
+use PublicInbox::SearchMsg; # this loads w/o Search::Xapian
 
 # main function
 sub generate {
 	my ($ctx) = @_;
-	my $oids = recent_blobs($ctx);
-	return _no_thread() unless @$oids;
+	my $msgs = recent_msgs($ctx);
+	return _no_thread() unless @$msgs;
 
-	my $git = $ctx->{-inbox}->git;
+	my $ibx = $ctx->{-inbox};
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
-		while (my $oid = shift @$oids) {
-			my $msg = $git->cat_file($oid) or next;
-			return PublicInbox::MIME->new($msg);
+		while (my $smsg = shift @$msgs) {
+			$ibx->smsg_mime($smsg) and return $smsg;
 		}
 	});
 }
@@ -36,9 +36,8 @@ sub generate_thread_atom {
 	$ctx->{-html_url} = $html_url;
 	my $msgs = $res->{msgs};
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
-		while (my $msg = shift @$msgs) {
-			$msg = $ibx->msg_by_smsg($msg) and
-				return PublicInbox::MIME->new($msg);
+		while (my $smsg = shift @$msgs) {
+			$ibx->smsg_mime($smsg) and return $smsg;
 		}
 	});
 }
@@ -62,20 +61,19 @@ sub generate_html_index {
 
 sub new_html {
 	my ($ctx) = @_;
-	my $oids = recent_blobs($ctx);
-	if (!@$oids) {
+	my $msgs = recent_msgs($ctx);
+	if (!@$msgs) {
 		return [404, ['Content-Type', 'text/plain'],
 			["No messages, yet\n"] ];
 	}
 	$ctx->{-html_tip} = '<pre>';
 	$ctx->{-upfx} = '';
 	$ctx->{-hr} = 1;
-	my $git = $ctx->{-inbox}->git;
+	my $ibx = $ctx->{-inbox};
 	PublicInbox::WwwStream->response($ctx, 200, sub {
-		while (my $oid = shift @$oids) {
-			my $msg = $git->cat_file($oid) or next;
-			my $m = PublicInbox::MIME->new($msg);
-			my $more = scalar @$oids;
+		while (my $smsg = shift @$msgs) {
+			my $m = $ibx->smsg_mime($smsg) or next;
+			my $more = scalar @$msgs;
 			return PublicInbox::View::index_entry($m, $ctx, $more);
 		}
 		new_html_footer($ctx);
@@ -103,7 +101,7 @@ sub new_html_footer {
 	"<hr><pre>page: $next$latest</pre>";
 }
 
-sub recent_blobs {
+sub recent_msgs {
 	my ($ctx) = @_;
 	my $ibx = $ctx->{-inbox};
 	my $max = $ibx->{feedmax};
@@ -119,7 +117,7 @@ sub recent_blobs {
 		my $res = $srch->query('', { limit => $max, offset => $o });
 		my $next = $o + $max;
 		$ctx->{next_page} = "o=$next" if $res->{total} >= $next;
-		return [ map { $_->{blob} } @{$res->{msgs}} ];
+		return $res->{msgs};
 	}
 
 	my $hex = '[a-f0-9]';
@@ -171,7 +169,7 @@ sub recent_blobs {
 	}
 
 	$ctx->{next_page} = "r=$last_commit" if $last_commit;
-	\@oids;
+	[ map { bless {blob => $_ }, 'PublicInbox::SearchMsg' } @oids ];
 }
 
 1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 666c81d..b1ea8dc 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -8,6 +8,7 @@ use warnings;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
 use Devel::Peek qw(SvREFCNT);
+use PublicInbox::MIME;
 
 my $cleanup_timer;
 eval {
@@ -246,6 +247,14 @@ sub msg_by_smsg ($$;$) {
 	$str;
 }
 
+sub smsg_mime {
+	my ($self, $smsg, $ref) = @_;
+	if (my $s = msg_by_smsg($self, $smsg, $ref)) {
+		$smsg->{mime} = PublicInbox::MIME->new($s);
+		return $smsg;
+	}
+}
+
 sub path_check {
 	my ($self, $path) = @_;
 	git($self)->check('HEAD:'.$path);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b944868..e314fed 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -6,7 +6,6 @@
 package PublicInbox::SearchMsg;
 use strict;
 use warnings;
-use Search::Xapian;
 use PublicInbox::MID qw/mid_clean mid_mime/;
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
@@ -165,9 +164,10 @@ sub mid ($;$) {
 		$self->{mid} = $mid;
 	} elsif (my $rv = $self->{mid}) {
 		$rv;
+	} elsif ($self->{doc}) {
+		$self->{mid} = _get_term_val($self, 'Q', qr/\AQ/);
 	} else {
-		$self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
-				$self->_extract_mid;
+		$self->_extract_mid; # v1 w/o Xapian
 	}
 }
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 6e537b4..1a8fe7f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -248,15 +248,14 @@ sub mset_thread {
 		*PublicInbox::View::pre_thread);
 
 	@$msgs = reverse @$msgs if $r;
-	my $mime;
 	sub {
 		return unless $msgs;
-		while ($mime = pop @$msgs) {
-			$mime = $inbox->msg_by_smsg($mime) and last;
+		my $smsg;
+		while (my $m = pop @$msgs) {
+			$smsg = $inbox->smsg_mime($m) and last;
 		}
-		if ($mime) {
-			$mime = PublicInbox::MIME->new($mime);
-			return PublicInbox::View::index_entry($mime, $ctx,
+		if ($smsg) {
+			return PublicInbox::View::index_entry($smsg, $ctx,
 				scalar @$msgs);
 		}
 		$msgs = undef;
@@ -290,8 +289,7 @@ sub adump {
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
 		while (my $x = shift @items) {
 			$x = load_doc_retry($srch, $x);
-			$x = $ibx->msg_by_smsg($x) and
-					return PublicInbox::MIME->new($x);
+			$x = $ibx->smsg_mime($x) and return $x;
 		}
 		return undef;
 	});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 34ab3e5..5fb2b31 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -9,7 +9,8 @@ use warnings;
 use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
 use PublicInbox::Linkify;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids
+			references/;
 use PublicInbox::MsgIter;
 use PublicInbox::Address;
 use PublicInbox::WwwStream;
@@ -69,30 +70,31 @@ sub msg_page {
 				$more = [ $head, $tail, $db ];
 			}
 		});
+		return unless $first;
 	} else {
 		$first = $ibx->msg_by_mid($mid) or return;
 	}
-	$first ? msg_html($ctx, PublicInbox::MIME->new($first), $more) : undef;
+	msg_html($ctx, PublicInbox::MIME->new($first), $more);
 }
 
 sub msg_html_more {
 	my ($ctx, $more, $nr) = @_;
 	my $str = eval {
-		my $mref;
+		my $smsg;
 		my ($head, $tail, $db) = @$more;
-		for (; !defined($mref) && $head != $tail; $head++) {
-			my $smsg = PublicInbox::SearchMsg->get($head, $db,
-								$ctx->{mid});
-			next if $smsg->type ne 'mail';
-			$mref = $ctx->{-inbox}->msg_by_smsg($smsg);
+		my $mid = $ctx->{mid};
+		for (; !defined($smsg) && $head != $tail; $head++) {
+			my $m = PublicInbox::SearchMsg->get($head, $db, $mid);
+			next if $m->type ne 'mail';
+			$smsg = $ctx->{-inbox}->smsg_mime($m);
 		}
 		if ($head == $tail) { # done
 			@$more = ();
 		} else {
 			$more->[0] = $head;
 		}
-		if ($mref) {
-			my $mime = PublicInbox::MIME->new($mref);
+		if ($smsg) {
+			my $mime = $smsg->{mime};
 			_msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
 				multipart_text_as_html($mime, '',
 							$ctx->{-obfs_ibx}) .
@@ -167,14 +169,8 @@ EOF
 
 sub in_reply_to {
 	my ($hdr) = @_;
-	my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID');
-	my @refs = (($hdr->header_raw('References') || '') =~ /<([^>]+)>/g);
-	push(@refs, (($hdr->header_raw('In-Reply-To') || '') =~ /<([^>]+)>/g));
-	while (defined(my $irt = pop @refs)) {
-		next if $mid{"<$irt>"};
-		return $irt;
-	}
-	undef;
+	my $refs = references($hdr);
+	$refs->[-1];
 }
 
 sub _hdr_names_html ($$) {
@@ -191,12 +187,10 @@ sub nr_to_s ($$$) {
 
 # this is already inside a <pre>
 sub index_entry {
-	my ($mime, $ctx, $more) = @_;
+	my ($smsg, $ctx, $more) = @_;
 	my $srch = $ctx->{srch};
-	my $hdr = $mime->header_obj;
-	my $subj = $hdr->header('Subject');
-
-	my $mid_raw = mid_clean(mid_mime($mime));
+	my $subj = $smsg->subject;
+	my $mid_raw = $smsg->mid;
 	my $id = id_compress($mid_raw, 1);
 	my $id_m = 'm'.$id;
 
@@ -211,6 +205,8 @@ sub index_entry {
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
+	my $mime = $smsg->{mime};
+	my $hdr = $mime->header_obj;
 	foreach my $f (qw(To Cc)) {
 		my $dst = _hdr_names_html($hdr, $f);
 		if ($dst ne '') {
@@ -220,7 +216,7 @@ sub index_entry {
 	}
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
-	$rv .= "From: $from @ "._msg_date($hdr)." UTC";
+	$rv .= "From: $from @ ".fmt_ts($smsg->ds)." UTC";
 	my $upfx = $ctx->{-upfx};
 	my $mhref = $upfx . mid_escape($mid_raw) . '/';
 	$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
@@ -363,30 +359,30 @@ sub pre_thread  {
 }
 
 sub thread_index_entry {
-	my ($ctx, $level, $mime) = @_;
+	my ($ctx, $level, $smsg) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $level);
-	$beg . '<pre>' . index_entry($mime, $ctx, 0) . '</pre>' . $end;
+	$beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
 }
 
 sub stream_thread ($$) {
 	my ($rootset, $ctx) = @_;
 	my $inbox = $ctx->{-inbox};
-	my $mime;
 	my @q = map { (0, $_) } @$rootset;
 	my $level;
+	my $smsg;
 	while (@q) {
 		$level = shift @q;
 		my $node = shift @q or next;
 		my $cl = $level + 1;
 		unshift @q, map { ($cl, $_) } @{$node->{children}};
-		$mime = $inbox->msg_by_smsg($node->{smsg}) and last;
+		$smsg = $inbox->smsg_mime($node->{smsg}) and last;
 	}
-	return missing_thread($ctx) unless $mime;
+	return missing_thread($ctx) unless $smsg;
 
 	$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
-	$mime = PublicInbox::MIME->new($mime);
-	$ctx->{-title_html} = ascii_html($mime->header('Subject'));
-	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
+	$ctx->{-title_html} = ascii_html($smsg->subject);
+	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg);
+	$smsg = undef;
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $ctx;
 		while (@q) {
@@ -394,10 +390,8 @@ sub stream_thread ($$) {
 			my $node = shift @q or next;
 			my $cl = $level + 1;
 			unshift @q, map { ($cl, $_) } @{$node->{children}};
-			my $mid = $node->{id};
-			if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
-				$mime = PublicInbox::MIME->new($mime);
-				return thread_index_entry($ctx, $level, $mime);
+			if ($smsg = $inbox->smsg_mime($node->{smsg})) {
+				return thread_index_entry($ctx, $level, $smsg);
 			} else {
 				return ghost_index_entry($ctx, $level, $node);
 			}
@@ -445,24 +439,21 @@ sub thread_html {
 	return stream_thread($rootset, $ctx) unless $ctx->{flat};
 
 	# flat display: lazy load the full message from smsg
-	my $mime;
-	while ($mime = shift @$msgs) {
-		$mime = $inbox->msg_by_smsg($mime) and last;
+	my $smsg;
+	while (my $m = shift @$msgs) {
+		$smsg = $inbox->smsg_mime($m) and last;
 	}
-	return missing_thread($ctx) unless $mime;
-	$mime = PublicInbox::MIME->new($mime);
-	$ctx->{-title_html} = ascii_html($mime->header('Subject'));
-	$ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs);
-	$mime = undef;
+	return missing_thread($ctx) unless $smsg;
+	$ctx->{-title_html} = ascii_html($smsg->subject);
+	$ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+	$smsg = undef;
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $msgs;
-		while ($mime = shift @$msgs) {
-			$mime = $inbox->msg_by_smsg($mime) and last;
-		}
-		if ($mime) {
-			$mime = PublicInbox::MIME->new($mime);
-			return index_entry($mime, $ctx, scalar @$msgs);
+		$smsg = undef;
+		while (my $m = shift @$msgs) {
+			$smsg = $inbox->smsg_mime($m) and last;
 		}
+		return index_entry($smsg, $ctx, scalar @$msgs) if $smsg;
 		$msgs = undef;
 		$skel;
 	});
@@ -656,7 +647,7 @@ sub _msg_html_prepare {
 sub thread_skel {
 	my ($dst, $ctx, $hdr, $tpfx) = @_;
 	my $srch = $ctx->{srch};
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
+	my $mid = mids($hdr)->[0];
 	my $sres = $srch->get_thread($mid);
 	my $nr = $sres->{total};
 	my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f5ed271..a2c2a4a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -225,6 +225,7 @@ sub get_mid_txt {
 sub get_mid_html {
 	my ($ctx) = @_;
 	require PublicInbox::View;
+	searcher($ctx);
 	PublicInbox::View::msg_page($ctx) || r404($ctx);
 }
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index bb574a7..38eba2a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -33,8 +33,8 @@ sub response {
 sub getline {
 	my ($self) = @_;
 	if (my $middle = $self->{cb}) {
-		my $mime = $middle->();
-		return feed_entry($self, $mime) if $mime;
+		my $smsg = $middle->();
+		return feed_entry($self, $smsg) if $smsg;
 	}
 	delete $self->{cb} ? '</feed>' : undef;
 }
@@ -92,10 +92,11 @@ sub mid2uuid ($) {
 
 # returns undef or string
 sub feed_entry {
-	my ($self, $mime) = @_;
+	my ($self, $smsg) = @_;
 	my $ctx = $self->{ctx};
+	my $mime = $smsg->{mime};
 	my $hdr = $mime->header_obj;
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
+	my $mid = $smsg->mid;
 	my $irt = PublicInbox::View::in_reply_to($hdr);
 	my $uuid = mid2uuid($mid);
 	my $base = $ctx->{feed_base_url};
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index eaa3218..2a798d6 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -139,6 +139,32 @@ test_psgi(sub { $www->call(@_) }, sub {
 	foreach my $mid ('a-mid@b', $new_mid, $third) {
 		like($raw, qr/&lt;\Q$mid\E&gt;/s, "Message-ID $mid shown");
 	}
+	like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+	my $exp = [ qw(<a-mid@b> <reuse@mid>) ];
+	$mime->header_set('Message-Id', @$exp);
+	$mime->header_set('Subject', '4th dupe');
+	local $SIG{__WARN__} = sub {};
+	ok($im->add($mime), 'added one message');
+	$im->done;
+	my @h = $mime->header('Message-ID');
+	is_deeply($exp, \@h, 'reused existing Message-ID');
+
+	$config->each_inbox(sub { $_[0]->search->reopen });
+
+	$res = $cb->(GET('/v2test/new.atom'));
+	my @ids = ($res->content =~ m!<id>urn:uuid:([^<]+)</id>!sg);
+	my %ids;
+	$ids{$_}++ for @ids;
+	is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique');
+
+	$res = $cb->(GET('/v2test/reuse@mid/T/'));
+	$raw = $res->content;
+	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/');
+
+	$res = $cb->(GET('/v2test/reuse@mid/t/'));
+	$raw = $res->content;
+	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
 });
 
 done_testing();
-- 
EW


^ permalink raw reply related	[relevance 3%]

* [ANNOUNCE] public-inbox 1.1.0-pre1
@ 2018-05-09 20:23  5% Eric Wong
  0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2018-05-09 20:23 UTC (permalink / raw)
  To: meta; +Cc: Konstantin Ryabitsev

Pre-release for v2 repository support.
Thanks to The Linux Foundation for supporting this work!

https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz

SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009
(only intended to guard against accidental file corruption)

shortlog below:

Eric Wong (27):
      nntp: improve fairness during XOVER and similar commands
      nntp: do not drain rbuf if there is a command pending
      extmsg: use news.gmane.org for Message-ID lookups
      searchview: fix non-numeric comparison
      mbox: do not barf on queries which return no results
      nntp: allow and ignore empty commands
      ensure SQLite and Xapian files respect core.sharedRepository
      TODO: a few more updates
      filter/rubylang: do not set altid on spam training
      import: cleanup git cat-file processes when ->done
      disallow "\t" and "\n" in OVER headers
      searchidx: release lock again during v1 batch callback
      searchidx: remove leftover debugging code
      convert: copy description and git config from v1 repo
      view: untangle loop when showing message headers
      view: wrap To: and Cc: headers in HTML display
      view: drop redundant References: display code
      TODO: add EPOLLEXCLUSIVE item
      searchview: do not blindly append "l" parameter to URL
      search: avoid repeated mbox results from search
      msgmap: add limit to response for NNTP
      thread: prevent hidden threads in /$INBOX/ landing page
      thread: sort incoming messages by Date
      searchidx: preserve umask when starting/committing transactions
      scripts/import_slrnspool: support v2 repos
      scripts/import_slrnspool: cleanup progress messages
      public-inbox 1.1.0-pre1

Eric Wong (Contractor, The Linux Foundation) (239):
      AUTHORS: add The Linux Foundation
      watch_maildir: allow '-' in mail filename
      scripts/import_vger_from_mbox: relax From_ line match slightly
      import: stop writing legacy ssoma.index by default
      import: begin supporting this without ssoma.lock
      import: initial handling for v2
      t/import: test for last_object_id insertion
      content_id: add test case
      searchmsg: add mid_mime import for _extract_mid
      scripts/import_vger_from_mbox: support --dry-run option
      import: APIs to support v2 use
      search: free up 'Q' prefix for a real unique identifier
      searchidx: fix comment around next_thread_id
      address: extract more characters from email addresses
      import: pass "raw" dates to git-fast-import(1)
      scripts/import_vger_from_mbox: use v2 layout for import
      import: quiet down warnings from bogus From: lines
      import: allow the epoch (0s) as a valid time
      extmsg: fix broken Xapian MID lookup
      search: stop assuming Message-ID is unique
      www: stop assuming mainrepo == git_dir
      v2writable: initial cut for repo-rotation
      git: reload alternates file on missing blob
      v2: support Xapian + SQLite indexing
      import_vger_from_inbox: allow "-V" option
      import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering
      v2: parallelize Xapian indexing
      v2writable: round-robin to partitions based on article number
      searchidxpart: increase pipe size for partitions
      v2writable: warn on duplicate Message-IDs
      searchidx: do not modify Xapian DB while iterating
      v2/ui: some hacky things to get the PSGI UI to show up
      v2/ui: retry DB reopens in a few more places
      v2writable: cleanup unused pipes in partitions
      searchidxpart: binmode
      use PublicInbox::MIME consistently
      searchidxpart: chomp line before splitting
      searchidx*: name child subprocesses
      searchidx: get rid of pointless index_blob wrapper
      view: remove X-PI-TS reference
      searchidxthread: load doc data for references
      searchidxpart: force integers into add_message
      search: reopen skeleton DB as well
      searchidx: index values in the threader
      search: use different Enquire object for skeleton queries
      rename SearchIdxThread to SearchIdxSkeleton
      v2writable: commit to skeleton via remote partitions
      searchidxskeleton: extra error checking
      searchidx: do not modify Xapian DB while iterating
      search: query_xover uses skeleton DB iff available
      v2/ui: get nntpd and init tests running on v2
      v2writable: delete ::Import obj when ->done
      search: remove informational "warning" message
      searchidx: add PID to error message when die-ing
      content_id: special treatment for Message-Id headers
      evcleanup: disable outside of daemon
      v2writable: deduplicate detection on add
      evcleanup: do not create event loop if nothing was registered
      mid: add `mids' and `references' methods for extraction
      content_id: use `mids' and `references' for MID extraction
      searchidx: use new `references' method for parsing References
      content_id: no need to be human-friendly
      v2writable: inject new Message-IDs on true duplicates
      search: revert to using 'Q' as a uniQue id per-Xapian conventions
      searchidx: support indexing multiple MIDs
      mid: be strict with References, but loose on Message-Id
      searchidx: avoid excessive XNQ indexing with diffs
      searchidxskeleton: add a note about locking
      v2writable: generated Message-ID goes first
      searchidx: use add_boolean_term for internal terms
      searchidx: add NNTP article number as a searchable term
      mid: truncate excessively long MIDs early
      nntp: use NNTP article numbers for lookups
      nntp: fix NEWNEWS command
      searchidx: store the primary MID in doc data for NNTP
      import: consolidate object info for v2 imports
      v2: avoid redundant/repeated configs for git partition repos
      INSTALL: document more optional dependencies
      search: favor skeleton DB for lookup_mail
      search: each_smsg_by_mid uses skeleton if available
      v2writable: remove unnecessary skeleton commit
      favor Received: date over Date: header globally
      import: fall back to Sender for extracting name and email
      scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping
      v2writable: detect and use previous partition count
      extmsg: rework partial MID matching to favor current inbox
      extmsg: rework partial MID matching to favor current inbox
      content_id: use Sender header if From is not available
      v2writable: support "barrier" operation to avoid reforking
      use string ref for Email::Simple->new
      v2writable: remove unnecessary idx_init call
      searchidx: do not delete documents while iterating
      search: allow ->reopen to be chainable
      v2writable: implement remove correctly
      skeleton: barrier init requires a lock
      import: (v2) delete writes the blob into history in subdir
      import: (v2): write deletes to a separate '_' subdirectory
      import: implement barrier operation for v1 repos
      mid: mid_mime uses v2-compatible mids function
      watchmaildir: use content_digest to generate Message-Id
      import: force Message-ID generation for v1 here
      import: switch to URL-safe Base64 for Message-IDs
      v2writable: test for idempotent removals
      import: enable locking under v2
      index: s/GIT_DIR/REPO_DIR/
      Lock: new base class for writable lockers
      t/watch_maildir: note the reason for FIFO creation
      v2writable: ensure ->done is idempotent
      watchmaildir: support v2 repositories
      searchidxpart: s/barrier/remote_barrier/
      v2writable: allow disabling parallelization
      scripts/import_vger_from_mbox: filter out same headers as MDA
      v2writable: add DEBUG_DIFF env support
      v2writable: remove "resent" message for duplicate Message-IDs
      content_id: do not take Message-Id into account
      introduce InboxWritable class
      import: discard all the same headers as MDA
      InboxWritable: add mbox/maildir parsing + import logic
      use both Date: and Received: times
      msgmap: add tmp_clone to create an anonymous copy
      fix syntax warnings
      v2writable: support reindexing Xapian
      t/altid.t: extra tests for mid_set
      v2writable: add NNTP article number regeneration support
      v2writable: clarify header cleanups
      v2writable: DEBUG_DIFF respects $TMPDIR
      feed: $INBOX/new.atom endpoint supports v2 inboxes
      import: consolidate mid prepend logic, here
      www: $MESSAGE_ID/raw endpoint supports "duplicates"
      search: reopen DB if each_smsg_by_mid fails
      t/psgi_v2: minimal test for Atom feed and t.mbox.gz
      feed: fix new.html for v2
      view: permalink (per-message) view shows multiple messages
      searchidx: warn about vivifying multiple ghosts
      v2writable: warn on unseen deleted files
      www: get rid of unnecessary 'inbox' name reference
      searchview: remove unnecessary imports from MID module
      view: depend on SearchMsg for Message-ID
      http: fix modification of read-only value
      githttpbackend: avoid infinite loop on generic PSGI servers
      www: support cloning individual v2 git partitions
      http: fix modification of read-only value
      githttpbackend: avoid infinite loop on generic PSGI servers
      www: remove unnecessary ghost checks
      v2writable: append, instead of prepending generated Message-ID
      lookup by Message-ID favors the "primary" one
      www: fix attachment downloads for conflicted Message-IDs
      searchmsg: document why we store To: and Cc: for NNTP
      public-inbox-convert: tool for converting old to new inboxes
      v2writable: support purging messages from git entirely
      search: cleanup uniqueness checking
      search: get rid of most lookup_* subroutines
      search: move find_doc_ids to searchidx
      v2writable: cleanup: get rid of unused fields
      mbox: avoid extracting Message-ID for linkification
      www: cleanup expensive fallback for legacy URLs
      view: get rid of some unnecessary imports
      search: retry_reopen on first_smsg_by_mid
      import: run_die supports redirects as spawn does
      v2writable: initializing an existing inbox is idempotent
      public-inbox-compact: new tool for driving xapian-compact
      mda: support v2 inboxes
      search: warn on reopens and die on total failure
      v2writable: allow gaps in git partitions
      v2writable: convert some fatal reindex errors to warnings
      wwwstream: flesh out clone instructions for v2
      v2writable: go backwards through alternate Message-IDs
      view: speed up homepage loading time with date clamp
      view: drop load_results
      feed: optimize query for feeds, too
      msgtime: parse 3-digit years properly
      convert: avoid redundant "done\n" statement for fast-import
      search: move permissions handling to InboxWritable
      t/v2writable: use simplify permissions reading
      v2: respect core.sharedRepository in git configs
      searchidx: correct warning for over-vivification
      v2: one file, really
      v2writable: fix parallel termination
      truncate Message-IDs and References consistently
      scripts/import_vger_from_mbox: set address properly
      search: reduce columns stored in Xapian
      replace Xapian skeleton with SQLite overview DB
      v2writable: simplify barrier vs checkpoints
      t/over: test empty Subject: line matching
      www: rework query responses to avoid COUNT in SQLite
      over: speedup get_thread by avoiding JOIN
      nntp: fix NEWNEWS command
      t/thread-all.t: modernize test to support modern inboxes
      rename+rewrite test using Benchmark module
      nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
      view: avoid offset during pagination
      mbox: remove remaining OFFSET usage in SQLite
      msgmap: replace id_batch with ids_after
      nntp: simplify the long_response API
      searchidx: ensure duplicated Message-IDs can be linked together
      init: s/GIT_DIR/REPO_DIR/ in usage
      import: rewrite less history during purge
      v2: support incremental indexing + purge
      v2writable: do not modify DBs while iterating for ->remove
      v2writable: recount partitions after acquiring lock
      searchmsg: remove unused `tid' and `path' methods
      search: remove unnecessary OP_AND of query
      mbox: do not sort search results
      searchview: minor cleanup
      support altid mechanism for v2
      compact: better handling of over.sqlite3* files
      v2writable: remove redundant remove from Over DB
      v2writable: allow tracking parallel versions
      v2writable: refer to git each repository as "epoch"
      over: use only supported and safe SQLite APIs
      search: index and allow searching by date-time
      altid: fix miscopied field name
      nntp: set Xref across multiple inboxes
      www: favor reading more from SQLite, and less from Xapian
      ensure Xapian and SQLite are still optional for v1 tests
      psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological
      over: avoid excessive SELECT
      over: remove forked subprocess
      v2writable: reduce barriers
      index: allow specifying --jobs=0 to disable multiprocess
      convert: support converting with altid defined
      store less data in the Xapian document
      msgmap: speed up minmax with separate queries
      feed: respect feedmax, again
      v1: remove articles from overview DB
      compact: do not merge v2 repos by default
      v2writable: reduce partitions by one
      search: preserve References in Xapian smsg for x=t view
      v2: generate better Message-IDs for duplicates
      v2: improve deduplication checks
      import: cat_blob drops leading 'From ' lines like Inbox
      searchidx: regenerate and avoid article number gaps on full index
      extmsg: remove expensive git path checks
      use %H consistently to disable abbreviations
      searchidx: increase term positions for all text terms
      searchidx: revert default BATCH_BYTES to 1_000_000
      Merge remote-tracking branch 'origin/master' into v2
      fix tests to run without Xapian installed
      extmsg: use Xapian only for partial matches

Jonathan Corbet (3):
      Don't use LIMIT in UPDATE statements
      Update the installation instructions with Fedora package names
      Allow specification of the number of search results to return
-- 
git clone https://public-inbox.org/ public-inbox
(working on a homepage... sorta :)

^ permalink raw reply	[relevance 5%]

* [PATCH] view: remove unused _msg_date sub
@ 2019-01-30  7:38  5% Eric Wong
  0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2019-01-30  7:38 UTC (permalink / raw)
  To: meta

Not needed since commit 956abe9ad5f13a0d1755262be412d6a54fda72e9
("view: depend on SearchMsg for Message-ID")
---
 lib/PublicInbox/View.pm | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index cd125e0..4c1c670 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -867,11 +867,6 @@ sub missing_thread {
 	PublicInbox::ExtMsg::ext_msg($ctx);
 }
 
-sub _msg_date {
-	my ($hdr) = @_;
-	fmt_ts(msg_datestamp($hdr));
-}
-
 sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
 sub dedupe_subject {
-- 
EW


^ permalink raw reply related	[relevance 5%]

* v2 merged to master
@ 2018-04-19  1:20  5% Eric Wong
  0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2018-04-19  1:20 UTC (permalink / raw)
  To: meta

I actually merged master into v2, so it's a bit backwards :P

	commit cfb8d16578e7f2f2e300f9f436205e4a8fc7f322
	Merge: 1dc0f0c 119463b
	Author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
	Date:   Wed Apr 18 20:58:35 2018 +0000

	    Merge remote-tracking branch 'origin/master' into v2

I screwed up the indexing on http://hjrcffqmbrq6wope.onion/git/
so that's still going, but I think I was able to update the rest
of them (including the heavily trafficked non-.onion) w/o downtime.

The mirror at http://czquwvybam4bgbro.onion/git/ has been running
the v2 code for over a week, now.

Thanks to the Linux Foundation for funding this work.  Will still
need to make some documentation updates and such.

Eric Wong (Contractor, The Linux Foundation) (237):
      AUTHORS: add The Linux Foundation
      watch_maildir: allow '-' in mail filename
      scripts/import_vger_from_mbox: relax From_ line match slightly
      import: stop writing legacy ssoma.index by default
      import: begin supporting this without ssoma.lock
      import: initial handling for v2
      t/import: test for last_object_id insertion
      content_id: add test case
      searchmsg: add mid_mime import for _extract_mid
      scripts/import_vger_from_mbox: support --dry-run option
      import: APIs to support v2 use
      search: free up 'Q' prefix for a real unique identifier
      searchidx: fix comment around next_thread_id
      address: extract more characters from email addresses
      import: pass "raw" dates to git-fast-import(1)
      scripts/import_vger_from_mbox: use v2 layout for import
      import: quiet down warnings from bogus From: lines
      import: allow the epoch (0s) as a valid time
      extmsg: fix broken Xapian MID lookup
      search: stop assuming Message-ID is unique
      www: stop assuming mainrepo == git_dir
      v2writable: initial cut for repo-rotation
      git: reload alternates file on missing blob
      v2: support Xapian + SQLite indexing
      import_vger_from_inbox: allow "-V" option
      import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering
      v2: parallelize Xapian indexing
      v2writable: round-robin to partitions based on article number
      searchidxpart: increase pipe size for partitions
      v2writable: warn on duplicate Message-IDs
      searchidx: do not modify Xapian DB while iterating
      v2/ui: some hacky things to get the PSGI UI to show up
      v2/ui: retry DB reopens in a few more places
      v2writable: cleanup unused pipes in partitions
      searchidxpart: binmode
      use PublicInbox::MIME consistently
      searchidxpart: chomp line before splitting
      searchidx*: name child subprocesses
      searchidx: get rid of pointless index_blob wrapper
      view: remove X-PI-TS reference
      searchidxthread: load doc data for references
      searchidxpart: force integers into add_message
      search: reopen skeleton DB as well
      searchidx: index values in the threader
      search: use different Enquire object for skeleton queries
      rename SearchIdxThread to SearchIdxSkeleton
      v2writable: commit to skeleton via remote partitions
      searchidxskeleton: extra error checking
      searchidx: do not modify Xapian DB while iterating
      search: query_xover uses skeleton DB iff available
      v2/ui: get nntpd and init tests running on v2
      v2writable: delete ::Import obj when ->done
      search: remove informational "warning" message
      searchidx: add PID to error message when die-ing
      content_id: special treatment for Message-Id headers
      evcleanup: disable outside of daemon
      v2writable: deduplicate detection on add
      evcleanup: do not create event loop if nothing was registered
      mid: add `mids' and `references' methods for extraction
      content_id: use `mids' and `references' for MID extraction
      searchidx: use new `references' method for parsing References
      content_id: no need to be human-friendly
      v2writable: inject new Message-IDs on true duplicates
      search: revert to using 'Q' as a uniQue id per-Xapian conventions
      searchidx: support indexing multiple MIDs
      mid: be strict with References, but loose on Message-Id
      searchidx: avoid excessive XNQ indexing with diffs
      searchidxskeleton: add a note about locking
      v2writable: generated Message-ID goes first
      searchidx: use add_boolean_term for internal terms
      searchidx: add NNTP article number as a searchable term
      mid: truncate excessively long MIDs early
      nntp: use NNTP article numbers for lookups
      nntp: fix NEWNEWS command
      searchidx: store the primary MID in doc data for NNTP
      import: consolidate object info for v2 imports
      v2: avoid redundant/repeated configs for git partition repos
      INSTALL: document more optional dependencies
      search: favor skeleton DB for lookup_mail
      search: each_smsg_by_mid uses skeleton if available
      v2writable: remove unnecessary skeleton commit
      favor Received: date over Date: header globally
      import: fall back to Sender for extracting name and email
      scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping
      v2writable: detect and use previous partition count
      extmsg: rework partial MID matching to favor current inbox
      extmsg: rework partial MID matching to favor current inbox
      content_id: use Sender header if From is not available
      v2writable: support "barrier" operation to avoid reforking
      use string ref for Email::Simple->new
      v2writable: remove unnecessary idx_init call
      searchidx: do not delete documents while iterating
      search: allow ->reopen to be chainable
      v2writable: implement remove correctly
      skeleton: barrier init requires a lock
      import: (v2) delete writes the blob into history in subdir
      import: (v2): write deletes to a separate '_' subdirectory
      import: implement barrier operation for v1 repos
      mid: mid_mime uses v2-compatible mids function
      watchmaildir: use content_digest to generate Message-Id
      import: force Message-ID generation for v1 here
      import: switch to URL-safe Base64 for Message-IDs
      v2writable: test for idempotent removals
      import: enable locking under v2
      index: s/GIT_DIR/REPO_DIR/
      Lock: new base class for writable lockers
      t/watch_maildir: note the reason for FIFO creation
      v2writable: ensure ->done is idempotent
      watchmaildir: support v2 repositories
      searchidxpart: s/barrier/remote_barrier/
      v2writable: allow disabling parallelization
      scripts/import_vger_from_mbox: filter out same headers as MDA
      v2writable: add DEBUG_DIFF env support
      v2writable: remove "resent" message for duplicate Message-IDs
      content_id: do not take Message-Id into account
      introduce InboxWritable class
      import: discard all the same headers as MDA
      InboxWritable: add mbox/maildir parsing + import logic
      use both Date: and Received: times
      msgmap: add tmp_clone to create an anonymous copy
      fix syntax warnings
      v2writable: support reindexing Xapian
      t/altid.t: extra tests for mid_set
      v2writable: add NNTP article number regeneration support
      v2writable: clarify header cleanups
      v2writable: DEBUG_DIFF respects $TMPDIR
      feed: $INBOX/new.atom endpoint supports v2 inboxes
      import: consolidate mid prepend logic, here
      www: $MESSAGE_ID/raw endpoint supports "duplicates"
      search: reopen DB if each_smsg_by_mid fails
      t/psgi_v2: minimal test for Atom feed and t.mbox.gz
      feed: fix new.html for v2
      view: permalink (per-message) view shows multiple messages
      searchidx: warn about vivifying multiple ghosts
      v2writable: warn on unseen deleted files
      www: get rid of unnecessary 'inbox' name reference
      searchview: remove unnecessary imports from MID module
      view: depend on SearchMsg for Message-ID
      http: fix modification of read-only value
      githttpbackend: avoid infinite loop on generic PSGI servers
      www: support cloning individual v2 git partitions
      http: fix modification of read-only value
      githttpbackend: avoid infinite loop on generic PSGI servers
      www: remove unnecessary ghost checks
      v2writable: append, instead of prepending generated Message-ID
      lookup by Message-ID favors the "primary" one
      www: fix attachment downloads for conflicted Message-IDs
      searchmsg: document why we store To: and Cc: for NNTP
      public-inbox-convert: tool for converting old to new inboxes
      v2writable: support purging messages from git entirely
      search: cleanup uniqueness checking
      search: get rid of most lookup_* subroutines
      search: move find_doc_ids to searchidx
      v2writable: cleanup: get rid of unused fields
      mbox: avoid extracting Message-ID for linkification
      www: cleanup expensive fallback for legacy URLs
      view: get rid of some unnecessary imports
      search: retry_reopen on first_smsg_by_mid
      import: run_die supports redirects as spawn does
      v2writable: initializing an existing inbox is idempotent
      public-inbox-compact: new tool for driving xapian-compact
      mda: support v2 inboxes
      search: warn on reopens and die on total failure
      v2writable: allow gaps in git partitions
      v2writable: convert some fatal reindex errors to warnings
      wwwstream: flesh out clone instructions for v2
      v2writable: go backwards through alternate Message-IDs
      view: speed up homepage loading time with date clamp
      view: drop load_results
      feed: optimize query for feeds, too
      msgtime: parse 3-digit years properly
      convert: avoid redundant "done\n" statement for fast-import
      search: move permissions handling to InboxWritable
      t/v2writable: use simplify permissions reading
      v2: respect core.sharedRepository in git configs
      searchidx: correct warning for over-vivification
      v2: one file, really
      v2writable: fix parallel termination
      truncate Message-IDs and References consistently
      scripts/import_vger_from_mbox: set address properly
      search: reduce columns stored in Xapian
      replace Xapian skeleton with SQLite overview DB
      v2writable: simplify barrier vs checkpoints
      t/over: test empty Subject: line matching
      www: rework query responses to avoid COUNT in SQLite
      over: speedup get_thread by avoiding JOIN
      nntp: fix NEWNEWS command
      t/thread-all.t: modernize test to support modern inboxes
      rename+rewrite test using Benchmark module
      nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
      view: avoid offset during pagination
      mbox: remove remaining OFFSET usage in SQLite
      msgmap: replace id_batch with ids_after
      nntp: simplify the long_response API
      searchidx: ensure duplicated Message-IDs can be linked together
      init: s/GIT_DIR/REPO_DIR/ in usage
      import: rewrite less history during purge
      v2: support incremental indexing + purge
      v2writable: do not modify DBs while iterating for ->remove
      v2writable: recount partitions after acquiring lock
      searchmsg: remove unused `tid' and `path' methods
      search: remove unnecessary OP_AND of query
      mbox: do not sort search results
      searchview: minor cleanup
      support altid mechanism for v2
      compact: better handling of over.sqlite3* files
      v2writable: remove redundant remove from Over DB
      v2writable: allow tracking parallel versions
      v2writable: refer to git each repository as "epoch"
      over: use only supported and safe SQLite APIs
      search: index and allow searching by date-time
      altid: fix miscopied field name
      nntp: set Xref across multiple inboxes
      www: favor reading more from SQLite, and less from Xapian
      ensure Xapian and SQLite are still optional for v1 tests
      psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological
      over: avoid excessive SELECT
      over: remove forked subprocess
      v2writable: reduce barriers
      index: allow specifying --jobs=0 to disable multiprocess
      convert: support converting with altid defined
      store less data in the Xapian document
      msgmap: speed up minmax with separate queries
      feed: respect feedmax, again
      v1: remove articles from overview DB
      compact: do not merge v2 repos by default
      v2writable: reduce partititions by one
      search: preserve References in Xapian smsg for x=t view
      v2: generate better Message-IDs for duplicates
      v2: improve deduplication checks
      import: cat_blob drops leading 'From ' lines like Inbox
      searchidx: regenerate and avoid article number gaps on full index
      extmsg: remove expensive git path checks
      use %H consistently to disable abbreviations
      searchidx: increase term positions for all text terms
      searchidx: revert default BATCH_BYTES to 1_000_000
      Merge remote-tracking branch 'origin/master' into v2

^ permalink raw reply	[relevance 5%]

* [PATCH 00/11] duplicate support in UI + tests
@ 2018-03-27 11:11  6% Eric Wong (Contractor, The Linux Foundation)
  2018-03-27 11:11  3% ` [PATCH 11/11] view: depend on SearchMsg for Message-ID Eric Wong (Contractor, The Linux Foundation)
  0 siblings, 1 reply; 6+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
  To: meta

Threading messages sharing the same Message-ID requires
using our "preferred" Message-ID.

For the Atom feed and new.html, this requires some effort to
fake SearchMsg results for v1 where Xapian was optional, but
should allow us to handle corner cases where legit messages
share a Message-ID.

I'm not sure if messages with the same Message-ID should
be rendered as relatives, or if they should only be grouped
together in the same rootset (as if their "Subject:" matches)
but not rendered as ancestors.

Eric Wong (Contractor, The Linux Foundation) (11):
  import: consolidate mid prepend logic, here
  www: $MESSAGE_ID/raw endpoint supports "duplicates"
  search: reopen DB if each_smsg_by_mid fails
  t/psgi_v2: minimal test for Atom feed and t.mbox.gz
  feed: fix new.html for v2
  view: permalink (per-message) view shows multiple messages
  searchidx: warn about vivifying multiple ghosts
  v2writable: warn on unseen deleted files
  www: get rid of unnecessary 'inbox' name reference
  searchview: remove unnecessary imports from MID module
  view: depend on SearchMsg for Message-ID

 MANIFEST                         |   1 +
 lib/PublicInbox/Feed.pm          |  98 +++++++++------------
 lib/PublicInbox/Import.pm        |  31 +++++--
 lib/PublicInbox/Inbox.pm         |   9 ++
 lib/PublicInbox/Mbox.pm          |  65 ++++++++++++--
 lib/PublicInbox/Search.pm        |   6 ++
 lib/PublicInbox/SearchIdx.pm     |   4 +
 lib/PublicInbox/SearchMsg.pm     |  13 ++-
 lib/PublicInbox/SearchView.pm    |  16 ++--
 lib/PublicInbox/V2Writable.pm    |  15 +++-
 lib/PublicInbox/View.pm          | 180 +++++++++++++++++++++++++++------------
 lib/PublicInbox/WWW.pm           |  10 +--
 lib/PublicInbox/WwwAtomStream.pm |   9 +-
 t/psgi_v2.t                      | 172 +++++++++++++++++++++++++++++++++++++
 14 files changed, 474 insertions(+), 155 deletions(-)
 create mode 100644 t/psgi_v2.t

-- 
EW

^ permalink raw reply	[relevance 6%]

* [PATCH] doc: add release notes directory
@ 2019-09-14 19:50  7% Eric Wong
  0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2019-09-14 19:50 UTC (permalink / raw)
  To: meta

The v1.2.0 notes are a work-in-progress, while the others are copied
out of our mail archives.

Eventually, a NEWS file will be generated from these emails and
distributed in the release tarball.  There'll also be an Atom
feed for the website reusing our feed generation code.
---
 .gitattributes                         |   2 +
 Documentation/RelNotes/v1.0.0.eml      |  21 ++
 Documentation/RelNotes/v1.1.0-pre1.eml | 295 +++++++++++++++++++++++++
 Documentation/RelNotes/v1.2.0.wip      |  40 ++++
 MANIFEST                               |   4 +
 5 files changed, 362 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 Documentation/RelNotes/v1.0.0.eml
 create mode 100644 Documentation/RelNotes/v1.1.0-pre1.eml
 create mode 100644 Documentation/RelNotes/v1.2.0.wip

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..bb53518
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Email signatures start with "-- \n"
+*.eml whitespace=-blank-at-eol
diff --git a/Documentation/RelNotes/v1.0.0.eml b/Documentation/RelNotes/v1.0.0.eml
new file mode 100644
index 0000000..ae6ea4e
--- /dev/null
+++ b/Documentation/RelNotes/v1.0.0.eml
@@ -0,0 +1,21 @@
+From e@80x24.org Thu Feb  8 02:33:57 2018
+Date: Thu, 8 Feb 2018 02:33:57 +0000
+From: Eric Wong <e@80x24.org>
+To: meta@public-inbox.org
+Subject: [ANNOUNCE] public-inbox 1.0.0
+Message-ID: <20180208023357.GA32591@80x24.org>
+
+After some 3.5 odd years of working on this, I suppose now is
+as good a time as any to tar this up and call it 1.0.0.
+
+The TODO list is still very long and there'll be some new
+development in coming weeks :>
+
+So, here you have a release:
+
+        https://public-inbox.org/releases/public-inbox-1.0.0.tar.gz
+
+Checksums, mainly as a safeguard against accidental file corruption:
+
+SHA-256 4a08569f3d99310f713bb32bec0aa4819d6b41871e0421ec4eec0657a5582216
+	(in other words, don't trust me; instead read the code :>)
diff --git a/Documentation/RelNotes/v1.1.0-pre1.eml b/Documentation/RelNotes/v1.1.0-pre1.eml
new file mode 100644
index 0000000..ee1ecc3
--- /dev/null
+++ b/Documentation/RelNotes/v1.1.0-pre1.eml
@@ -0,0 +1,295 @@
+From e@80x24.org Wed May  9 20:23:03 2018
+Date: Wed, 9 May 2018 20:23:03 +0000
+From: Eric Wong <e@80x24.org>
+To: meta@public-inbox.org
+Cc: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
+Subject: [ANNOUNCE] public-inbox 1.1.0-pre1
+Message-ID: <20180509202303.GA15156@dcvr>
+
+Pre-release for v2 repository support.
+Thanks to The Linux Foundation for supporting this work!
+
+https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz
+
+SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009
+(only intended to guard against accidental file corruption)
+
+shortlog below:
+
+Eric Wong (27):
+      nntp: improve fairness during XOVER and similar commands
+      nntp: do not drain rbuf if there is a command pending
+      extmsg: use news.gmane.org for Message-ID lookups
+      searchview: fix non-numeric comparison
+      mbox: do not barf on queries which return no results
+      nntp: allow and ignore empty commands
+      ensure SQLite and Xapian files respect core.sharedRepository
+      TODO: a few more updates
+      filter/rubylang: do not set altid on spam training
+      import: cleanup git cat-file processes when ->done
+      disallow "\t" and "\n" in OVER headers
+      searchidx: release lock again during v1 batch callback
+      searchidx: remove leftover debugging code
+      convert: copy description and git config from v1 repo
+      view: untangle loop when showing message headers
+      view: wrap To: and Cc: headers in HTML display
+      view: drop redundant References: display code
+      TODO: add EPOLLEXCLUSIVE item
+      searchview: do not blindly append "l" parameter to URL
+      search: avoid repeated mbox results from search
+      msgmap: add limit to response for NNTP
+      thread: prevent hidden threads in /$INBOX/ landing page
+      thread: sort incoming messages by Date
+      searchidx: preserve umask when starting/committing transactions
+      scripts/import_slrnspool: support v2 repos
+      scripts/import_slrnspool: cleanup progress messages
+      public-inbox 1.1.0-pre1
+
+Eric Wong (Contractor, The Linux Foundation) (239):
+      AUTHORS: add The Linux Foundation
+      watch_maildir: allow '-' in mail filename
+      scripts/import_vger_from_mbox: relax From_ line match slightly
+      import: stop writing legacy ssoma.index by default
+      import: begin supporting this without ssoma.lock
+      import: initial handling for v2
+      t/import: test for last_object_id insertion
+      content_id: add test case
+      searchmsg: add mid_mime import for _extract_mid
+      scripts/import_vger_from_mbox: support --dry-run option
+      import: APIs to support v2 use
+      search: free up 'Q' prefix for a real unique identifier
+      searchidx: fix comment around next_thread_id
+      address: extract more characters from email addresses
+      import: pass "raw" dates to git-fast-import(1)
+      scripts/import_vger_from_mbox: use v2 layout for import
+      import: quiet down warnings from bogus From: lines
+      import: allow the epoch (0s) as a valid time
+      extmsg: fix broken Xapian MID lookup
+      search: stop assuming Message-ID is unique
+      www: stop assuming mainrepo == git_dir
+      v2writable: initial cut for repo-rotation
+      git: reload alternates file on missing blob
+      v2: support Xapian + SQLite indexing
+      import_vger_from_inbox: allow "-V" option
+      import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering
+      v2: parallelize Xapian indexing
+      v2writable: round-robin to partitions based on article number
+      searchidxpart: increase pipe size for partitions
+      v2writable: warn on duplicate Message-IDs
+      searchidx: do not modify Xapian DB while iterating
+      v2/ui: some hacky things to get the PSGI UI to show up
+      v2/ui: retry DB reopens in a few more places
+      v2writable: cleanup unused pipes in partitions
+      searchidxpart: binmode
+      use PublicInbox::MIME consistently
+      searchidxpart: chomp line before splitting
+      searchidx*: name child subprocesses
+      searchidx: get rid of pointless index_blob wrapper
+      view: remove X-PI-TS reference
+      searchidxthread: load doc data for references
+      searchidxpart: force integers into add_message
+      search: reopen skeleton DB as well
+      searchidx: index values in the threader
+      search: use different Enquire object for skeleton queries
+      rename SearchIdxThread to SearchIdxSkeleton
+      v2writable: commit to skeleton via remote partitions
+      searchidxskeleton: extra error checking
+      searchidx: do not modify Xapian DB while iterating
+      search: query_xover uses skeleton DB iff available
+      v2/ui: get nntpd and init tests running on v2
+      v2writable: delete ::Import obj when ->done
+      search: remove informational "warning" message
+      searchidx: add PID to error message when die-ing
+      content_id: special treatment for Message-Id headers
+      evcleanup: disable outside of daemon
+      v2writable: deduplicate detection on add
+      evcleanup: do not create event loop if nothing was registered
+      mid: add `mids' and `references' methods for extraction
+      content_id: use `mids' and `references' for MID extraction
+      searchidx: use new `references' method for parsing References
+      content_id: no need to be human-friendly
+      v2writable: inject new Message-IDs on true duplicates
+      search: revert to using 'Q' as a uniQue id per-Xapian conventions
+      searchidx: support indexing multiple MIDs
+      mid: be strict with References, but loose on Message-Id
+      searchidx: avoid excessive XNQ indexing with diffs
+      searchidxskeleton: add a note about locking
+      v2writable: generated Message-ID goes first
+      searchidx: use add_boolean_term for internal terms
+      searchidx: add NNTP article number as a searchable term
+      mid: truncate excessively long MIDs early
+      nntp: use NNTP article numbers for lookups
+      nntp: fix NEWNEWS command
+      searchidx: store the primary MID in doc data for NNTP
+      import: consolidate object info for v2 imports
+      v2: avoid redundant/repeated configs for git partition repos
+      INSTALL: document more optional dependencies
+      search: favor skeleton DB for lookup_mail
+      search: each_smsg_by_mid uses skeleton if available
+      v2writable: remove unnecessary skeleton commit
+      favor Received: date over Date: header globally
+      import: fall back to Sender for extracting name and email
+      scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping
+      v2writable: detect and use previous partition count
+      extmsg: rework partial MID matching to favor current inbox
+      extmsg: rework partial MID matching to favor current inbox
+      content_id: use Sender header if From is not available
+      v2writable: support "barrier" operation to avoid reforking
+      use string ref for Email::Simple->new
+      v2writable: remove unnecessary idx_init call
+      searchidx: do not delete documents while iterating
+      search: allow ->reopen to be chainable
+      v2writable: implement remove correctly
+      skeleton: barrier init requires a lock
+      import: (v2) delete writes the blob into history in subdir
+      import: (v2): write deletes to a separate '_' subdirectory
+      import: implement barrier operation for v1 repos
+      mid: mid_mime uses v2-compatible mids function
+      watchmaildir: use content_digest to generate Message-Id
+      import: force Message-ID generation for v1 here
+      import: switch to URL-safe Base64 for Message-IDs
+      v2writable: test for idempotent removals
+      import: enable locking under v2
+      index: s/GIT_DIR/REPO_DIR/
+      Lock: new base class for writable lockers
+      t/watch_maildir: note the reason for FIFO creation
+      v2writable: ensure ->done is idempotent
+      watchmaildir: support v2 repositories
+      searchidxpart: s/barrier/remote_barrier/
+      v2writable: allow disabling parallelization
+      scripts/import_vger_from_mbox: filter out same headers as MDA
+      v2writable: add DEBUG_DIFF env support
+      v2writable: remove "resent" message for duplicate Message-IDs
+      content_id: do not take Message-Id into account
+      introduce InboxWritable class
+      import: discard all the same headers as MDA
+      InboxWritable: add mbox/maildir parsing + import logic
+      use both Date: and Received: times
+      msgmap: add tmp_clone to create an anonymous copy
+      fix syntax warnings
+      v2writable: support reindexing Xapian
+      t/altid.t: extra tests for mid_set
+      v2writable: add NNTP article number regeneration support
+      v2writable: clarify header cleanups
+      v2writable: DEBUG_DIFF respects $TMPDIR
+      feed: $INBOX/new.atom endpoint supports v2 inboxes
+      import: consolidate mid prepend logic, here
+      www: $MESSAGE_ID/raw endpoint supports "duplicates"
+      search: reopen DB if each_smsg_by_mid fails
+      t/psgi_v2: minimal test for Atom feed and t.mbox.gz
+      feed: fix new.html for v2
+      view: permalink (per-message) view shows multiple messages
+      searchidx: warn about vivifying multiple ghosts
+      v2writable: warn on unseen deleted files
+      www: get rid of unnecessary 'inbox' name reference
+      searchview: remove unnecessary imports from MID module
+      view: depend on SearchMsg for Message-ID
+      http: fix modification of read-only value
+      githttpbackend: avoid infinite loop on generic PSGI servers
+      www: support cloning individual v2 git partitions
+      http: fix modification of read-only value
+      githttpbackend: avoid infinite loop on generic PSGI servers
+      www: remove unnecessary ghost checks
+      v2writable: append, instead of prepending generated Message-ID
+      lookup by Message-ID favors the "primary" one
+      www: fix attachment downloads for conflicted Message-IDs
+      searchmsg: document why we store To: and Cc: for NNTP
+      public-inbox-convert: tool for converting old to new inboxes
+      v2writable: support purging messages from git entirely
+      search: cleanup uniqueness checking
+      search: get rid of most lookup_* subroutines
+      search: move find_doc_ids to searchidx
+      v2writable: cleanup: get rid of unused fields
+      mbox: avoid extracting Message-ID for linkification
+      www: cleanup expensive fallback for legacy URLs
+      view: get rid of some unnecessary imports
+      search: retry_reopen on first_smsg_by_mid
+      import: run_die supports redirects as spawn does
+      v2writable: initializing an existing inbox is idempotent
+      public-inbox-compact: new tool for driving xapian-compact
+      mda: support v2 inboxes
+      search: warn on reopens and die on total failure
+      v2writable: allow gaps in git partitions
+      v2writable: convert some fatal reindex errors to warnings
+      wwwstream: flesh out clone instructions for v2
+      v2writable: go backwards through alternate Message-IDs
+      view: speed up homepage loading time with date clamp
+      view: drop load_results
+      feed: optimize query for feeds, too
+      msgtime: parse 3-digit years properly
+      convert: avoid redundant "done\n" statement for fast-import
+      search: move permissions handling to InboxWritable
+      t/v2writable: use simplify permissions reading
+      v2: respect core.sharedRepository in git configs
+      searchidx: correct warning for over-vivification
+      v2: one file, really
+      v2writable: fix parallel termination
+      truncate Message-IDs and References consistently
+      scripts/import_vger_from_mbox: set address properly
+      search: reduce columns stored in Xapian
+      replace Xapian skeleton with SQLite overview DB
+      v2writable: simplify barrier vs checkpoints
+      t/over: test empty Subject: line matching
+      www: rework query responses to avoid COUNT in SQLite
+      over: speedup get_thread by avoiding JOIN
+      nntp: fix NEWNEWS command
+      t/thread-all.t: modernize test to support modern inboxes
+      rename+rewrite test using Benchmark module
+      nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster
+      view: avoid offset during pagination
+      mbox: remove remaining OFFSET usage in SQLite
+      msgmap: replace id_batch with ids_after
+      nntp: simplify the long_response API
+      searchidx: ensure duplicated Message-IDs can be linked together
+      init: s/GIT_DIR/REPO_DIR/ in usage
+      import: rewrite less history during purge
+      v2: support incremental indexing + purge
+      v2writable: do not modify DBs while iterating for ->remove
+      v2writable: recount partitions after acquiring lock
+      searchmsg: remove unused `tid' and `path' methods
+      search: remove unnecessary OP_AND of query
+      mbox: do not sort search results
+      searchview: minor cleanup
+      support altid mechanism for v2
+      compact: better handling of over.sqlite3* files
+      v2writable: remove redundant remove from Over DB
+      v2writable: allow tracking parallel versions
+      v2writable: refer to git each repository as "epoch"
+      over: use only supported and safe SQLite APIs
+      search: index and allow searching by date-time
+      altid: fix miscopied field name
+      nntp: set Xref across multiple inboxes
+      www: favor reading more from SQLite, and less from Xapian
+      ensure Xapian and SQLite are still optional for v1 tests
+      psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological
+      over: avoid excessive SELECT
+      over: remove forked subprocess
+      v2writable: reduce barriers
+      index: allow specifying --jobs=0 to disable multiprocess
+      convert: support converting with altid defined
+      store less data in the Xapian document
+      msgmap: speed up minmax with separate queries
+      feed: respect feedmax, again
+      v1: remove articles from overview DB
+      compact: do not merge v2 repos by default
+      v2writable: reduce partititions by one
+      search: preserve References in Xapian smsg for x=t view
+      v2: generate better Message-IDs for duplicates
+      v2: improve deduplication checks
+      import: cat_blob drops leading 'From ' lines like Inbox
+      searchidx: regenerate and avoid article number gaps on full index
+      extmsg: remove expensive git path checks
+      use %H consistently to disable abbreviations
+      searchidx: increase term positions for all text terms
+      searchidx: revert default BATCH_BYTES to 1_000_000
+      Merge remote-tracking branch 'origin/master' into v2
+      fix tests to run without Xapian installed
+      extmsg: use Xapian only for partial matches
+
+Jonathan Corbet (3):
+      Don't use LIMIT in UPDATE statements
+      Update the installation instructions with Fedora package names
+      Allow specification of the number of search results to return
+-- 
+git clone https://public-inbox.org/ public-inbox
+(working on a homepage... sorta :)
diff --git a/Documentation/RelNotes/v1.2.0.wip b/Documentation/RelNotes/v1.2.0.wip
new file mode 100644
index 0000000..41236a0
--- /dev/null
+++ b/Documentation/RelNotes/v1.2.0.wip
@@ -0,0 +1,40 @@
+To: meta@public-inbox.org
+Subject: [WIP] public-inbox 1.2.0
+
+* first non-pre/rc release with v2 format support for scalability.
+  See public-inbox-v2-format(5) manpage for more details.
+
+* new admin tools for v2 repos:
+  - public-inbox-convert - converts v1 to v2 repo formats
+  - public-inbox-compact - v2 convenience wrapper for xapian-compact(1)
+  - public-inbox-purge - purges entire messages out of v2 history
+  - public-inbox-edit - edits sensitive data out of messages from v2 history
+  - public-inbox-xcpdb - copydatabase(1) wrapper to upgrade Xapian formats
+                         (e.g. from "chert" to "glass") and resharding
+                         of v2 repos
+
+* SQLite3 support decoupled from Xapian support, and Xapian DBs may be
+  configured without phrase support to save space.  See "indexlevel" in
+  public-inbox-config(5) manpage for more info.
+
+* public-inbox-nntpd
+  - support STARTTLS and NNTPS
+  - support COMPRESS extension
+  - fix several RFC3977 compliance bugs
+  - improved interoperability with picky clients such as leafnode
+
+* public-inbox-watch
+  - support multiple spam training directories
+  - support mapping multiple inboxes per Maildir
+
+* PublicInbox::WWW
+  - grokmirror-compatible manifest.js.gz endpoint generation
+  - user-configurable color support in $INBOX_URL/_/text/color/
+  - BOFHs may set default colors via "publicinbox.css"
+    (see public-inbox-config(5))
+
+* Danga::Socket is no longer a runtime dependency of daemons.
+
+* improved FreeBSD support
+
+See archives at https://public-inbox.org/meta/ for all history.
diff --git a/MANIFEST b/MANIFEST
index f5290b4..ecf239f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,7 +1,11 @@
+.gitattributes
 .gitignore
 AUTHORS
 COPYING
 Documentation/.gitignore
+Documentation/RelNotes/v1.0.0.eml
+Documentation/RelNotes/v1.1.0-pre1.eml
+Documentation/RelNotes/v1.2.0.wip
 Documentation/dc-dlvr-spam-flow.txt
 Documentation/design_notes.txt
 Documentation/design_www.txt
-- 
EW


^ permalink raw reply related	[relevance 7%]

Results 1-6 of 6 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2019-09-14 19:50  7% [PATCH] doc: add release notes directory Eric Wong
2018-03-27 11:11  6% [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11  3% ` [PATCH 11/11] view: depend on SearchMsg for Message-ID Eric Wong (Contractor, The Linux Foundation)
2019-01-30  7:38  5% [PATCH] view: remove unused _msg_date sub Eric Wong
2018-04-19  1:20  5% v2 merged to master Eric Wong
2018-05-09 20:23  5% [ANNOUNCE] public-inbox 1.1.0-pre1 Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).