user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
8fb3a030aa723417036108bb1728d8abeec8be4f blob 5007 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
 
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# This license differs from the rest of public-inbox
#
# Our own jwz-style threading class based on Mail::Thread from CPAN.
# Mail::Thread is unmaintained and unavailable on some distros.
# We also do not want pruning or subject grouping, since we want
# to encourage strict threading and hopefully encourage people
# to use proper In-Reply-To/References.
#
# This includes fixes from several open bugs for Mail::Thread
#
# Avoid circular references
# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
#
# And avoid recursion in recurse_down:
# - https://rt.cpan.org/Ticket/Display.html?id=116727
# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
package PublicInbox::SearchThread;
use strict;
use warnings;
use PublicInbox::MID qw($MID_EXTRACT);

sub thread {
	my ($msgs, $ordersub, $ctx) = @_;

	# A. put all current $msgs (non-ghosts) into %id_table
	my %id_table = map {;
		# this delete saves around 4K across 1K messages
		# TODO: move this to a more appropriate place, breaks tests
		# if we do it during psgi_cull
		delete $_->{num};

		$_->{mid} => PublicInbox::SearchThread::Msg::cast($_);
	} @$msgs;

	# Sadly, we sort here anyways since the fill-in-the-blanks References:
	# can be shakier if somebody used In-Reply-To with multiple, disparate
	# messages.  So, take the client Date: into account since we can't
	# always determine ordering when somebody uses multiple In-Reply-To.
	# We'll trust the client Date: header here instead of the Received:
	# time since this is for display (and not retrieval)
	_set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs;
	my $ibx = $ctx->{ibx};
	my $rootset = [ grep {
			!delete($_->{parent}) && $_->visible($ibx)
		} values %id_table ];
	$rootset = $ordersub->($rootset);
	$_->order_children($ordersub, $ctx) for @$rootset;
	$rootset;
}

sub _set_parent ($$) {
	my ($id_table, $this) = @_;

	# B. For each element in the message's References field:
	defined(my $refs = $this->{references}) or return;

	# This loop exists to help fill in gaps left from missing
	# messages.  It is not needed in a perfect world where
	# everything is perfectly referenced, only the last ref
	# matters.
	my $prev;
	foreach my $ref ($refs =~ m/$MID_EXTRACT/go) {
		# Find a Container object for the given Message-ID
		my $cont = $id_table->{$ref} //=
			PublicInbox::SearchThread::Msg::ghost($ref);

		# Link the References field's Containers together in
		# the order implied by the References header
		#
		# * If they are already linked don't change the
		#   existing links
		# * Do not add a link if adding that link would
		#   introduce a loop...
		if ($prev &&
			!$cont->{parent} &&  # already linked
			!$cont->has_descendent($prev) # would loop
		   ) {
			$prev->add_child($cont);
		}
		$prev = $cont;
	}

	# C. Set the parent of this message to be the last element in
	# References.
	if (defined $prev && !$this->has_descendent($prev)) { # would loop
		$prev->add_child($this);
	}
}

package PublicInbox::SearchThread::Msg;
use base qw(PublicInbox::Smsg);
use strict;
use warnings;
use Carp qw(croak);

# declare a ghost smsg (determined by absence of {blob})
sub ghost {
	bless {
		mid => $_[0],
		children => {}, # becomes an array when sorted by ->order(...)
	}, __PACKAGE__;
}

# give a existing smsg the methods of this class
sub cast {
	my ($smsg) = @_;
	$smsg->{children} = {};
	bless $smsg, __PACKAGE__;
}

sub topmost {
	my ($self) = @_;
	my @q = ($self);
	while (my $cont = shift @q) {
		return $cont if $cont->{blob};
		push @q, values %{$cont->{children}};
	}
	undef;
}

sub add_child {
	my ($self, $child) = @_;
	croak "Cowardly refusing to become my own parent: $self"
	  if $self == $child;

	my $cid = $child->{mid};

	# reparenting:
	if (defined(my $parent = $child->{parent})) {
		delete $parent->{children}->{$cid};
	}

	$self->{children}->{$cid} = $child;
	$child->{parent} = $self;
}

sub has_descendent {
	my ($self, $child) = @_;
	my %seen; # loop prevention
	while ($child) {
		return 1 if $self == $child || $seen{$child}++;
		$child = $child->{parent};
	}
	0;
}

# Do not show/keep ghosts iff they have no children.  Sometimes
# a ghost Message-ID is the result of a long header line
# being folded/mangled by a MUA, and not a missing message.
sub visible ($$) {
	my ($self, $ibx) = @_;
	return 1 if $self->{blob};
	if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) {
		%$self = (%$self, %$by_mid);
		1;
	} else {
		(scalar values %{$self->{children}});
	}
}

sub order_children {
	my ($cur, $ordersub, $ctx) = @_;

	my %seen = ($cur => 1); # self-referential loop prevention
	my @q = ($cur);
	my $ibx = $ctx->{ibx};
	while (defined($cur = shift @q)) {
		my $c = $cur->{children}; # The hashref here...

		$c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ];
		$c = $ordersub->($c) if scalar @$c > 1;
		$cur->{children} = $c; # ...becomes an arrayref
		push @q, @$c;
	}
}

1;
debug log:

solving 8fb3a030aa72 ...
found 8fb3a030aa72 in https://80x24.org/public-inbox.git

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).