user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
41fe859e76af60aa30cc7aa2481ba00d14d4a12a blob 7094 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
 
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# This license differs from the rest of public-inbox
#
# Our own jwz-style threading class based on Mail::Thread from CPAN.
# Mail::Thread is unmaintained and available on some distros.
# We also do not want pruning or subject grouping, since we want
# to encourage strict threading and hopefully encourage people
# to use proper In-Reply-To.
#
# This includes fixes from several open bugs for Mail::Thread
#
# Avoid circular references
# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
#
# And avoid recursion in recurse_down:
# - https://rt.cpan.org/Ticket/Display.html?id=116727
# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
package PublicInbox::SearchThread;
use strict;
use warnings;
use Email::Abstract;

sub new {
	return bless {
		messages => $_[1],
		id_table => {},
		rootset  => []
	}, $_[0];
}

sub _get_hdr {
	my ($class, $msg, $hdr) = @_;
	Email::Abstract->get_header($msg, $hdr) || '';
}

sub _uniq {
	my %seen;
	return grep { !$seen{$_}++ } @_;
}

sub _references {
	my $class = shift;
	my $msg = shift;
	my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
	my $foo = $class->_get_hdr($msg, "In-Reply-To");
	chomp $foo;
	$foo =~ s/.*?<([^>]+)>.*/$1/;
	push @references, $foo
	  if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
	return _uniq(@references);
}

sub _msgid {
	my ($class, $msg) = @_;
	my $id = $class->_get_hdr($msg, "Message-ID");
	die "attempt to thread message with no id" unless $id;
	chomp $id;
	$id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
	return $id;
}

sub rootset { @{$_[0]{rootset}} }

sub thread {
	my $self = shift;
	$self->_setup();
	$self->{rootset} = [ grep { !$_->parent } values %{$self->{id_table}} ];
	$self->_finish();
}

sub _finish {
	my $self = shift;
	delete $self->{id_table};
	delete $self->{seen};
}

sub _get_cont_for_id {
	my $self = shift;
	my $id = shift;
	$self->{id_table}{$id} ||= $self->_container_class->new($id);
}

sub _container_class { 'PublicInbox::SearchThread::Container' }

sub _setup {
	my ($self) = @_;

	_add_message($self, $_) foreach @{$self->{messages}};
}

sub _add_message ($$) {
	my ($self, $message) = @_;

	# A. if id_table...
	my $this_container = $self->_get_cont_for_id($self->_msgid($message));
	$this_container->message($message);

	# B. For each element in the message's References field:
	my @refs = $self->_references($message);

	my $prev;
	for my $ref (@refs) {
		# Find a Container object for the given Message-ID
		my $container = $self->_get_cont_for_id($ref);

		# Link the References field's Containers together in the
		# order implied by the References header
		# * If they are already linked don't change the existing links
		# * Do not add a link if adding that link would introduce
		#   a loop...

		if ($prev &&
			!$container->parent &&  # already linked
			!$container->has_descendent($prev) # would loop
		   ) {
			$prev->add_child($container);
		}
		$prev = $container;
	}

	# C. Set the parent of this message to be the last element in
	# References...
	if ($prev &&
		!$this_container->has_descendent($prev) # would loop
	   ) {
		$prev->add_child($this_container)
	}
}

sub order {
	my $self = shift;
	my $ordersub = shift;

	# make a fake root
	my $root = $self->_container_class->new( 'fakeroot' );
	$root->add_child( $_ ) for @{ $self->{rootset} };

	# sort it
	$root->order_children( $ordersub );

	# and untangle it
	my @kids = $root->children;
	$self->{rootset} = \@kids;
	$root->remove_child($_) for @kids;
}

package PublicInbox::SearchThread::Container;
use Carp qw(croak);
use Scalar::Util qw(weaken);

sub new { my $self = shift; bless { id => shift }, $self; }

sub message { $_[0]->{message} = $_[1] if @_ == 2; $_[0]->{message} }
sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
sub child { $_[0]->{child} = $_[1] if @_ == 2; $_[0]->{child} }
sub next { $_[0]->{next} = $_[1] if @_ == 2; $_[0]->{next} }
sub messageid { $_[0]->{id} }

sub add_child {
	my ($self, $child) = @_;
	croak "Cowardly refusing to become my own parent: $self"
	  if $self == $child;

	if (grep { $_ == $child } $self->children) {
		# All is potentially correct with the world
		$child->parent($self);
		return;
	}

	$child->parent->remove_child($child) if $child->parent;

	$child->next($self->child);
	$self->child($child);
	$child->parent($self);
}

sub remove_child {
	my ($self, $child) = @_;
	return unless $self->child;
	if ($self->child == $child) {  # First one's easy.
		$self->child($child->next);
		$child->next(undef);
		$child->parent(undef);
		return;
	}

	my $x = $self->child;
	my $prev = $x;
	while ($x = $x->next) {
		if ($x == $child) {
			$prev->next($x->next); # Unlink x
			$x->next(undef);
			$x->parent(undef);	 # Deparent it
			return;
		}
		$prev = $x;
	}
	# oddly, we can get here
	$child->next(undef);
	$child->parent(undef);
}

sub has_descendent {
	my $self = shift;
	my $child = shift;
	die "Assertion failed: $child" unless eval {$child};
	my $there = 0;
	$self->recurse_down(sub { $there = 1 if $_[0] == $child });

	return $there;
}

sub children {
	my $self = shift;
	my @children;
	my $visitor = $self->child;
	while ($visitor) {
		push @children, $visitor;
		$visitor = $visitor->next
	}
	return @children;
}

sub set_children {
	my $self = shift;
	my $walk = $self->child( shift );
	while (@_) { $walk = $walk->next( shift ) }
	$walk->next(undef) if $walk;
}

sub order_children {
	my $self = shift;
	my $ordersub = shift;

	return unless $ordersub;

	my $sub = sub {
		my $cont = shift;
		my @children = $cont->children;
		return if @children < 2;
		$cont->set_children( $ordersub->( @children ) );
	};
	$self->iterate_down( undef, $sub );
	undef $sub;
}

# non-recursive version of recurse_down to avoid stack depth warnings
sub recurse_down {
	my ($self, $callback) = @_;
	my %seen;
	my @q = ($self);
	while (my $cont = shift @q) {
		$seen{$cont}++;
		$callback->($cont);

		if (my $next = $cont->next) {
			if ($seen{$next}) {
				$cont->next(undef);
			} else {
				push @q, $next;
			}
		}
		if (my $child = $cont->child) {
			if ($seen{$child}) {
				$cont->child(undef);
			} else {
				push @q, $child;
			}
		}
	}
}

sub iterate_down {
	my $self = shift;
	my ($before, $after) = @_;

	my %seen;
	my $walk = $self;
	my $depth = 0;
	my @visited;
	while ($walk) {
		push @visited, [ $walk, $depth ];
		$before->($walk, $depth) if $before;

		# spot/break loops
		$seen{$walk}++;

		my $child = $walk->child;
		if ($child && $seen{$child}) {
			$walk->child(undef);
			$child = undef;
		}

		my $next = $walk->next;
		if ($next && $seen{$next}) {
			$walk->next(undef);
			$next = undef;
		}

		# go down, or across
		if ($child) {
			$next = $child;
			++$depth;
		}

		# no next?  look up
		if (!$next) {
			my $up = $walk;
			while ($up && !$next) {
				$up = $up->parent;
				--$depth;
				$next = $up->next if $up;
			}
		}
		$walk = $next;
	}
	return unless $after;
	while (@visited) { $after->(@{ pop @visited }) }
}

1;
debug log:

solving 41fe859 ...
found 41fe859 in https://80x24.org/public-inbox.git

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).