user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 5/8] overidx: each_by_mid: pass self and args to callbacks
  2020-07-17  6:31  7% [PATCH 0/8] indexing cleanup and code reduction Eric Wong
@ 2020-07-17  6:31  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-07-17  6:31 UTC (permalink / raw)
  To: meta

This saves runtime allocations and reduces the likelihood of
memory leaks either from cycles or buggy old Perl versions.
---
 lib/PublicInbox/OverIdx.pm | 99 +++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 45 deletions(-)

diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index ea8da723..52f6328e 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -107,7 +107,7 @@ DELETE FROM $_ WHERE num = ?
 
 # this includes ghosts
 sub each_by_mid {
-	my ($self, $mid, $cols, $cb) = @_;
+	my ($self, $mid, $cols, $cb, @arg) = @_;
 	my $dbh = $self->{dbh};
 
 =over
@@ -152,27 +152,29 @@ SELECT $cols FROM over WHERE over.num = ? LIMIT 1
 		foreach (@$nums) {
 			$sth->execute($_->[0]);
 			my $smsg = $sth->fetchrow_hashref;
-			$cb->(PublicInbox::Over::load_from_row($smsg)) or
-				return;
+			$smsg = PublicInbox::Over::load_from_row($smsg);
+			$cb->($self, $smsg, @arg) or return;
 		}
 		return if $nr != $lim;
 	}
 }
 
+sub _resolve_mid_to_tid {
+	my ($self, $smsg, $tid) = @_;
+	my $cur_tid = $smsg->{tid};
+	if (defined $$tid) {
+		merge_threads($self, $$tid, $cur_tid);
+	} else {
+		$$tid = $cur_tid;
+	}
+	1;
+}
+
 # this will create a ghost as necessary
 sub resolve_mid_to_tid {
 	my ($self, $mid) = @_;
 	my $tid;
-	each_by_mid($self, $mid, ['tid'], sub {
-		my ($smsg) = @_;
-		my $cur_tid = $smsg->{tid};
-		if (defined $tid) {
-			merge_threads($self, $tid, $cur_tid);
-		} else {
-			$tid = $cur_tid;
-		}
-		1;
-	});
+	each_by_mid($self, $mid, ['tid'], \&_resolve_mid_to_tid, \$tid);
 	defined $tid ? $tid : create_ghost($self, $mid);
 }
 
@@ -271,6 +273,22 @@ sub add_overview {
 	add_over($self, [ @$smsg{qw(ts ds num)}, $mids, $refs, $xpath, $dd ]);
 }
 
+sub _add_over {
+	my ($self, $smsg, $mid, $refs, $old_tid, $v) = @_;
+	my $cur_tid = $smsg->{tid};
+	my $n = $smsg->{num};
+	die "num must not be zero for $mid" if !$n;
+	$$old_tid = $cur_tid unless defined $$old_tid;
+	if ($n > 0) { # regular mail
+		merge_threads($self, $$old_tid, $cur_tid);
+	} elsif ($n < 0) { # ghost
+		link_refs($self, $refs, $$old_tid);
+		$self->delete_by_num($n);
+		$$v++;
+	}
+	1;
+}
+
 sub add_over {
 	my ($self, $values) = @_;
 	my ($ts, $ds, $num, $mids, $refs, $xpath, $ddd) = @$values;
@@ -281,21 +299,8 @@ sub add_over {
 	$self->delete_by_num($num, \$old_tid);
 	foreach my $mid (@$mids) {
 		my $v = 0;
-		each_by_mid($self, $mid, ['tid'], sub {
-			my ($cur) = @_;
-			my $cur_tid = $cur->{tid};
-			my $n = $cur->{num};
-			die "num must not be zero for $mid" if !$n;
-			$old_tid = $cur_tid unless defined $old_tid;
-			if ($n > 0) { # regular mail
-				merge_threads($self, $old_tid, $cur_tid);
-			} elsif ($n < 0) { # ghost
-				link_refs($self, $refs, $old_tid);
-				$self->delete_by_num($n);
-				$v++;
-			}
-			1;
-		});
+		each_by_mid($self, $mid, ['tid'], \&_add_over,
+				$mid, $refs, \$old_tid, \$v);
 		$v > 1 and warn "BUG: vivified multiple ($v) ghosts for $mid\n";
 		$vivified += $v;
 	}
@@ -320,35 +325,39 @@ INSERT INTO id2num (id, num) VALUES (?,?)
 	}
 }
 
+sub _remove_oid {
+	my ($self, $smsg, $oid, $nr) = @_;
+	if (!defined($oid) || $smsg->{blob} eq $oid) {
+		$self->delete_by_num($smsg->{num});
+		$$nr++;
+	}
+	1;
+}
+
 # returns number of removed messages
 # $oid may be undef to match only on $mid
 sub remove_oid {
 	my ($self, $oid, $mid) = @_;
 	my $nr = 0;
 	$self->begin_lazy;
-	each_by_mid($self, $mid, ['ddd'], sub {
-		my ($smsg) = @_;
-		if (!defined($oid) || $smsg->{blob} eq $oid) {
-			$self->delete_by_num($smsg->{num});
-			$nr++;
-		}
-		1;
-	});
+	each_by_mid($self, $mid, ['ddd'], \&_remove_oid, $oid, \$nr);
 	$nr;
 }
 
+sub _num_mid0_for_oid {
+	my ($self, $smsg, $oid, $res) = @_;
+	my $blob = $smsg->{blob};
+	return 1 if (!defined($blob) || $blob ne $oid); # continue;
+	@$res = ($smsg->{num}, $smsg->{mid});
+	0; # done
+}
+
 sub num_mid0_for_oid {
 	my ($self, $oid, $mid) = @_;
-	my ($num, $mid0);
+	my $res = [];
 	$self->begin_lazy;
-	each_by_mid($self, $mid, ['ddd'], sub {
-		my ($smsg) = @_;
-		my $blob = $smsg->{blob};
-		return 1 if (!defined($blob) || $blob ne $oid); # continue;
-		($num, $mid0) = ($smsg->{num}, $smsg->{mid});
-		0; # done
-	});
-	($num, $mid0);
+	each_by_mid($self, $mid, ['ddd'], \&_num_mid0_for_oid, $oid, $res);
+	@$res, # ($num, $mid0);
 }
 
 sub create_tables {

^ permalink raw reply related	[relevance 6%]

* [PATCH 0/8] indexing cleanup and code reduction
@ 2020-07-17  6:31  7% Eric Wong
  2020-07-17  6:31  6% ` [PATCH 5/8] overidx: each_by_mid: pass self and args to callbacks Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-07-17  6:31 UTC (permalink / raw)
  To: meta

Some bigger indexing updates coming, but first we can
reduce allocations and get rid of some overly complicated
unindexing logic.

Eric Wong (8):
  v2: use v5.10.1, parent.pm, drop warnings
  drop binmode usage
  import: use common capitalization for filtering headers
  with_umask: pass args to callback
  overidx: each_by_mid: pass self and args to callbacks
  overidx: favor non-OO sub dispatch for internal subs
  searchidx: use v5.10.1, parent.pm, drop warnings
  search: simplify unindexing

 lib/PublicInbox/Import.pm         |   4 +-
 lib/PublicInbox/InboxWritable.pm  |  42 ++++---
 lib/PublicInbox/OverIdx.pm        | 126 +++++++++++----------
 lib/PublicInbox/SearchIdx.pm      | 178 ++++++++++++------------------
 lib/PublicInbox/SearchIdxShard.pm |  12 +-
 lib/PublicInbox/V2Writable.pm     | 116 +++++++++----------
 lib/PublicInbox/Xapcmd.pm         |  35 +++---
 t/search.t                        |   6 +-
 8 files changed, 246 insertions(+), 273 deletions(-)

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-07-17  6:31  7% [PATCH 0/8] indexing cleanup and code reduction Eric Wong
2020-07-17  6:31  6% ` [PATCH 5/8] overidx: each_by_mid: pass self and args to callbacks Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).