about summary refs log tree commit homepage
path: root/lib/PublicInbox/IdxStack.pm
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-07-24 05:55:49 +0000
committer Eric Wong <e@yhbt.net> 2020-07-25 20:48:18 +0000
commit c386663dd9ffec6ebbe64e7bb5096ee3df7d273d (patch)
tree 0000c6f12609202892e8c9a42826ad44f53234d2 /lib/PublicInbox/IdxStack.pm
parent 4441a38481ed2c9472767a6387d8a2455ea34ad5 (diff)
download public-inbox-c386663dd9ffec6ebbe64e7bb5096ee3df7d273d.tar.gz
This avoids pinning a potentially large chunk of memory from
`git-log --reverse' into RAM (or triggering less predictable
swap behavior).  Instead it uses a contiguous temporary file
with a fixed-size record for every blob we'll need to index.
Diffstat (limited to 'lib/PublicInbox/IdxStack.pm')
-rw-r--r-- lib/PublicInbox/IdxStack.pm 52
1 files changed, 52 insertions, 0 deletions
diff --git a/lib/PublicInbox/IdxStack.pm b/lib/PublicInbox/IdxStack.pm
new file mode 100644
index 00000000..b43b8064
--- /dev/null
+++ b/lib/PublicInbox/IdxStack.pm
@@ -0,0 +1,52 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# temporary stack for public-inbox-index
+package PublicInbox::IdxStack;
+use v5.10.1;
+use strict;
+use Fcntl qw(:seek);
+# pack/unpack template shared by push_rec and pop_rec for one record:
+#   A1 - one-character file tag
+#   QQ - two unsigned 64-bit integers when this perl's pack supports "Q"
+#        (the eval yields a true value); otherwise II, native unsigned ints
+#   H* - hex string (the blob OID), stored two hex digits per byte
+use constant FMT => eval { pack('Q', 1) } ? 'A1QQH*' : 'A1IIH*';
+
+# Constructor.  The stack is backed by an anonymous temporary file and
+# starts out in the write phase, with the handle kept in {wr}.
+# The first argument (the class) is ignored, the object is always blessed
+# into this package; the second argument is stored as {latest_cmt}.
+# Dies if the temporary file cannot be opened.
+sub new {
+        my (undef, $latest_cmt) = @_;
+        # an undef filename with '+>' requests an anonymous temporary file
+        open(my $tmp, '+>', undef) or die "open: $!";
+        my %self = (wr => $tmp, latest_cmt => $latest_cmt);
+        bless \%self, __PACKAGE__;
+}
+
+# Append one record to the stack (write phase, i.e. before read_prepare).
+#   $file_char - single-character tag, [a|m]; packed as "A1"
+#   $at, $ct   - unsigned integers (presumably author and commit times;
+#                confirm against the caller)
+#   $blob_oid  - hex blob object ID; packed as "H*", so its length
+#                determines the size of the packed record
+# Returns the running total of bytes pushed so far.
+# Dies if the write fails or if this record's packed size differs from
+# the first record's.
+sub push_rec {
+        my ($self, $file_char, $at, $ct, $blob_oid) = @_;
+        my $rec = pack(FMT, $file_char, $at, $ct, $blob_oid);
+        my $len = length($rec);
+        # pop_rec seeks in multiples of {rec_size} and num_records divides
+        # by it, so every record must match the size of the first one.
+        # Refuse to write a mismatched record instead of silently
+        # corrupting later reads.
+        my $rec_size = $self->{rec_size} //= $len;
+        $len == $rec_size or
+                die "record size mismatch ($len != $rec_size)";
+        print { $self->{wr} } $rec or die "print: $!";
+        $self->{tot_size} += $len;
+}
+
+# Number of records currently accounted for: total bytes divided by the
+# per-record size.  Returns 0 when no record size has been established
+# (nothing was ever pushed).
+sub num_records {
+        my ($self) = @_;
+        my $rec_size = $self->{rec_size} or return 0;
+        $self->{tot_size} / $rec_size;
+}
+
+# Switch from the write phase to the read phase and return $self.
+# The handle is moved from {wr} to {rd} and its buffered output is
+# flushed to the temporary file.  Dies if the flush fails.
+sub read_prepare {
+        my ($self) = @_;
+        # Method calls on plain filehandles only load IO::File on demand
+        # from perl 5.14 onwards; this file declares v5.10.1, so make
+        # sure ->flush can be resolved on older perls, too.
+        require IO::Handle;
+        my $io = $self->{rd} = delete($self->{wr});
+        $io->flush or die "flush: $!";
+        $self;
+}
+
+# Remove and return the most recently pushed record (read phase; the
+# handle is taken from {rd}, which read_prepare sets).
+# In list context, returns the fields unpacked with FMT, in the order
+# push_rec packed them: file_char, at, ct, blob_oid.
+# Returns nothing once the stack is empty or if nothing was ever pushed.
+# Dies on seek failure, read failure, or a short read.
+sub pop_rec {
+        my ($self) = @_;
+        my $sz = $self->{rec_size} or return; # nothing was ever pushed
+        my $rec_pos = $self->{tot_size} - $sz;
+        # Check bounds before committing the new size: an exhausted stack
+        # must stay at zero rather than drift negative, otherwise
+        # num_records would report a negative count afterwards.
+        return if $rec_pos < 0;
+        $self->{tot_size} = $rec_pos;
+        my $io = $self->{rd};
+        seek($io, $rec_pos, SEEK_SET) or die "seek: $!";
+        my $r = read($io, my $buf, $sz);
+        defined($r) or die "read: $!";
+        $r == $sz or die "read($r != $sz)";
+        unpack(FMT, $buf);
+}
+
+1;