about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-10-05 09:40:17 +0000
committerEric Wong <e@80x24.org>2021-10-05 23:09:45 +0000
commitef68ada3b207fdb511ebe6d33b072a84277e6cd6 (patch)
tree1154347962fccb85bbf0d8fcbb6489f4045448a7 /lib/PublicInbox/SearchIdx.pm
parent88344a50f62a39dcb5673b0aa42ebec7ec44bd71 (diff)
downloadpublic-inbox-ef68ada3b207fdb511ebe6d33b072a84277e6cd6.tar.gz
This lets administrators reindex specific time ranges
according to git "approxidate" formats.  These arguments
are passed directly to underlying git-log(1) invocations
and may still reach into old epochs.

Since these options rely on git committer dates (which we infer
from the most recent Received: header), they are not guaranteed
to be strictly tied to git history and it's possible to
over/under-reindex some messages.  It's probably not a major
problem in practice, though; reindexing a few extra messages
is generally harmless aside from some extra device wear.

Since this currently relies on git-log, these options do not
affect -extindex, yet.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r--lib/PublicInbox/SearchIdx.pm33
1 files changed, 23 insertions, 10 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 5b0e4458..e5c872d5 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -750,7 +750,8 @@ sub index_sync {
         my ($self, $opt) = @_;
         delete $self->{lock_path} if $opt->{-skip_lock};
         $self->with_umask(\&_index_sync, $self, $opt);
-        if ($opt->{reindex} && !$opt->{quit}) {
+        if ($opt->{reindex} && !$opt->{quit} &&
+                        !grep(defined, @$opt{qw(since until)})) {
                 my %again = %$opt;
                 delete @again{qw(rethread reindex)};
                 index_sync($self, \%again);
@@ -775,8 +776,8 @@ sub v1_checkpoint ($$;$) {
         # $newest may be undef
         my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}};
         if (defined($newest)) {
-                my $cur = $self->{mm}->last_commit || '';
-                if (need_update($self, $cur, $newest)) {
+                my $cur = $self->{mm}->last_commit;
+                if (need_update($self, $sync, $cur, $newest)) {
                         $self->{mm}->last_commit($newest);
                 }
         }
@@ -786,7 +787,7 @@ sub v1_checkpoint ($$;$) {
         my $xdb = $self->{xdb};
         if ($newest && $xdb) {
                 my $cur = $xdb->get_metadata('last_commit');
-                if (need_update($self, $cur, $newest)) {
+                if (need_update($self, $sync, $cur, $newest)) {
                         $xdb->set_metadata('last_commit', $newest);
                 }
         }
@@ -870,9 +871,14 @@ sub log2stack ($$$) {
 
         # Count the new files so they can be added newest to oldest
         # and still have numbers increasing from oldest to newest
-        my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H
-                                --no-notes --no-color --no-renames --no-abbrev),
-                                $range);
+        my @cmd = qw(log --raw -r --pretty=tformat:%at-%ct-%H
+                        --no-notes --no-color --no-renames --no-abbrev);
+        for my $k (qw(since until)) {
+                my $v = $sync->{-opt}->{$k} // next;
+                next if !$sync->{-opt}->{reindex};
+                push @cmd, "--$k=$v";
+        }
+        my $fh = $git->popen(@cmd, $range);
         my ($at, $ct, $stk, $cmt);
         while (<$fh>) {
                 return if $sync->{quit};
@@ -928,10 +934,17 @@ sub is_ancestor ($$$) {
         $? == 0;
 }
 
-sub need_update ($$$) {
-        my ($self, $cur, $new) = @_;
+sub need_update ($$$$) {
+        my ($self, $sync, $cur, $new) = @_;
         my $git = $self->{ibx}->git;
-        return 1 if $cur && !is_ancestor($git, $cur, $new);
+        $cur //= ''; # XS Search::Xapian ->get_metadata doesn't give undef
+
+        # don't rewind if --{since,until,before,after} are in use
+        return if $cur ne '' &&
+                grep(defined, @{$sync->{-opt}}{qw(since until)}) &&
+                is_ancestor($git, $new, $cur);
+
+        return 1 if $cur ne '' && !is_ancestor($git, $cur, $new);
         my $range = $cur eq '' ? $new : "$cur..$new";
         chomp(my $n = $git->qx(qw(rev-list --count), $range));
         ($n eq '' || $n > 0);