about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-06-21 00:21:33 +0000
committerEric Wong <e@yhbt.net>2020-06-23 00:22:18 +0000
commit5759c29ba5e0c8b2b1135e528e133dc7dde219f8 (patch)
tree841e315a7c4c54910bcc72868f4eb92a51fcc092 /lib
parent3ba0028804c50b75b1854d0c2bf637ebce0aa8b1 (diff)
downloadpublic-inbox-5759c29ba5e0c8b2b1135e528e133dc7dde219f8.tar.gz
For archivists with only newer mail archives, this option allows
reserving NNTP article numbers for yet-to-be-archived
old messages.  Indexers will need to be updated to support this
feature in future commits.

-V1 inboxes will now be initialized with SQLite and Xapian
support if this option is used, or if --indexlevel= is
specified.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/InboxWritable.pm13
-rw-r--r--lib/PublicInbox/Msgmap.pm26
-rw-r--r--lib/PublicInbox/SearchIdx.pm1
-rw-r--r--lib/PublicInbox/V2Writable.pm3
4 files changed, 41 insertions, 2 deletions
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index c54be046..f9e28502 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -39,10 +39,21 @@ sub assert_usable_dir {
 
 sub init_inbox {
         my ($self, $shards, $skip_epoch, $skip_artnum) = @_;
-        # TODO: honor skip_artnum
         if ($self->version == 1) {
                 my $dir = assert_usable_dir($self);
                 PublicInbox::Import::init_bare($dir);
+                if (defined($self->{indexlevel}) || defined($skip_artnum)) {
+                        require PublicInbox::SearchIdx;
+                        my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
+                        $sidx->begin_txn_lazy;
+                        $self->with_umask(sub {
+                                my $mm = PublicInbox::Msgmap->new($dir, 1);
+                                $mm->{dbh}->begin_work;
+                                $mm->skip_artnum($skip_artnum);
+                                $mm->{dbh}->commit;
+                        }) if defined($skip_artnum);
+                        $sidx->commit_txn_lazy;
+                }
         } else {
                 my $v2w = importer($self);
                 $v2w->init_inbox($shards, $skip_epoch, $skip_artnum);
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index d115cbce..aa07e344 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -270,4 +270,30 @@ sub atfork_prepare {
         %$self = (tmp_name => $f, pid => $$);
 }
 
+sub skip_artnum {
+        my ($self, $skip_artnum) = @_;
+        return meta_accessor($self, 'skip_artnum') if !defined($skip_artnum);
+
+        my $cur = num_highwater($self) // 0;
+        if ($skip_artnum < $cur) {
+                die "E: current article number $cur ",
+                        "exceeds --skip-artnum=$skip_artnum\n";
+        } else {
+                my $ok;
+                for (1..10) {
+                        my $mid = 'skip'.rand.'@'.rand.'.example.com';
+                        $ok = mid_set($self, $skip_artnum, $mid);
+                        if ($ok) {
+                                mid_delete($self, $mid);
+                                last;
+                        }
+                }
+                $ok or die '--skip-artnum failed';
+
+                # in the future, the indexer may use this value for
+                # new messages in old epochs
+                meta_accessor($self, 'skip_artnum', $skip_artnum);
+        }
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 85821ea7..00e63938 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -694,6 +694,7 @@ sub _git_log {
                 } else {
                         # normal regen is for for fresh data
                         $self->{regen_down} = $fcount;
+                        $self->{regen_down} += $high unless $opts->{reindex};
                 }
         } else {
                 # Give oldest messages the smallest numbers
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 91379431..a0f041dd 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -128,12 +128,13 @@ sub new {
 
 # public (for now?)
 sub init_inbox {
-        my ($self, $shards, $skip_epoch) = @_;
+        my ($self, $shards, $skip_epoch, $skip_artnum) = @_;
         if (defined $shards) {
                 $self->{parallel} = 0 if $shards == 0;
                 $self->{shards} = $shards if $shards > 0;
         }
         $self->idx_init;
+        $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum;
         my $epoch_max = -1;
         git_dir_latest($self, \$epoch_max);
         if (defined $skip_epoch && $epoch_max == -1) {