about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-02-22 19:10:31 +0000
committer: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-02-22 21:18:15 +0000
commit: 61ecf6a904b868ce791115231b11859d725c6113 (patch)
tree: 8618913e02c0946ac435c0c5234aeec3cf174bde /lib
parent: 15ff4705261ec64b7bbfa3c9a52c20951c78d67d (diff)
download: public-inbox-61ecf6a904b868ce791115231b11859d725c6113.tar.gz
This should give us an idea of how much of a problem deduplication
will be.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 6
-rw-r--r-- lib/PublicInbox/V2Writable.pm 2
2 files changed, 5 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index cc7e7ec9..f9207e94 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -515,13 +515,15 @@ sub unindex_blob {
 }
 
 sub index_mm {
-        my ($self, $mime) = @_;
+        my ($self, $mime, $warn_existing) = @_;
         my $mid = mid_clean(mid_mime($mime));
         my $mm = $self->{mm};
         my $num = $mm->mid_insert($mid);
+        return $num if defined $num;
 
+        warn "<$mid> reused\n" if $warn_existing;
         # fallback to num_for since filters like RubyLang set the number
-        defined $num ? $num : $mm->num_for($mid);
+        $mm->num_for($mid);
 }
 
 sub unindex_mm {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index cf19c761..29ed23ca 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -63,7 +63,7 @@ sub add {
         my ($len, $msgref) = @{$im->{last_object}};
 
         $self->idx_init;
-        my $num = $self->{all}->index_mm($mime);
+        my $num = $self->{all}->index_mm($mime, 1);
         my $nparts = $self->{partitions};
         my $part = $num % $nparts;
         my $idx = $self->idx_part($part);