diff options
author | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-02-22 19:10:31 +0000 |
---|---|---|
committer | Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> | 2018-02-22 21:18:15 +0000 |
commit | 61ecf6a904b868ce791115231b11859d725c6113 (patch) | |
tree | 8618913e02c0946ac435c0c5234aeec3cf174bde /lib | |
parent | 15ff4705261ec64b7bbfa3c9a52c20951c78d67d (diff) | |
download | public-inbox-61ecf6a904b868ce791115231b11859d725c6113.tar.gz |
This should give us an idea of how much of a problem deduplication will be.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 2 |
2 files changed, 5 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index cc7e7ec9..f9207e94 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -515,13 +515,15 @@ sub unindex_blob {
 }
 
 sub index_mm {
-	my ($self, $mime) = @_;
+	my ($self, $mime, $warn_existing) = @_;
 	my $mid = mid_clean(mid_mime($mime));
 	my $mm = $self->{mm};
 	my $num = $mm->mid_insert($mid);
+	return $num if defined $num;
 
+	warn "<$mid> reused\n" if $warn_existing;
 	# fallback to num_for since filters like RubyLang set the number
-	defined $num ? $num : $mm->num_for($mid);
+	$mm->num_for($mid);
 }
 
 sub unindex_mm {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index cf19c761..29ed23ca 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -63,7 +63,7 @@ sub add {
 	my ($len, $msgref) = @{$im->{last_object}};
 
 	$self->idx_init;
-	my $num = $self->{all}->index_mm($mime);
+	my $num = $self->{all}->index_mm($mime, 1);
 	my $nparts = $self->{partitions};
 	my $part = $num % $nparts;
 	my $idx = $self->idx_part($part);