diff options
author | Eric Wong <e@80x24.org> | 2015-09-03 01:57:11 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2015-09-03 02:04:40 +0000 |
commit | 81a9c1b476987d845b340ab9013d26cf4487cb9a (patch) | |
tree | 65a8c150f97ca35dbc987711ed14d8070027aaf3 /lib | |
parent | fd138a0197450d30677441afb014584f34a661da (diff) | |
download | public-inbox-81a9c1b476987d845b340ab9013d26cf4487cb9a.tar.gz |
We'll continue to compress long Message-IDs in URLs (which we know about), but we will store full, uncompressed Message-IDs in the Xapian database to facilitate lookups in external databases.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/ExtMsg.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 37 | ||||
-rw-r--r-- | lib/PublicInbox/SearchMsg.pm | 4 |
4 files changed, 22 insertions, 34 deletions
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 1c0887cd..bdbff78f 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -12,7 +12,6 @@ sub ext_msg { my $pi_config = $ctx->{pi_config}; my $listname = $ctx->{listname}; my $mid = $ctx->{mid}; - my $cmid = mid_compress($mid); eval { require PublicInbox::Search }; my $have_xap = $@ ? 0 : 1; @@ -35,13 +34,13 @@ sub ext_msg { if ($have_xap) { my $doc_id = eval { my $s = PublicInbox::Search->new($git_dir); - $s->find_unique_doc_id('mid', $cmid); + $s->find_unique_doc_id('mid', $mid); }; if ($@) { # xapian not configured for this repo } else { # maybe we found it! - return r302($url, $cmid) if (defined $doc_id); + return r302($url, $mid) if (defined $doc_id); # no point in trying the fork fallback if we # know Xapian is up-to-date but missing the @@ -55,7 +54,7 @@ sub ext_msg { } # Xapian not installed or configured for some repos - my $path = "HEAD:" . mid2path($cmid); + my $path = "HEAD:" . mid2path($mid); foreach my $n (@nox) { my @cmd = ('git', "--git-dir=$n->{git_dir}", 'cat-file', @@ -70,7 +69,7 @@ sub ext_msg { my $type = eval { local $/; <$fh> }; close $fh; if ($? 
== 0 && $type eq "blob\n") { - return r302($n->{url}, $cmid); + return r302($n->{url}, $mid); } } } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 8b32ef38..eb49f724 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -26,14 +26,15 @@ use constant { # 6 - preserve References: order in document data # 7 - remove references and inreplyto terms # 8 - remove redundant/unneeded document data - SCHEMA_VERSION => 8, + # 9 - disable Message-ID compression + SCHEMA_VERSION => 9, QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, }; # setup prefixes my %bool_pfx_internal = ( type => 'T', # "mail" or "ghost" - mid => 'Q', # uniQue id (Message-ID or mid_compress) + mid => 'Q', # uniQue id (Message-ID) ); my %bool_pfx_external = ( @@ -171,7 +172,6 @@ sub date_range_processor { sub lookup_message { my ($self, $mid) = @_; $mid = mid_clean($mid); - $mid = mid_compress($mid); my $doc_id = $self->find_unique_doc_id('mid', $mid); my $smsg; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 32e07142..ee852683 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -41,8 +41,7 @@ sub add_message { my $db = $self->{xdb}; my $doc_id; - my $mid_orig = mid_clean($mime->header('Message-ID')); - my $mid = mid_compress($mid_orig); + my $mid = mid_clean($mime->header('Message-ID')); my $was_ghost = 0; my $ct_msg = $mime->header('Content-Type') || 'text/plain'; @@ -139,7 +138,7 @@ sub add_message { }; if ($@) { - warn "failed to index message <$mid_orig>: $@\n"; + warn "failed to index message <$mid>: $@\n"; return undef; } $doc_id; @@ -147,11 +146,10 @@ sub add_message { # returns deleted doc_id on success, undef on missing sub remove_message { - my ($self, $mid_orig) = @_; + my ($self, $mid) = @_; my $db = $self->{xdb}; my $doc_id; - $mid_orig = mid_clean($mid_orig); - my $mid = mid_compress($mid_orig); + $mid = mid_clean($mid); eval { $doc_id = $self->find_unique_doc_id('mid', 
$mid); @@ -159,7 +157,7 @@ sub remove_message { }; if ($@) { - warn "failed to remove message <$mid_orig>: $@\n"; + warn "failed to remove message <$mid>: $@\n"; return undef; } $doc_id; @@ -204,32 +202,24 @@ sub link_message { sub link_message_to_parents { my ($self, $smsg) = @_; my $doc = $smsg->{doc}; - my $mid = mid_compress($smsg->mid); + my $mid = $smsg->mid; my $mime = $smsg->mime; my $refs = $mime->header('References'); my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : (); - my $irt = $mime->header('In-Reply-To'); - if ($irt) { - $irt = mid_compress(mid_clean($irt)); - - # maybe some crazies will try to make a circular reference: - if ($irt eq $mid) { - $irt = undef; - } else { - # last References should be $irt - # we will de-dupe later - push @refs, $irt; - } + if (my $irt = $mime->header('In-Reply-To')) { + # last References should be $irt + # we will de-dupe later + push @refs, mid_clean($irt); } my $tid; if (@refs) { - my @crefs = map { mid_compress($_) } @refs; my %uniq = ($mid => 1); + my @orig_refs = @refs; + @refs = (); # prevent circular references via References: here: - @refs = (); - foreach my $ref (@crefs) { + foreach my $ref (@orig_refs) { next if $uniq{$ref}; $uniq{$ref} = 1; push @refs, $ref; @@ -342,7 +332,6 @@ sub _resolve_mid_to_tid { sub create_ghost { my ($self, $mid, $tid) = @_; - $mid = mid_compress($mid); $tid = $self->next_thread_id unless defined $tid; my $doc = Search::Xapian::Document->new; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 1821b076..38918236 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -9,7 +9,7 @@ use Email::Address qw//; use Email::Simple qw//; use POSIX qw//; use Date::Parse qw/str2time/; -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean/; use Encode qw/find_encoding/; my $enc_utf8 = find_encoding('UTF-8'); our $PFX2TERM_RE = undef; @@ -167,7 +167,7 @@ sub _extract_mid { my ($self) = @_; my $mid = 
$self->mime->header('Message-ID'); - $mid ? mid_compress(mid_clean($mid)) : $mid; + defined $mid ? mid_clean($mid) : $mid; } sub mime { |