From d7fcdec712accc212bcfa35e50ade1233eb9beb3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 15 Aug 2015 09:28:32 +0000 Subject: extract redundant Message-ID handling code Quit repeating ourselves and use a common MID module instead. --- lib/PublicInbox/Hval.pm | 13 +++---------- lib/PublicInbox/MID.pm | 27 +++++++++++++++++++++++++++ lib/PublicInbox/Search.pm | 31 ++++++------------------------- lib/PublicInbox/View.pm | 8 +++----- 4 files changed, 39 insertions(+), 40 deletions(-) create mode 100644 lib/PublicInbox/MID.pm (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 68f89546..d8b31c84 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -8,6 +8,7 @@ use warnings; use fields qw(raw href); use Encode qw(find_encoding); use URI::Escape qw(uri_escape_utf8); +use PublicInbox::MID qw/mid_clean mid_compressed/; my $enc_ascii = find_encoding('us-ascii'); @@ -25,16 +26,8 @@ sub new { sub new_msgid { my ($class, $msgid) = @_; - $msgid =~ s/\A\s*<?//; - $msgid =~ s/>?\s*\z//; - - if (length($msgid) <= 40) { - $class->new($msgid); - } else { - require Digest::SHA; - my $hex = Digest::SHA::sha1_hex($msgid); - $class->new($msgid, $hex); - } + $msgid = mid_clean($msgid); + $class->new($msgid, mid_compressed($msgid)); } sub new_oneline { diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm new file mode 100644 index 00000000..e5a30a1b --- /dev/null +++ b/lib/PublicInbox/MID.pm @@ -0,0 +1,27 @@ +# Copyright (C) 2015, all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +package PublicInbox::MID; +use strict; +use warnings; +use base qw/Exporter/; +our @EXPORT_OK = qw/mid_clean mid_compressed/; +use Digest::SHA qw/sha1_hex/; +use constant MID_MAX => 40; # SHA-1 hex length + +sub mid_clean { + my ($mid) = @_; + defined($mid) or die "no Message-ID"; + # MDA->precheck did more checking for us + $mid =~ s/\A\s*<?//; + $mid =~ s/>?\s*\z//; + $mid; +} + +# this is idempotent +sub mid_compressed { + my 
($mid) = @_; + return $mid if (length($mid) <= MID_MAX); + sha1_hex($mid); +} + +1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 15bb9f62..e88bfb16 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -6,25 +6,22 @@ use strict; use warnings; use PublicInbox::SearchMsg; use base qw/Exporter/; -use Digest::SHA qw//; use Search::Xapian qw/:standard/; require PublicInbox::View; use Date::Parse qw/str2time/; use POSIX qw//; use Email::MIME; +use PublicInbox::MID qw/mid_clean mid_compressed/; -our @EXPORT = qw/xpfx mid_compressed/; +our @EXPORT = qw/xpfx/; use constant { TS => 0, - SHA1HEX_LEN => 40, SCHEMA_VERSION => 0, LANG => 'english', QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, }; -use constant MID_MAX => SHA1HEX_LEN; - # setup prefixes my %bool_pfx_internal = ( type => 'T', # "mail" or "ghost" @@ -54,13 +51,6 @@ while (my ($k, $v) = each %all_pfx) { my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail'); -# this is idempotent -sub mid_compressed { - my ($mid) = @_; - return $mid if (length($mid) <= MID_MAX); - Digest::SHA::sha1_hex($mid); -} - sub new { my ($class, $git_dir, $writable) = @_; # allow concurrent versions for easier rollback: @@ -86,7 +76,7 @@ sub add_message { my $db = $self->{xdb}; my $doc_id; - my $mid = clean_mid($mime->header('Message-ID')); + my $mid = mid_clean($mime->header('Message-ID')); $mid = mid_compressed($mid); my $was_ghost = 0; my $ct_msg = $mime->header('Content-Type') || 'text/plain'; @@ -211,7 +201,7 @@ sub remove_message { my ($self, $mid) = @_; my $db = $self->{xdb}; my $doc_id; - $mid = clean_mid($mid); + $mid = mid_clean($mid); $mid = mid_compressed($mid); $db->begin_transaction; @@ -241,7 +231,7 @@ sub query { # given a message ID, get replies to a message sub get_replies { my ($self, $mid, $opts) = @_; - $mid = clean_mid($mid); + $mid = mid_clean($mid); $mid = mid_compressed($mid); my $qp = $self->qp; my $irt = 
$qp->parse_query("inreplyto:$mid", 0); @@ -344,15 +334,6 @@ sub date_range_processor { $_[0]->{drp} ||= Search::Xapian::DateValueRangeProcessor->new(TS); } -sub clean_mid { - my ($mid) = @_; - defined($mid) or die "no Message-ID"; - # MDA->precheck did more checking for us - $mid =~ s/\A\s*<?//; - $mid =~ s/>?\s*\z//; - $mid; -} - sub link_message { my ($self, $smsg, $is_ghost) = @_; @@ -410,7 +391,7 @@ sub link_message_to_parents { sub lookup_message { my ($self, $mid) = @_; - $mid = clean_mid($mid); + $mid = mid_clean($mid); $mid = mid_compressed($mid); my $doc_id = $self->find_unique_doc_id('mid', $mid); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 30759a30..c2dbb7ed 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -3,11 +3,12 @@ package PublicInbox::View; use strict; use warnings; -use PublicInbox::Hval; use URI::Escape qw/uri_escape_utf8/; use Encode qw/find_encoding/; use Encode::MIME::Header; use Email::MIME::ContentType qw/parse_content_type/; +use PublicInbox::Hval; +use PublicInbox::MID qw/mid_clean mid_compressed/; require POSIX; # TODO: make these constants tunable @@ -366,12 +367,9 @@ sub linkify_refs { } @_); } -require Digest::SHA; sub anchor_for { my ($msgid) = @_; - $msgid =~ s/\A\s*<?//; - $msgid =~ s/>?\s*\z//; - 'm' . Digest::SHA::sha1_hex($msgid); + 'm' . mid_compressed(mid_clean($msgid)); } 1; -- cgit v1.2.3-24-ge0c7