From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 5D2D71FB05 for ; Sun, 24 Jan 2021 11:46:56 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 8/9] smsg: make parse_references an object method Date: Sun, 24 Jan 2021 04:46:54 -0700 Message-Id: <20210124114655.12815-9-e@80x24.org> In-Reply-To: <20210124114655.12815-1-e@80x24.org> References: <20210124114655.12815-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Having parse_references in OverIdx was awkward and Smsg is a better place for it. --- lib/PublicInbox/LeiXSearch.pm | 3 +-- lib/PublicInbox/OverIdx.pm | 22 +--------------------- lib/PublicInbox/SearchIdx.pm | 2 +- lib/PublicInbox/Smsg.pm | 22 +++++++++++++++++++++- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 2bedf000..841257c1 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -169,7 +169,7 @@ sub each_eml { # callback for MboxReader->mboxrd my ($eml, $self, $lei, $each_smsg) = @_; my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); - PublicInbox::OverIdx::parse_references($smsg, $eml, mids($eml)); + $smsg->parse_references($eml, mids($eml)); $smsg->{$_} //= '' for qw(from to cc ds subject references mid); delete @$smsg{qw(From Subject -ds -ts)}; if (my $startq = delete($self->{5})) { wait_startq($startq) } @@ -381,7 +381,6 @@ sub ipc_atfork_prepare { my ($self) = @_; if (exists $self->{remotes}) { require PublicInbox::MboxReader; - require PublicInbox::OverIdx; # parse_references require IO::Uncompress::Gunzip; } # FDS: (0: done_wr, 1: stdout|mbox, 2: stderr, diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index e606dcf5..985c5473 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -243,26 +243,6 @@ sub link_refs { $tid; } -sub parse_references ($$$) { - my ($smsg, $hdr, $mids) = @_; - my $refs = references($hdr); - push(@$refs, @$mids) if scalar(@$mids) > 1; - return $refs if scalar(@$refs) == 0; - - # prevent circular references here: - my %seen = ( $smsg->{mid} => 1 ); - my @keep; - foreach my $ref (@$refs) { - if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) { - warn "References: <$ref> too long, ignoring\n"; - next; - } - push(@keep, $ref) unless $seen{$ref}++; - } - $smsg->{references} = '<'.join('> <', @keep).'>' if @keep; - \@keep; -} - # normalize subjects so they are suitable as pathnames for URLs # XXX: consider for removal sub subject_path ($) { @@ -283,7 +263,7 @@ sub add_overview { my ($self, $eml, $smsg) = @_; $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; my $mids = mids_for_index($eml); - my $refs = parse_references($smsg, $eml, $mids); + my $refs = $smsg->parse_references($eml, $mids); $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid}; $smsg->{mid} //= ''; my $subj = $smsg->{subject}; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 7f7b980d..826302de 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -380,7 +380,7 @@ sub eml2doc ($$$;$) { if (!$self->{-skip_docdata}) { # WWW doesn't need {to} or {cc}, only NNTP $smsg->{to} = $smsg->{cc} = ''; - PublicInbox::OverIdx::parse_references($smsg, $eml, $mids); + $smsg->parse_references($eml, $mids); my $data = $smsg->to_doc_data; $doc->set_data($data); } diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm index c6ff7f52..2b72e8b5 100644 --- a/lib/PublicInbox/Smsg.pm +++ b/lib/PublicInbox/Smsg.pm @@ -12,7 +12,7 @@ use strict; use warnings; use base qw(Exporter); our @EXPORT_OK = qw(subject_normalized); -use PublicInbox::MID qw(mids); +use PublicInbox::MID qw(mids references); use PublicInbox::Address; use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); @@ -69,6 +69,26 @@ sub psgi_cull ($) { $self; } +sub parse_references ($$$) { + my ($smsg, $hdr, $mids) = @_; + my $refs = references($hdr); + push(@$refs, @$mids) if scalar(@$mids) > 1; + return $refs if scalar(@$refs) == 0; + + # prevent circular references here: + my %seen = ( $smsg->{mid} => 1 ); + my @keep; + foreach my $ref (@$refs) { + if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) { + warn "References: <$ref> too long, ignoring\n"; + next; + } + push(@keep, $ref) unless $seen{$ref}++; + } + $smsg->{references} = '<'.join('> <', @keep).'>' if @keep; + \@keep; +} + # used for v2, Import and v1 non-SQLite WWW code paths sub populate { my ($self, $hdr, $sync) = @_;