From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 8/9] smsg: make parse_references an object method
Date: Sun, 24 Jan 2021 04:46:54 -0700 [thread overview]
Message-ID: <20210124114655.12815-9-e@80x24.org> (raw)
In-Reply-To: <20210124114655.12815-1-e@80x24.org>
Having parse_references in OverIdx was awkward, and Smsg is
a better place for it. The sub body is moved unchanged; callers
now invoke it as $smsg->parse_references(...).
---
lib/PublicInbox/LeiXSearch.pm | 3 +--
lib/PublicInbox/OverIdx.pm | 22 +---------------------
lib/PublicInbox/SearchIdx.pm | 2 +-
lib/PublicInbox/Smsg.pm | 22 +++++++++++++++++++++-
4 files changed, 24 insertions(+), 25 deletions(-)
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 2bedf000..841257c1 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -169,7 +169,7 @@ sub each_eml { # callback for MboxReader->mboxrd
my ($eml, $self, $lei, $each_smsg) = @_;
my $smsg = bless {}, 'PublicInbox::Smsg';
$smsg->populate($eml);
- PublicInbox::OverIdx::parse_references($smsg, $eml, mids($eml));
+ $smsg->parse_references($eml, mids($eml));
$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
delete @$smsg{qw(From Subject -ds -ts)};
if (my $startq = delete($self->{5})) { wait_startq($startq) }
@@ -381,7 +381,6 @@ sub ipc_atfork_prepare {
my ($self) = @_;
if (exists $self->{remotes}) {
require PublicInbox::MboxReader;
- require PublicInbox::OverIdx; # parse_references
require IO::Uncompress::Gunzip;
}
# FDS: (0: done_wr, 1: stdout|mbox, 2: stderr,
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index e606dcf5..985c5473 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -243,26 +243,6 @@ sub link_refs {
$tid;
}
-sub parse_references ($$$) {
- my ($smsg, $hdr, $mids) = @_;
- my $refs = references($hdr);
- push(@$refs, @$mids) if scalar(@$mids) > 1;
- return $refs if scalar(@$refs) == 0;
-
- # prevent circular references here:
- my %seen = ( $smsg->{mid} => 1 );
- my @keep;
- foreach my $ref (@$refs) {
- if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
- warn "References: <$ref> too long, ignoring\n";
- next;
- }
- push(@keep, $ref) unless $seen{$ref}++;
- }
- $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
- \@keep;
-}
-
# normalize subjects so they are suitable as pathnames for URLs
# XXX: consider for removal
sub subject_path ($) {
@@ -283,7 +263,7 @@ sub add_overview {
my ($self, $eml, $smsg) = @_;
$smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
my $mids = mids_for_index($eml);
- my $refs = parse_references($smsg, $eml, $mids);
+ my $refs = $smsg->parse_references($eml, $mids);
$mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid};
$smsg->{mid} //= '';
my $subj = $smsg->{subject};
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 7f7b980d..826302de 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -380,7 +380,7 @@ sub eml2doc ($$$;$) {
if (!$self->{-skip_docdata}) {
# WWW doesn't need {to} or {cc}, only NNTP
$smsg->{to} = $smsg->{cc} = '';
- PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
+ $smsg->parse_references($eml, $mids);
my $data = $smsg->to_doc_data;
$doc->set_data($data);
}
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index c6ff7f52..2b72e8b5 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -12,7 +12,7 @@ use strict;
use warnings;
use base qw(Exporter);
our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
@@ -69,6 +69,26 @@ sub psgi_cull ($) {
$self;
}
+sub parse_references ($$$) {
+ my ($smsg, $hdr, $mids) = @_;
+ my $refs = references($hdr);
+ push(@$refs, @$mids) if scalar(@$mids) > 1;
+ return $refs if scalar(@$refs) == 0;
+
+ # prevent circular references here:
+ my %seen = ( $smsg->{mid} => 1 );
+ my @keep;
+ foreach my $ref (@$refs) {
+ if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
+ warn "References: <$ref> too long, ignoring\n";
+ next;
+ }
+ push(@keep, $ref) unless $seen{$ref}++;
+ }
+ $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+ \@keep;
+}
+
# used for v2, Import and v1 non-SQLite WWW code paths
sub populate {
my ($self, $hdr, $sync) = @_;
next prev parent reply other threads:[~2021-01-24 11:46 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-24 11:46 [PATCH 0/9] lei remotes fixes and updates Eric Wong
2021-01-24 11:46 ` [PATCH 1/9] lei q: limit concurrency to 4 remote connections Eric Wong
2021-01-24 11:46 ` [PATCH 2/9] ipc: wq supports arbitrarily large payloads Eric Wong
2021-01-24 11:46 ` [PATCH 3/9] ipc: get rid of wq_set_recv_modes Eric Wong
2021-01-24 11:46 ` [PATCH 4/9] lei q: disable remote externals if locals exist Eric Wong
2021-01-24 11:46 ` [PATCH 5/9] lei q: honor --no-local to force remote searches Eric Wong
2021-01-24 12:31 ` exit codes [was: [PATCH 5/9] lei q: honor --no-local to force remote searches] Eric Wong
2021-01-24 11:46 ` [PATCH 6/9] lei_xsearch: use curl -d '' for nginx compatibility Eric Wong
2021-01-24 11:46 ` [PATCH 7/9] lei q: fix JSON overview with remote externals Eric Wong
2021-01-24 12:37 ` Eric Wong
2021-01-24 11:46 ` Eric Wong [this message]
2021-01-24 11:46 ` [PATCH 9/9] smsg: parse_references: micro-optimization Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210124114655.12815-9-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).