user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] use raw header for Message-ID
@ 2016-03-03  3:23 Eric Wong
  2016-03-03  7:37 ` [PATCH 2/1] view: fix stupid typo in inline_dump Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2016-03-03  3:23 UTC (permalink / raw)
  To: meta

Message-IDs should not be MIME-encoded, but in case they are,
use the raw form for compatibility with ssoma (and possibly
other tools).  This prevents a potential problem where a
malicious client could confuse our storage layer into indexing
incorrect contents.
---
 Makefile.PL                   |  1 +
 lib/PublicInbox/Feed.pm       |  2 +-
 lib/PublicInbox/MDA.pm        |  3 ++-
 lib/PublicInbox/MID.pm        |  4 +++-
 lib/PublicInbox/SearchIdx.pm  | 19 ++++++++++---------
 lib/PublicInbox/SearchMsg.pm  |  8 +-------
 lib/PublicInbox/SearchView.pm |  4 ++--
 lib/PublicInbox/View.pm       | 28 ++++++++++++++--------------
 lib/PublicInbox/WWW.pm        |  4 ++--
 9 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/Makefile.PL b/Makefile.PL
index 8582203..904b6ad 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -22,6 +22,7 @@ WriteMakefile(
 		'Email::Filter' => 0,
 		'Email::MIME' => 0,
 		'Email::MIME::ContentType' => 0,
+		'Email::Simple' => 0,
 		'Encode::MIME::Header' => 0,
 		'File::Path::Expand' => 0,
 		'IPC::Run' => 0,
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 54cbf23..65375fa 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -320,7 +320,7 @@ sub add_to_feed {
 	my $midurl = $feed_opts->{midurl};
 
 	my $header_obj = $mime->header_obj;
-	my $mid = $header_obj->header('Message-ID');
+	my $mid = $header_obj->header_raw('Message-ID');
 	defined $mid or return 0;
 	$mid = PublicInbox::Hval->new_msgid($mid);
 	my $href = $mid->as_href;
diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index ba5f36b..003bac6 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -5,6 +5,7 @@
 package PublicInbox::MDA;
 use strict;
 use warnings;
+use Email::Simple;
 use Email::Address;
 use Date::Parse qw(strptime);
 use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable
@@ -21,7 +22,7 @@ sub __drop_plus {
 # do not allow Bcc, only Cc and To if recipient is set
 sub precheck {
 	my ($klass, $filter, $address) = @_;
-	my $simple = $filter->simple;
+	my Email::Simple $simple = $filter->simple;
 	my $mid = $simple->header("Message-ID");
 	return 0 if (length($mid) > MAX_MID_SIZE);
 	return 0 unless usable_str(length('<m@h>'), $mid) && $mid =~ /\@/;
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 169ffa7..78952b9 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -6,7 +6,7 @@ package PublicInbox::MID;
 use strict;
 use warnings;
 use base qw/Exporter/;
-our @EXPORT_OK = qw/mid_clean id_compress mid2path/;
+our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime/;
 use Digest::SHA qw/sha1_hex/;
 use constant MID_MAX => 40; # SHA-1 hex length
 
@@ -42,4 +42,6 @@ sub mid2path {
 	"$x2/$x38";
 }
 
+sub mid_mime ($) { $_[0]->header_obj->header_raw('Message-ID') }
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 415decd..63be681 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -10,7 +10,7 @@ package PublicInbox::SearchIdx;
 use strict;
 use warnings;
 use base qw(PublicInbox::Search);
-use PublicInbox::MID qw/mid_clean id_compress/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime/;
 require PublicInbox::Git;
 *xpfx = *PublicInbox::Search::xpfx;
 
@@ -54,7 +54,7 @@ sub add_message {
 	my $db = $self->{xdb};
 
 	my $doc_id;
-	my $mid = mid_clean($mime->header('Message-ID'));
+	my $mid = mid_clean(mid_mime($mime));
 	my $was_ghost = 0;
 	my $ct_msg = $mime->header('Content-Type') || 'text/plain';
 
@@ -222,9 +222,10 @@ sub link_message_to_parents {
 	my $doc = $smsg->{doc};
 	my $mid = $smsg->mid;
 	my $mime = $smsg->mime;
-	my $refs = $mime->header('References');
+	my $hdr = $mime->header_obj;
+	my $refs = $hdr->header_raw('References');
 	my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
-	if (my $irt = $mime->header('In-Reply-To')) {
+	if (my $irt = $hdr->header_raw('In-Reply-To')) {
 		# last References should be $irt
 		# we will de-dupe later
 		push @refs, mid_clean($irt);
@@ -274,29 +275,29 @@ sub index_blob {
 
 sub unindex_blob {
 	my ($self, $git, $mime) = @_;
-	my $mid = mid_clean($mime->header('Message-ID'));
+	my $mid = eval { mid_clean(mid_mime($mime)) };
 	$self->remove_message($mid) if defined $mid;
 }
 
 sub index_mm {
 	my ($self, $git, $mime) = @_;
-	$self->{mm}->mid_insert(mid_clean($mime->header('Message-ID')));
+	$self->{mm}->mid_insert(mid_clean(mid_mime($mime)));
 }
 
 sub unindex_mm {
 	my ($self, $git, $mime) = @_;
-	$self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
+	$self->{mm}->mid_delete(mid_clean(mid_mime($mime)));
 }
 
 sub index_mm2 {
 	my ($self, $git, $mime, $bytes) = @_;
-	my $num = $self->{mm}->num_for(mid_clean($mime->header('Message-ID')));
+	my $num = $self->{mm}->num_for(mid_clean(mid_mime($mime)));
 	index_blob($self, $git, $mime, $bytes, $num);
 }
 
 sub unindex_mm2 {
 	my ($self, $git, $mime) = @_;
-	$self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
+	$self->{mm}->mid_delete(mid_clean(mid_mime($mime)));
 	unindex_blob($self, $git, $mime);
 }
 
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 1b33d09..477ffff 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -8,7 +8,6 @@ use strict;
 use warnings;
 use Search::Xapian;
 use Email::Address qw//;
-use Email::Simple qw//;
 use POSIX qw//;
 use Date::Parse qw/str2time/;
 use PublicInbox::MID qw/mid_clean/;
@@ -189,12 +188,7 @@ sub mid ($;$) {
 	}
 }
 
-sub _extract_mid {
-	my ($self) = @_;
-
-	my $mid = $self->mime->header('Message-ID');
-	defined $mid ? mid_clean($mid) : $mid;
-}
+sub _extract_mid { mid_clean(mid_mime($_[0]->mime)) }
 
 sub mime {
 	my ($self, $mime) = @_;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 746cb83..36522a3 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -8,7 +8,7 @@ use warnings;
 use PublicInbox::SearchMsg;
 use PublicInbox::Hval;
 use PublicInbox::View;
-use PublicInbox::MID qw(mid2path mid_clean);
+use PublicInbox::MID qw(mid2path mid_clean mid_mime);
 use Email::MIME;
 require PublicInbox::Git;
 our $LIM = 50;
@@ -195,7 +195,7 @@ sub tdump_ent {
 
 	if ($mime) {
 		# lazy load the full message from mini_mime:
-		my $mid = $mime->header('Message-ID');
+		my $mid = mid_mime($mime);
 		$mime = eval {
 			my $path = mid2path(mid_clean($mid));
 			Email::MIME->new($git->cat_file('HEAD:'.$path));
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4692b22..867ed6f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -13,7 +13,7 @@ use Encode::MIME::Header;
 use Email::MIME::ContentType qw/parse_content_type/;
 use PublicInbox::Hval;
 use PublicInbox::Linkify;
-use PublicInbox::MID qw/mid_clean id_compress mid2path/;
+use PublicInbox::MID qw/mid_clean id_compress mid2path mid_mime/;
 require POSIX;
 
 # TODO: make these constants tunable
@@ -51,7 +51,7 @@ sub msg_reply {
 	my $f = $hdr->header('From');
 	$f = '' unless defined $f;
 	$s = PublicInbox::Hval->new_oneline($s);
-	my $mid = $hdr->header('Message-ID');
+	my $mid = $hdr->header_raw('Message-ID');
 	$mid = PublicInbox::Hval->new_msgid($mid);
 	my $t = $s->as_html;
 	my $se_url =
@@ -92,11 +92,11 @@ sub feed_entry {
 
 sub in_reply_to {
 	my ($hdr) = @_;
-	my $irt = $hdr->header('In-Reply-To');
+	my $irt = $hdr->header_raw('In-Reply-To');
 
 	return mid_clean($irt) if (defined $irt);
 
-	my $refs = $hdr->header('References');
+	my $refs = $hdr->header_raw('References');
 	if ($refs && $refs =~ /<([^>]+)>\s*\z/s) {
 		return $1;
 	}
@@ -115,7 +115,7 @@ sub index_entry {
 	my $enc = enc_for($hdr->header("Content-Type"));
 	my $subj = $hdr->header('Subject');
 
-	my $mid_raw = mid_clean($hdr->header('Message-ID'));
+	my $mid_raw = mid_clean(mid_mime($mime));
 	my $id = anchor_for($mid_raw);
 	my $seen = $state->{seen};
 	$seen->{$id} = "#$id"; # save the anchor for children, later
@@ -409,7 +409,7 @@ sub headers_to_html_header {
 	my $srch = $ctx->{srch} if $ctx;
 	my $rv = "";
 	my @title;
-	my $mid = $hdr->header('Message-ID');
+	my $mid = $hdr->header_raw('Message-ID');
 	$mid = PublicInbox::Hval->new_msgid($mid);
 	foreach my $h (qw(From To Cc Subject Date)) {
 		my $v = $hdr->header($h);
@@ -452,7 +452,7 @@ sub headers_to_html_header {
 sub thread_inline {
 	my ($dst, $ctx, $hdr, $upfx) = @_;
 	my $srch = $ctx->{srch};
-	my $mid = mid_clean($hdr->header('Message-ID'));
+	my $mid = mid_clean($hdr->header_raw('Message-ID'));
 	my $res = $srch->get_thread($mid);
 	my $nr = $res->{total};
 	my $expand = "<a\nhref=\"${upfx}t/#u\">expand</a> " .
@@ -509,7 +509,7 @@ sub _parent_headers_nosrch {
 		$rv .= "<a\nhref=\"../$href/\">$html</a>&gt;\n";
 	}
 
-	my $refs = $hdr->header('References');
+	my $refs = $hdr->header_raw('References');
 	if ($refs) {
 		# avoid redundant URLs wasting bandwidth
 		my %seen;
@@ -550,7 +550,7 @@ sub mailto_arg_link {
 
 	my $subj = $hdr->header('Subject') || '';
 	$subj = "Re: $subj" unless $subj =~ /\bRe:/i;
-	my $mid = $hdr->header('Message-ID');
+	my $mid = $hdr->header_raw('Message-ID');
 	push @arg, "--in-reply-to='" . ascii_html($mid) . "'";
 	my $irt = uri_escape_utf8($mid);
 	delete $cc{$to};
@@ -637,7 +637,7 @@ sub thread_html_head {
 
 sub pre_anchor_entry {
 	my ($seen, $mime) = @_;
-	my $id = anchor_for($mime->header('Message-ID'));
+	my $id = anchor_for(mid_mime($mime));
 	$seen->{$id} = "#$id"; # save the anchor for children, later
 }
 
@@ -690,7 +690,7 @@ sub __thread_entry {
 
 	# lazy load the full message from mini_mime:
 	$mime = eval {
-		my $path = mid2path(mid_clean($mime->header('Message-ID')));
+		my $path = mid2path(mid_clean(mid_mime($mime)));
 		Email::MIME->new($git->cat_file('HEAD:'.$path));
 	} or return;
 
@@ -780,7 +780,7 @@ sub _inline_header {
 	my $dot = $level == 0 ? '' : '` ';
 
 	my $cur = $state->{cur};
-	my $mid = mid_clean($hdr->header('Message-ID'));
+	my $mid = mid_clean($hdr->header_raw('Message-ID'));
 	my $f = $hdr->header('X-PI-From');
 	my $d = _msg_date($hdr);
 	$f = PublicInbox::Hval->new_oneline($f)->as_html;
@@ -833,7 +833,7 @@ sub inline_dump {
 	return unless $node;
 	if (my $mime = $node->message) {
 		my $hdr = $mime->header_obj;
-		my $mid = mid_clean($hdr->header('Message-ID'));
+		my $mid = mid_clean($hdr->header_obj('Message-ID'));
 		if ($mid eq $state->{parent_cmp}) {
 			$state->{parent} = $mid;
 		}
@@ -881,7 +881,7 @@ sub add_topic {
 			push @{$state->{order}}, [ $level, $subj ];
 		}
 
-		my $mid = mid_clean($x->header('Message-ID'));
+		my $mid = mid_clean($x->header_raw('Message-ID'));
 
 		my $ts = $x->header('X-PI-TS');
 		my $exist = $state->{latest}->{$subj};
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2acb4c8..369be68 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -147,8 +147,8 @@ sub invalid_list_mid {
 		if ($mid = mid2blob($ctx)) {
 			require Email::Simple;
 			use PublicInbox::MID qw/mid_clean/;
-			$mid = Email::Simple->new($mid);
-			$ctx->{mid} = mid_clean($mid->header('Message-ID'));
+			my $s = Email::Simple->new($mid);
+			$ctx->{mid} = mid_clean($s->header('Message-ID'));
 		}
 	}
 	undef;
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/1] view: fix stupid typo in inline_dump
  2016-03-03  3:23 [PATCH] use raw header for Message-ID Eric Wong
@ 2016-03-03  7:37 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2016-03-03  7:37 UTC (permalink / raw)
  To: meta

Ugh, this code, which is only enabled if Xapian is available,
really needs better testing...
---
 lib/PublicInbox/View.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 867ed6f..3522bf4 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -833,7 +833,7 @@ sub inline_dump {
 	return unless $node;
 	if (my $mime = $node->message) {
 		my $hdr = $mime->header_obj;
-		my $mid = mid_clean($hdr->header_obj('Message-ID'));
+		my $mid = mid_clean($hdr->header_raw('Message-ID'));
 		if ($mid eq $state->{parent_cmp}) {
 			$state->{parent} = $mid;
 		}
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-03-03  7:37 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-03  3:23 [PATCH] use raw header for Message-ID Eric Wong
2016-03-03  7:37 ` [PATCH 2/1] view: fix stupid typo in inline_dump Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).