* [PATCH 1/3] disallow "\t" and "\n" in OVER headers
2018-04-20 3:27 [PATCH 0/3] searchidx fixes for NNTP XOVER Eric Wong
@ 2018-04-20 3:27 ` Eric Wong
2018-04-20 3:27 ` [PATCH 2/3] searchidx: release lock again during v1 batch callback Eric Wong
2018-04-20 3:27 ` [PATCH 3/3] searchidx: remove leftover debugging code Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2018-04-20 3:27 UTC (permalink / raw)
To: meta
For Subject/To/Cc/From headers, we replace each "\t" and "\n" with a space (' ').
For Message-IDs (including References/In-Reply-To), '\t', '\n', '\r'
are deleted since some MUAs might screw them up:
https://public-inbox.org/git/656C30A1EFC89F6B2082D9B6@localhost/raw
---
lib/PublicInbox/MID.pm | 1 +
lib/PublicInbox/SearchMsg.pm | 2 +-
t/mid.t | 11 +++++++++++
t/nntpd.t | 17 ++++++++++-------
4 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index c82e840..cd56f27 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -87,6 +87,7 @@ sub uniq_mids ($) {
my @ret;
my %seen;
foreach my $mid (@$mids) {
+ $mid =~ tr/\n\t\r//d;
if (length($mid) > MAX_MID_SIZE) {
warn "Message-ID: <$mid> too long, truncating\n";
$mid = substr($mid, 0, MAX_MID_SIZE);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index ab971e0..c7787ea 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -100,7 +100,7 @@ sub __hdr ($$) {
my $mime = $self->{mime} or return;
$val = $mime->header($field);
$val = '' unless defined $val;
- $val =~ tr/\n/ /;
+ $val =~ tr/\t\n/ /;
$val =~ tr/\r//d;
$self->{$field} = $val;
}
diff --git a/t/mid.t b/t/mid.t
index 223be79..8c307c8 100644
--- a/t/mid.t
+++ b/t/mid.t
@@ -25,6 +25,17 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
$mime->header_set('In-Reply-To', '<weld>');
is_deeply(['hello', 'world', 'weld'], references($mime->header_obj),
'references combines with In-Reply-To');
+
+ $mime->header_set('References', "<hello>\n\t<world>");
+ $mime->header_set('In-Reply-To');
+ is_deeply(references($mime->header_obj), ['hello', 'world'],
+ 'multiline References OK');
+ $mime->header_set('References', "<hello\tworld>");
+ is_deeply(references($mime->header_obj), ['helloworld'],
+ 'drop \t in References <656C30A1EFC89F6B2082D9B6@localhost>');
+ $mime->header_set('Message-ID', "<hello\tworld>");
+ is_deeply(mids($mime->header_obj), ['helloworld'],
+ 'drop \t in Message-ID');
}
done_testing();
diff --git a/t/nntpd.t b/t/nntpd.t
index 3698f98..960e83c 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -80,9 +80,10 @@ From: =?utf-8?Q?El=C3=A9anor?= <me\@example.com>
Cc: $addr
Message-Id: <nntp\@example.com>
Content-Type: text/plain; charset=utf-8
-Subject: Testing for =?utf-8?Q?El=C3=A9anor?=
+Subject: Testing for =?utf-8?Q?El=C3=A9anor?=
Date: Thu, 01 Jan 1970 06:06:06 +0000
Content-Transfer-Encoding: 8bit
+References: <ref tab squeezed>
This is a test message for El\xc3\xa9anor
EOF
@@ -139,7 +140,8 @@ EOF
'from' => "El\xc3\xa9anor <me\@example.com>",
'to' => "El\xc3\xa9anor <you\@example.com>",
'cc' => $addr,
- 'xref' => "example.com $group:1"
+ 'xref' => "example.com $group:1",
+ 'references' => '<reftabsqueezed>',
);
my $s = IO::Socket::INET->new(%opts);
@@ -189,7 +191,7 @@ EOF
"El\xc3\xa9anor <me\@example.com>",
'Thu, 01 Jan 1970 06:06:06 +0000',
'<nntp@example.com>',
- '',
+ '<reftabsqueezed>',
$len,
'1' ] }, "XOVER range works");
@@ -198,7 +200,7 @@ EOF
"El\xc3\xa9anor <me\@example.com>",
'Thu, 01 Jan 1970 06:06:06 +0000',
'<nntp@example.com>',
- '',
+ '<reftabsqueezed>',
$len,
'1' ] }, "XOVER by article works");
@@ -220,14 +222,15 @@ EOF
is($r[1], "0\tTesting for El\xc3\xa9anor\t" .
"El\xc3\xa9anor <me\@example.com>\t" .
"Thu, 01 Jan 1970 06:06:06 +0000\t" .
- "$mid\t\t$len\t1", 'OVER by Message-ID works');
+ "$mid\t<reftabsqueezed>\t$len\t1",
+ 'OVER by Message-ID works');
is($r[2], '.', 'correctly terminated response');
}
is_deeply($n->xhdr(qw(Cc 1-)), { 1 => 'test-nntpd@example.com' },
'XHDR Cc 1- works');
- is_deeply($n->xhdr(qw(References 1-)), { 1 => '' },
- 'XHDR References 1- works (empty string)');
+ is_deeply($n->xhdr(qw(References 1-)), { 1 => '<reftabsqueezed>' },
+ 'XHDR References 1- works)');
is_deeply($n->xhdr(qw(list-id 1-)), {},
'XHDR on invalid header returns empty');
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] searchidx: remove leftover debugging code
2018-04-20 3:27 [PATCH 0/3] searchidx fixes for NNTP XOVER Eric Wong
2018-04-20 3:27 ` [PATCH 1/3] disallow "\t" and "\n" in OVER headers Eric Wong
2018-04-20 3:27 ` [PATCH 2/3] searchidx: release lock again during v1 batch callback Eric Wong
@ 2018-04-20 3:27 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2018-04-20 3:27 UTC (permalink / raw)
To: meta
I was using this debugging code to trace the path of brian's message.
Fixes: 017fed7bc4d33ac4
("searchidx: regenerate and avoid article number gaps on full index")
---
lib/PublicInbox/SearchIdx.pm | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 8c3bc2d..4dc8135 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -551,16 +551,11 @@ sub read_log {
my %D;
my $line;
my $newest;
- my $mid = '20170114215743.5igbjup6qpsh3jfg@genre.crustytoothpaste.net';
while (defined($line = <$log>)) {
if ($line =~ /$addmsg/o) {
my $blob = $1;
delete $D{$blob} and next;
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
- my $mids = mids($mime->header_obj);
- foreach (@$mids) {
- warn "ADD $mid\n" if ($_ eq $mid);
- }
batch_adjust(\$max, $bytes, $batch_cb, $latest);
$add_cb->($self, $mime, $bytes, $blob);
} elsif ($line =~ /$delmsg/o) {
@@ -574,10 +569,6 @@ sub read_log {
# get the leftovers
foreach my $blob (keys %D) {
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
- my $mids = mids($mime->header_obj);
- foreach (@$mids) {
- warn "DEL $mid\n" if ($_ eq $mid);
- }
$del_cb->($self, $mime);
}
$batch_cb->($latest, $newest);
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread