user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH] pop3: support initial_limit parameter in mailbox name
  2023-09-19 21:28  7%               ` Konstantin Ryabitsev
@ 2023-09-22  2:18  5%                 ` Eric Wong
  0 siblings, 0 replies; 13+ results
From: Eric Wong @ 2023-09-22  2:18 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> I'm game with that. Maybe even shorten that to l= and il=? I'm still worried
> about the field size limit a bit.

I suspect they're not needed, but we can always add them later
if they are[1].  Longer parameters are better for documentation
purposes since they're user-facing.

[1] it's safe to stuff arbitrarily long `&foo=bar' pairs in the
    query parameters if someone is able to test a bunch of clients.
    `apt-get source mpop' reveals it handles lines up to 500 bytes
    (`#define LINEBUFSIZE 501' in src/conf.c).

-------8<-------
Subject: [PATCH] pop3: support initial_limit parameter in mailbox name

`initial_limit' only affects the fetch for new users while
`limit' affects all users.  `initial_limit' is intended to be
better than the existing, absolute `limit' for 24/7 servers
(e.g. scheduled POP3 imports via webmail).  The existing `limit'
parameter remains and is best suited for systems with sporadic
Internet access.

Link: https://public-inbox.org/meta/20230918-barrel-unhearing-b63869@meerkat/
Fixes: 392d251f97d4 (pop3: support `?limit=$NUM' parameter in mailbox name)
---
 MANIFEST                 |   1 +
 lib/PublicInbox/POP3.pm  |  46 +++++++-----
 lib/PublicInbox/POP3D.pm |   5 +-
 t/pop3d-limit.t          | 146 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 180 insertions(+), 18 deletions(-)
 create mode 100644 t/pop3d-limit.t

diff --git a/MANIFEST b/MANIFEST
index 7dba3836..4693cbe0 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -558,6 +558,7 @@ t/plack-2-txt-bodies.eml
 t/plack-attached-patch.eml
 t/plack-qp.eml
 t/plack.t
+t/pop3d-limit.t
 t/pop3d.t
 t/precheck.t
 t/psgi_attach.eml
diff --git a/lib/PublicInbox/POP3.pm b/lib/PublicInbox/POP3.pm
index f97eccfd..6d24b17c 100644
--- a/lib/PublicInbox/POP3.pm
+++ b/lib/PublicInbox/POP3.pm
@@ -62,7 +62,7 @@ sub new {
 	(bless { pop3d => $pop3d }, $cls)->greet($sock)
 }
 
-# POP user is $UUID1@$NEWSGROUP.$SLICE
+# POP user is $UUID1@$NEWSGROUP[.$SLICE][?QUERY_ARGS]
 sub cmd_user ($$) {
 	my ($self, $mailbox) = @_;
 	$self->{salt} // return \"-ERR already authed\r\n";
@@ -72,26 +72,25 @@ sub cmd_user ($$) {
 	$user =~ tr/-//d; # most have dashes, some (dbus-uuidgen) don't
 	$user =~ m!\A[a-f0-9]{32}\z!i or return \"-ERR user has no UUID\r\n";
 
-	my $limit;
-	$mailbox =~ s/\?limit=([0-9]+)\z// and
-		$limit = $1 > UID_SLICE ? UID_SLICE : $1;
-
+	my %l;
+	if ($mailbox =~ s/\?(.*)\z//) { # query args
+		for (split(/&+/, $1)) {
+			/\A(initial_limit|limit)=([0-9]+)\z/ and $l{$1} = $2;
+		}
+		$self->{limits} = \%l;
+	}
 	my $slice = $mailbox =~ s/\.([0-9]+)\z// ? $1 + 0 : undef;
 
 	my $ibx = $self->{pop3d}->{pi_cfg}->lookup_newsgroup($mailbox) //
 		return \"-ERR $mailbox does not exist\r\n";
-	my $uidmax = $ibx->mm(1)->num_highwater // 0;
+	my $uidmax = $self->{uidmax} = $ibx->mm(1)->num_highwater // 0;
 	if (defined $slice) {
 		my $max = int($uidmax / UID_SLICE);
 		my $tip = "$mailbox.$max";
 		return \"-ERR $mailbox.$slice does not exist ($tip does)\r\n"
 			if $slice > $max;
-		$limit //= UID_SLICE;
-		$self->{uid_base} = ($slice * UID_SLICE) + UID_SLICE - $limit;
 		$self->{slice} = $slice;
-	} else { # latest $limit messages, 1k if unspecified
-		my $base = $uidmax - ($limit // 1000);
-		$self->{uid_base} = $base < 0 ? 0 : $base;
+	} else { # latest messages:
 		$self->{slice} = -1;
 	}
 	$self->{ibx} = $ibx;
@@ -102,12 +101,27 @@ sub cmd_user ($$) {
 
 sub _login_ok ($) {
 	my ($self) = @_;
-	if ($self->{pop3d}->lock_mailbox($self)) {
-		$self->{uid_max} = $self->{ibx}->over(1)->max;
-		\"+OK logged in\r\n";
-	} else {
-		\"-ERR [IN-USE] unable to lock maildrop\r\n";
+	$self->{pop3d}->lock_mailbox($self) or
+		return \"-ERR [IN-USE] unable to lock maildrop\r\n";
+
+	my $l = delete $self->{limits};
+	$l = defined($self->{uid_dele}) ? $l->{limit}
+					: ($l->{initial_limit} // $l->{limit});
+	my $uidmax = delete $self->{uidmax};
+	if ($self->{slice} >= 0) {
+		$self->{uid_base} = $self->{slice} * UID_SLICE;
+		if (defined $l) { # n.b: the last slice is not full:
+			my $max = int($uidmax/UID_SLICE) == $self->{slice} ?
+					($uidmax % UID_SLICE) : UID_SLICE;
+			my $off = $max - $l;
+			$self->{uid_base} += $off if $off > 0;
+		}
+	} else { # latest $l messages, or 1k if unspecified
+		my $base = $uidmax - ($l // 1000);
+		$self->{uid_base} = $base < 0 ? 0 : $base;
 	}
+	$self->{uid_max} = $self->{ibx}->over(1)->max;
+	\"+OK logged in\r\n";
 }
 
 sub cmd_apop {
diff --git a/lib/PublicInbox/POP3D.pm b/lib/PublicInbox/POP3D.pm
index bee668fc..2a9ccfdd 100644
--- a/lib/PublicInbox/POP3D.pm
+++ b/lib/PublicInbox/POP3D.pm
@@ -157,6 +157,7 @@ sub lock_mailbox {
 	my ($user_id, $ngid, $mbid, $txn_id);
 	my $uuid = delete $pop3->{uuid};
 	$dbh->begin_work;
+	my $creat = 0;
 
 	# 1. make sure the user exists, update `last_seen'
 	my $sth = $dbh->prepare_cached(<<'');
@@ -219,13 +220,13 @@ SELECT mailbox_id FROM mailboxes WHERE newsgroup_id = ? AND slice = ?
 	$sth = $dbh->prepare_cached(<<'');
 INSERT OR IGNORE INTO deletes (user_id,mailbox_id) VALUES (?,?)
 
-	if ($sth->execute($user_id, $mbid) == 0) {
+	if ($sth->execute($user_id, $mbid) == 0) { # fetching into existing
 		$sth = $dbh->prepare_cached(<<'', undef, 1);
 SELECT txn_id,uid_dele FROM deletes WHERE user_id = ? AND mailbox_id = ?
 
 		$sth->execute($user_id, $mbid);
 		($txn_id, $pop3->{uid_dele}) = $sth->fetchrow_array;
-	} else {
+	} else { # new user/mailbox combo
 		$txn_id = $dbh->last_insert_id(undef, undef,
 						'deletes', 'txn_id');
 	}
diff --git a/t/pop3d-limit.t b/t/pop3d-limit.t
new file mode 100644
index 00000000..39d2f918
--- /dev/null
+++ b/t/pop3d-limit.t
@@ -0,0 +1,146 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12;
+use PublicInbox::TestCommon;
+require_mods(qw(DBD::SQLite Net::POP3));
+$^O =~ /\A(?:linux|(?:free|net|open)bsd)\z/ or
+	require_mods(qw(File::FcntlLock));
+use autodie;
+my ($tmpdir, $for_destroy) = tmpdir();
+mkdir("$tmpdir/p3state");
+use PublicInbox::Eml;
+my $group = 'test.pop3d.limit';
+my $addr = 'pop3d-limit@example.com';
+
+my $add_msg = sub {
+	my ($im, $n) = @_;
+	$im->add(PublicInbox::Eml->new(<<EOM)) or die 'add dup';
+From: $n\@example.com
+Subject: msg $n
+To: $addr
+Message-ID: <mid-$n\@example.com>
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+
+body $n
+EOM
+};
+
+my $ibx = create_inbox 'pop3d-limit', version => 2, -primary_address => $addr,
+			indexlevel => 'basic', tmpdir => "$tmpdir/ibx", sub {
+	my ($im, $ibx) = @_;
+	$add_msg->($im, $_) for (1..3);
+	$im->done;
+	diag 'done';
+}; # /create_inbox
+
+my $pi_config = "$tmpdir/pi_config";
+{
+	open my $fh, '>', $pi_config;
+	print $fh <<EOF;
+[publicinbox]
+	pop3state = $tmpdir/p3state
+[publicinbox "pop3"]
+	inboxdir = $ibx->{inboxdir}
+	address = $addr
+	indexlevel = basic
+	newsgroup = $group
+EOF
+	close $fh;
+}
+
+my $plain = tcp_server();
+my $plain_addr = tcp_host_port($plain);
+my $env = { PI_CONFIG => $pi_config };
+my $p3d = start_script([qw(-pop3d -W0),
+	"--stdout=$tmpdir/out.log", "--stderr=$tmpdir/err.log" ],
+	$env, { 3 => $plain });
+my @np3args = ($plain->sockhost, Port => $plain->sockport);
+my $fetch_delete = sub {
+	my ($np3) = @_;
+	map {
+		my $msg = $np3->get($_);
+		$np3->delete($_);
+		PublicInbox::Eml->new(join('', @$msg));
+	} sort { $a <=> $b } keys %{$np3->list};
+};
+
+my $login_a = ('a'x32)."\@$group?initial_limit=2&limit=1";
+my $login_a0 = ('a'x32)."\@$group.0?initial_limit=2&limit=1";
+my $login_b = ('b'x32)."\@$group?limit=1";
+my $login_b0 = ('b'x32)."\@$group.0?limit=1";
+my $login_c = ('c'x32)."\@$group?limit=10";
+my $login_c0 = ('c'x32)."\@$group.0?limit=10";
+my $login_d = ('d'x32)."\@$group?limit=100000";
+my $login_d0 = ('d'x32)."\@$group.0?limit=100000";
+
+for my $login ($login_a, $login_a0) {
+	my $np3 = Net::POP3->new(@np3args);
+	$np3->login($login, 'anonymous') or xbail "login $login ($!)";
+	my @msg = $fetch_delete->($np3);
+	$np3->quit;
+	is_deeply([ map { $_->header('Message-ID') } @msg ],
+		[ qw(<mid-2@example.com> <mid-3@example.com>) ],
+		"initial_limit ($login)") or diag explain(\@msg);
+}
+
+for my $login ($login_b, $login_b0) {
+	my $np3 = Net::POP3->new(@np3args);
+	$np3->login($login, 'anonymous') or xbail "login $login ($!)";
+	my @msg = $fetch_delete->($np3);
+	$np3->quit;
+	is_deeply([ map { $_->header('Message-ID') } @msg ],
+		[ qw(<mid-3@example.com>) ],
+		"limit-only ($login)") or diag explain(\@msg);
+}
+
+for my $login ($login_c, $login_c0, $login_d, $login_d0) {
+	my $np3 = Net::POP3->new(@np3args);
+	$np3->login($login, 'anonymous') or xbail "login $login ($!)";
+	my @msg = $fetch_delete->($np3);
+	$np3->quit;
+	is_deeply([ map { $_->header('Message-ID') } @msg ],
+		[ qw(<mid-1@example.com> <mid-2@example.com>
+			<mid-3@example.com>) ],
+		"excessive limit ($login)") or diag explain(\@msg);
+}
+
+{ # add some new messages
+	my $im = $ibx->importer(0);
+	$add_msg->($im, $_) for (4..5);
+	$im->done;
+}
+
+for my $login ($login_a, $login_a0) {
+	my $np3 = Net::POP3->new(@np3args);
+	$np3->login($login, 'anonymous') or xbail "login $login ($!)";
+	my @msg = $fetch_delete->($np3);
+	$np3->quit;
+	is_deeply([ map { $_->header('Message-ID') } @msg ],
+		[ qw(<mid-5@example.com>) ],
+		"limit used (initial_limit ignored, $login)") or
+			diag explain(\@msg);
+}
+
+for my $login ($login_b, $login_b0) {
+	my $np3 = Net::POP3->new(@np3args);
+	$np3->login($login, 'anonymous') or xbail "login $login ($!)";
+	my @msg = $fetch_delete->($np3);
+	$np3->quit;
+	is_deeply([ map { $_->header('Message-ID') } @msg ],
+		[ qw(<mid-5@example.com>) ],
+		"limit-only after new messages ($login)") or
+		diag explain(\@msg);
+}
+
+for my $login ($login_c, $login_c0, $login_d, $login_d0) {
+	my $np3 = Net::POP3->new(@np3args);
+	$np3->login($login, 'anonymous') or xbail "login $login ($!)";
+	my @msg = $fetch_delete->($np3);
+	$np3->quit;
+	is_deeply([ map { $_->header('Message-ID') } @msg ],
+		[ qw(<mid-4@example.com> <mid-5@example.com>) ],
+		"excessive limit ($login)") or diag explain(\@msg);
+}
+
+done_testing;

^ permalink raw reply related	[relevance 5%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-18 21:14  7%             ` Eric Wong
@ 2023-09-19 21:28  7%               ` Konstantin Ryabitsev
  2023-09-22  2:18  5%                 ` [PATCH] pop3: support initial_limit " Eric Wong
  0 siblings, 1 reply; 13+ results
From: Konstantin Ryabitsev @ 2023-09-19 21:28 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Mon, Sep 18, 2023 at 09:14:22PM +0000, Eric Wong wrote:
> > Oh, I did notice what is probably unintentional behaviour -- passing
> > ?limit=XXX affects all mailbox access, not just the initial retrieval.
> > 
> > E.g. if I configured pop3 with ?limit=128, then leave for the weekend and
> > return on Monday, I will only be able to retrieve 128 new messages, regardless
> > of how many arrived over the weekend.
> > 
> > I'm not sure if this is what was intended -- I think it makes more sense to
> > have ?limit=XXX only affect the initial retrieval. In all other cases, when a
> > tracking uuid cookie is present, it should return all messages regardless of
> > ?limit=.
> > 
> > Does that make sense?
> 
> I think there should be an initial_limit parameter in addition to the
> current limit.   initial_limit would be more suited for cronjobs and
> such running on 24/7 systems.  The regular limit would be better
> for systems with intermittent access and could go weeks w/o being
> online (including situations where somebody restored a system from
> a months/years-old backup).

I'm game with that. Maybe even shorten that to l= and il=? I'm still worried
about the field size limit a bit.

> Not feeling well, will try to work on it once (or if) I feel better.

Please take care!

-K

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-18 13:46  7%           ` Konstantin Ryabitsev
@ 2023-09-18 21:14  7%             ` Eric Wong
  2023-09-19 21:28  7%               ` Konstantin Ryabitsev
  0 siblings, 1 reply; 13+ results
From: Eric Wong @ 2023-09-18 21:14 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> On Fri, Sep 15, 2023 at 08:41:10PM +0000, Eric Wong wrote:
> > Thanks, pushed the series as
> > a37e3ab3740c24c3 (pop3: limit default mailbox to 1K messages, 2023-09-14)
> > 392d251f97d46579 (pop3: support `?limit=$NUM' parameter in mailbox name, 2023-09-12)
> 
> Oh, I did notice what is probably unintentional behaviour -- passing
> ?limit=XXX affects all mailbox access, not just the initial retrieval.
> 
> E.g. if I configured pop3 with ?limit=128, then leave for the weekend and
> return on Monday, I will only be able to retrieve 128 new messages, regardless
> of how many arrived over the weekend.
> 
> I'm not sure if this is what was intended -- I think it makes more sense to
> have ?limit=XXX only affect the initial retrieval. In all other cases, when a
> tracking uuid cookie is present, it should return all messages regardless of
> ?limit=.
> 
> Does that make sense?

I think there should be an initial_limit parameter in addition to the
current limit.   initial_limit would be more suited for cronjobs and
such running on 24/7 systems.  The regular limit would be better
for systems with intermittent access and could go weeks w/o being
online (including situations where somebody restored a system from
a months/years-old backup).

Not feeling well, will try to work on it once (or if) I feel better.

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-15 20:41 14%         ` Eric Wong
@ 2023-09-18 13:46  7%           ` Konstantin Ryabitsev
  2023-09-18 21:14  7%             ` Eric Wong
  0 siblings, 1 reply; 13+ results
From: Konstantin Ryabitsev @ 2023-09-18 13:46 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Fri, Sep 15, 2023 at 08:41:10PM +0000, Eric Wong wrote:
> Thanks, pushed the series as
> a37e3ab3740c24c3 (pop3: limit default mailbox to 1K messages, 2023-09-14)
> 392d251f97d46579 (pop3: support `?limit=$NUM' parameter in mailbox name, 2023-09-12)

Oh, I did notice what is probably unintentional behaviour -- passing
?limit=XXX affects all mailbox access, not just the initial retrieval.

E.g. if I configured pop3 with ?limit=128, then leave for the weekend and
return on Monday, I will only be able to retrieve 128 new messages, regardless
of how many arrived over the weekend.

I'm not sure if this is what was intended -- I think it makes more sense to
have ?limit=XXX only affect the initial retrieval. In all other cases, when a
tracking uuid cookie is present, it should return all messages regardless of
?limit=.

Does that make sense?

-K

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-15 20:03  7%       ` Konstantin Ryabitsev
@ 2023-09-15 20:41 14%         ` Eric Wong
  2023-09-18 13:46  7%           ` Konstantin Ryabitsev
  0 siblings, 1 reply; 13+ results
From: Eric Wong @ 2023-09-15 20:41 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> Tested-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>

Thanks, pushed the series as
a37e3ab3740c24c3 (pop3: limit default mailbox to 1K messages, 2023-09-14)
392d251f97d46579 (pop3: support `?limit=$NUM' parameter in mailbox name, 2023-09-12)

^ permalink raw reply	[relevance 14%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-14  0:38  6%     ` Eric Wong
@ 2023-09-15 20:03  7%       ` Konstantin Ryabitsev
  2023-09-15 20:41 14%         ` Eric Wong
  0 siblings, 1 reply; 13+ results
From: Konstantin Ryabitsev @ 2023-09-15 20:03 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Thu, Sep 14, 2023 at 12:38:28AM +0000, Eric Wong wrote:
> > My initial target for deploying POP3 support is to allow Gmail users to
> > pull-subscribe to mailing lists, since Gmail is the #1 provider that we have
> > trouble with message delivery due to their draconian threshold limits.
> > However, I think if the default behaviour results in dumping 50,000 messages
> > into people's inboxes, they wouldn't use it, which is why I think we should
> > have a default that is lighter both on the server side and on the users.
> 
> OK, I think this could work (goes on top of my previous limit patch):

Yes, it looks good in my tests:

- specifying the username as `[uuid]@org.kernel.vger.linux-kernel` downloads
  1000 messages
- specifying the username as `[uuid]@org.kernel.vger.linux-kernel?limit=128`
  properly downloads only 128
- tested in both Claws-mail and Thunderbird

Tested-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>

Thanks!

-K

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-13 22:03  7%       ` Eric Wong
@ 2023-09-15 19:17  7%         ` Konstantin Ryabitsev
  0 siblings, 0 replies; 13+ results
From: Konstantin Ryabitsev @ 2023-09-15 19:17 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Wed, Sep 13, 2023 at 10:03:26PM +0000, Eric Wong wrote:
> > What if we move the uuid into the password field -- it seems it belongs there
> > anyway, as it's tied to the user cookie.
> 
> I've thought about that, too; but it can get tricky since passwords
> aren't visible in most UIs.  I've also seen some UIs (not POP3) which
> forbid copy+paste in password fields.
> 
> Furthermore, if a user wants to migrate to a different POP3 client;
> carrying their UUID with them is easier when it's readable in the
> username.  (I'm assuming users won't be bothered backup their UUID
> anywhere)

That makes sense.

> I'm open to supporting both ways; but I'm also not inclined to
> do so unless there's evidence of real-world POP3 clients being
> unable to handle the user names.
> 
> Documenting both ways can be overwhelming to users.

Yes, let's keep it as-is -- I'll test the patches shortly and follow up with
details.

-K

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-13 16:08  7%   ` Konstantin Ryabitsev
@ 2023-09-14  0:38  6%     ` Eric Wong
  2023-09-15 20:03  7%       ` Konstantin Ryabitsev
  0 siblings, 1 reply; 13+ results
From: Eric Wong @ 2023-09-14  0:38 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> On Tue, Sep 12, 2023 at 10:40:34PM +0000, Eric Wong wrote:
> > Perhaps 50K is too much?  I figured clients would have a way to
> > limit that, but I don't really pay attention to POP3 clients...
> 
> The few clients I looked at didn't give any option to specify how many remote
> messages I want to retrieve, so I think defaulting to 50,000 is not the right
> approach. Maybe the default limit should be something "last 7 days or 1000
> messages, whichever is larger"?

OK, 1K (or any other fixed limit) is fine and easiest.  7 days
(or any other time window) could get flooded if there's a nasty
spike of some sort.

> My initial target for deploying POP3 support is to allow Gmail users to
> pull-subscribe to mailing lists, since Gmail is the #1 provider that we have
> trouble with message delivery due to their draconian threshold limits.
> However, I think if the default behaviour results in dumping 50,000 messages
> into people's inboxes, they wouldn't use it, which is why I think we should
> have a default that is lighter both on the server side and on the users.

OK, I think this could work (goes on top of my previous limit patch):
-----------8<-----------
Subject: [PATCH] pop3: limit default mailbox to 1K messages

This is probably friendlier to webmail providers which
support importing mail from POP3.
---
 lib/PublicInbox/POP3.pm    | 7 ++++---
 lib/PublicInbox/WwwText.pm | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/POP3.pm b/lib/PublicInbox/POP3.pm
index 4a21ef5e..f97eccfd 100644
--- a/lib/PublicInbox/POP3.pm
+++ b/lib/PublicInbox/POP3.pm
@@ -72,7 +72,7 @@ sub cmd_user ($$) {
 	$user =~ tr/-//d; # most have dashes, some (dbus-uuidgen) don't
 	$user =~ m!\A[a-f0-9]{32}\z!i or return \"-ERR user has no UUID\r\n";
 
-	my $limit = UID_SLICE;
+	my $limit;
 	$mailbox =~ s/\?limit=([0-9]+)\z// and
 		$limit = $1 > UID_SLICE ? UID_SLICE : $1;
 
@@ -86,10 +86,11 @@ sub cmd_user ($$) {
 		my $tip = "$mailbox.$max";
 		return \"-ERR $mailbox.$slice does not exist ($tip does)\r\n"
 			if $slice > $max;
+		$limit //= UID_SLICE;
 		$self->{uid_base} = ($slice * UID_SLICE) + UID_SLICE - $limit;
 		$self->{slice} = $slice;
-	} else { # latest $limit messages
-		my $base = $uidmax - $limit;
+	} else { # latest $limit messages, 1k if unspecified
+		my $base = $uidmax - ($limit // 1000);
 		$self->{uid_base} = $base < 0 ? 0 : $base;
 		$self->{slice} = -1;
 	}
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index c31a7f86..f4508b3f 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -293,6 +293,9 @@ The POP3 password is: anonymous
 The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
 where \$(uuidgen) in the output of the `uuidgen' command on your system.
 The UUID in the username functions as a private cookie (don't share it).
+By default, only 1000 messages are retrieved.  You may download more
+by appending `?limit=NUM' (without quotes) to the username, where
+`NUM' is an integer between 1 and 50000.
 Idle accounts will expire periodically.
 EOM
 	}

^ permalink raw reply related	[relevance 6%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-13 15:33  7%     ` Konstantin Ryabitsev
@ 2023-09-13 22:03  7%       ` Eric Wong
  2023-09-15 19:17  7%         ` Konstantin Ryabitsev
  0 siblings, 1 reply; 13+ results
From: Eric Wong @ 2023-09-13 22:03 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> On Wed, Sep 13, 2023 at 06:20:40AM +0000, Eric Wong wrote:
> > Eric Wong <e@80x24.org> wrote:
> > > I'm not sure if `?' or `=' are allowed characters in POP3
> > > mailbox names.  In fact, I can't find any information on
> > > valid characters allowed in RFC 1081 nor RFC 1939.
> 
> It's a username, though, not mailbox name? There's no restriction on the
> characters or length of the username, though I'm guessing some UI clients may
> have their own limits regarding the length of the username field.

username == mailbox name as far as POP3 goes.

> > Of course, the parameters and all manner of special characters
> > can also be placed the password, so `anonymous?limit=1000'.
> > 
> > But somehow putting parameters in the "password" (even a
> > well-known and obvious one) feels wrong.
> 
> What if we move the uuid into the password field -- it seems it belongs there
> anyway, as it's tied to the user cookie.

I've thought about that, too; but it can get tricky since passwords
aren't visible in most UIs.  I've also seen some UIs (not POP3) which
forbid copy+paste in password fields.

Furthermore, if a user wants to migrate to a different POP3 client;
carrying their UUID with them is easier when it's readable in the
username.  (I'm assuming users won't be bothered backup their UUID
anywhere)

> username: newsgroup.name?params
> password: $(uuidgen)
> 
> So, in my example it becomes:
> 
> username: org.kernel.vger.git?limit=1000
> password: 288e5e35-1a35-46ef-b3d5-6d94c20aeab8
> 
> This could be backward-compatible with the current implementation -- if there
> is an @ in the username field, then the cookie is based on what's preceding
> it. If there's none, then we use the password field (unless it's "anonymous").
> 
> This way we're less likely to run into any problems with username length
> limitations set by MUAs.

Right, backwards compatibility isn't a problem either way.

I'm open to supporting both ways; but I'm also not inclined to
do so unless there's evidence of real-world POP3 clients being
unable to handle the user names.

Documenting both ways can be overwhelming to users.

Thanks.

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-12 22:40 13% ` [RFC] pop3: support `?limit=$NUM' parameter in mailbox name Eric Wong
  2023-09-13  6:20  7%   ` Eric Wong
@ 2023-09-13 16:08  7%   ` Konstantin Ryabitsev
  2023-09-14  0:38  6%     ` Eric Wong
  1 sibling, 1 reply; 13+ results
From: Konstantin Ryabitsev @ 2023-09-13 16:08 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Tue, Sep 12, 2023 at 10:40:34PM +0000, Eric Wong wrote:
> Perhaps 50K is too much?  I figured clients would have a way to
> limit that, but I don't really pay attention to POP3 clients...

The few clients I looked at didn't give any option to specify how many remote
messages I want to retrieve, so I think defaulting to 50,000 is not the right
approach. Maybe the default limit should be something "last 7 days or 1000
messages, whichever is larger"?

My initial target for deploying POP3 support is to allow Gmail users to
pull-subscribe to mailing lists, since Gmail is the #1 provider that we have
trouble with message delivery due to their draconian threshold limits.
However, I think if the default behaviour results in dumping 50,000 messages
into people's inboxes, they wouldn't use it, which is why I think we should
have a default that is lighter both on the server side and on the users.

-K

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-13  6:20  7%   ` Eric Wong
@ 2023-09-13 15:33  7%     ` Konstantin Ryabitsev
  2023-09-13 22:03  7%       ` Eric Wong
  0 siblings, 1 reply; 13+ results
From: Konstantin Ryabitsev @ 2023-09-13 15:33 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Wed, Sep 13, 2023 at 06:20:40AM +0000, Eric Wong wrote:
> Eric Wong <e@80x24.org> wrote:
> > I'm not sure if `?' or `=' are allowed characters in POP3
> > mailbox names.  In fact, I can't find any information on
> > valid characters allowed in RFC 1081 nor RFC 1939.

It's a username, though, not mailbox name? There's no restriction on the
characters or length of the username, though I'm guessing some UI clients may
have their own limits regarding the length of the username field.

> Of course, the parameters and all manner of special characters
> can also be placed the password, so `anonymous?limit=1000'.
> 
> But somehow putting parameters in the "password" (even a
> well-known and obvious one) feels wrong.

What if we move the uuid into the password field -- it seems it belongs there
anyway, as it's tied to the user cookie.

username: newsgroup.name?params
password: $(uuidgen)

So, in my example it becomes:

username: org.kernel.vger.git?limit=1000
password: 288e5e35-1a35-46ef-b3d5-6d94c20aeab8

This could be backward-compatible with the current implementation -- if there
is an @ in the username field, then the cookie is based on what's preceding
it. If there's none, then we use the password field (unless it's "anonymous").

This way we're less likely to run into any problems with username length
limitations set by MUAs.

-K

^ permalink raw reply	[relevance 7%]

* Re: [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  2023-09-12 22:40 13% ` [RFC] pop3: support `?limit=$NUM' parameter in mailbox name Eric Wong
@ 2023-09-13  6:20  7%   ` Eric Wong
  2023-09-13 15:33  7%     ` Konstantin Ryabitsev
  2023-09-13 16:08  7%   ` Konstantin Ryabitsev
  1 sibling, 1 reply; 13+ results
From: Eric Wong @ 2023-09-13  6:20 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Eric Wong <e@80x24.org> wrote:
> I'm not sure if `?' or `=' are allowed characters in POP3
> mailbox names.  In fact, I can't find any information on
> valid characters allowed in RFC 1081 nor RFC 1939.

Of course, the parameters and all manner of special characters
can also be placed the password, so `anonymous?limit=1000'.

But somehow putting parameters in the "password" (even a
well-known and obvious one) feels wrong.

*shrug*

^ permalink raw reply	[relevance 7%]

* [RFC] pop3: support `?limit=$NUM' parameter in mailbox name
  @ 2023-09-12 22:40 13% ` Eric Wong
  2023-09-13  6:20  7%   ` Eric Wong
  2023-09-13 16:08  7%   ` Konstantin Ryabitsev
  0 siblings, 2 replies; 13+ results
From: Eric Wong @ 2023-09-12 22:40 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> Hello:
> 
> I've been playing around with pop3, and I'm wondering if we can improve its
> usability by adding a "last NNN messages" pseudo-folder. Currently, if someone
> wants to access the git mailing list archive via pop3, they have to do the
> following:
> 
> - know that the username should be $(uuidgen)@org.kernel.vger.git.1 (the
>   default username would access slice 0, right? Or is it the last 50,000
>   messages?)

The /\.[0-9]+$/ slice is actually optional for POP3.
`$(uuidgen)@org.kernel.vger.git' alone will get you the latest 50k.

> - wait for their client to retrieve tens of thousands of unread messages on
>   first access

Perhaps 50K is too much?  I figured clients would have a way to
limit that, but I don't really pay attention to POP3 clients...

Patch below adds a `?limit=$NUM' parameter, but I'm not sure if
`?' or `=' are allowed in POP3 mailbox names.  mpop(1) doesn't
complain...  Haven't looked at other POP3 clients.

> - if the remote archive rolls over to the next slice, they have to edit their
>   account info to get new messages (unless I'm wrong about #1)

Yeah, that only applies to IMAP.  IMAP is a pain since connections
can be long-lived and per-connection MSN <=> UID mappings can grow
without bound after more messages arrive.

Perhaps our -imapd can be less nice and forcibly terminate
connections if the most recent window gets too big.

> Perhaps the default could be slightly different:
> 
> - $(uuidgen)@org.kernel.vger.git would start with an empty view (or something
>   like the last 10 messages)

Small numbers would be very unuseful, too, I think...

> - it would only get any new messages added to the archive
> 
> I think this would be a friendlier experience, but not sure how difficult it
> would be to implement. I'm also not 100% sure all my assumptions are correct,
> so please feel free to correct me.

No worries, the POP3 stuff hasn't seen much use.
IMAP's been hammered relentlessly by bots on my server, at least :>

Lightly-tested patch to support ?limit=$NUM

-------8<--------
Subject: [PATCH] pop3: support `?limit=$NUM' parameter in mailbox name

I'm not sure if `?' or `=' are allowed characters in POP3
mailbox names.  In fact, I can't find any information on
valid characters allowed in RFC 1081 nor RFC 1939.

In any case, it seems to work fine with mpop.
---
 lib/PublicInbox/POP3.pm | 18 ++++++++++++------
 xt/pop3d-mpop.t         |  4 ++--
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/POP3.pm b/lib/PublicInbox/POP3.pm
index d32793e4..4a21ef5e 100644
--- a/lib/PublicInbox/POP3.pm
+++ b/lib/PublicInbox/POP3.pm
@@ -41,6 +41,7 @@ use PublicInbox::IMAP; # for UID slice stuff
 
 use constant {
 	LINE_MAX => 512, # XXX unsure
+	UID_SLICE => PublicInbox::IMAP::UID_SLICE,
 };
 
 # XXX FIXME: duplicated stuff from NNTP.pm and IMAP.pm
@@ -70,20 +71,25 @@ sub cmd_user ($$) {
 	my $user = $1;
 	$user =~ tr/-//d; # most have dashes, some (dbus-uuidgen) don't
 	$user =~ m!\A[a-f0-9]{32}\z!i or return \"-ERR user has no UUID\r\n";
-	my $slice;
-	$mailbox =~ s/\.([0-9]+)\z// and $slice = $1 + 0;
+
+	my $limit = UID_SLICE;
+	$mailbox =~ s/\?limit=([0-9]+)\z// and
+		$limit = $1 > UID_SLICE ? UID_SLICE : $1;
+
+	my $slice = $mailbox =~ s/\.([0-9]+)\z// ? $1 + 0 : undef;
+
 	my $ibx = $self->{pop3d}->{pi_cfg}->lookup_newsgroup($mailbox) //
 		return \"-ERR $mailbox does not exist\r\n";
 	my $uidmax = $ibx->mm(1)->num_highwater // 0;
 	if (defined $slice) {
-		my $max = int($uidmax / PublicInbox::IMAP::UID_SLICE);
+		my $max = int($uidmax / UID_SLICE);
 		my $tip = "$mailbox.$max";
 		return \"-ERR $mailbox.$slice does not exist ($tip does)\r\n"
 			if $slice > $max;
-		$self->{uid_base} = $slice * PublicInbox::IMAP::UID_SLICE;
+		$self->{uid_base} = ($slice * UID_SLICE) + UID_SLICE - $limit;
 		$self->{slice} = $slice;
-	} else { # latest 50K messages
-		my $base = $uidmax - PublicInbox::IMAP::UID_SLICE;
+	} else { # latest $limit messages
+		my $base = $uidmax - $limit;
 		$self->{uid_base} = $base < 0 ? 0 : $base;
 		$self->{slice} = -1;
 	}
diff --git a/xt/pop3d-mpop.t b/xt/pop3d-mpop.t
index fc82bc6b..9da1050c 100644
--- a/xt/pop3d-mpop.t
+++ b/xt/pop3d-mpop.t
@@ -53,7 +53,7 @@ delivery maildir $tmpdir/md
 account default
 host ${\$sock->sockhost}
 port ${\$sock->sockport}
-user $uuid\@$newsgroup
+user $uuid\@$newsgroup?limit=10000
 auth user
 password anonymous
 received_header off
@@ -65,7 +65,7 @@ EOM
 	my $pid = spawn($cmd, undef, { 1 => 2 });
 	$pids{$pid} = $cmd;
 }
-
+diag "mpop is writing to $tmpdir/md ...";
 while (scalar keys %pids) {
 	my $pid = waitpid(-1, 0) or next;
 	my $cmd = delete $pids{$pid} or next;

^ permalink raw reply related	[relevance 13%]

Results 1-13 of 13 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-09-12 21:08     pop3 usability thoughts Konstantin Ryabitsev
2023-09-12 22:40 13% ` [RFC] pop3: support `?limit=$NUM' parameter in mailbox name Eric Wong
2023-09-13  6:20  7%   ` Eric Wong
2023-09-13 15:33  7%     ` Konstantin Ryabitsev
2023-09-13 22:03  7%       ` Eric Wong
2023-09-15 19:17  7%         ` Konstantin Ryabitsev
2023-09-13 16:08  7%   ` Konstantin Ryabitsev
2023-09-14  0:38  6%     ` Eric Wong
2023-09-15 20:03  7%       ` Konstantin Ryabitsev
2023-09-15 20:41 14%         ` Eric Wong
2023-09-18 13:46  7%           ` Konstantin Ryabitsev
2023-09-18 21:14  7%             ` Eric Wong
2023-09-19 21:28  7%               ` Konstantin Ryabitsev
2023-09-22  2:18  5%                 ` [PATCH] pop3: support initial_limit " Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).