user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 07/34] URI IMAP support
Date: Sat, 27 Jun 2020 10:03:33 +0000	[thread overview]
Message-ID: <20200627100400.9871-8-e@yhbt.net> (raw)
In-Reply-To: <20200627100400.9871-1-e@yhbt.net>

We'll be supporting the IMAP URL scheme described in RFC 5092
for -watch, so add this module to fill in what the `URI' package
lacks.
---
 MANIFEST                   |   2 +
 lib/PublicInbox/URIimap.pm | 113 +++++++++++++++++++++++++++++++++++++
 t/uri_imap.t               |  65 +++++++++++++++++++++
 3 files changed, 180 insertions(+)
 create mode 100644 lib/PublicInbox/URIimap.pm
 create mode 100644 t/uri_imap.t

diff --git a/MANIFEST b/MANIFEST
index 158d7ca2d8e..ffd79c1f1b3 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -181,6 +181,7 @@ lib/PublicInbox/Syscall.pm
 lib/PublicInbox/TLS.pm
 lib/PublicInbox/TestCommon.pm
 lib/PublicInbox/Tmpfile.pm
+lib/PublicInbox/URIimap.pm
 lib/PublicInbox/Unsubscribe.pm
 lib/PublicInbox/UserContent.pm
 lib/PublicInbox/V2Writable.pm
@@ -335,6 +336,7 @@ t/spamcheck_spamc.t
 t/spawn.t
 t/thread-cycle.t
 t/time.t
+t/uri_imap.t
 t/utf8.eml
 t/v1-add-remove-add.t
 t/v1reindex.t
diff --git a/lib/PublicInbox/URIimap.pm b/lib/PublicInbox/URIimap.pm
new file mode 100644
index 00000000000..56b6002a379
--- /dev/null
+++ b/lib/PublicInbox/URIimap.pm
@@ -0,0 +1,113 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# cf. RFC 5092, which the `URI' package doesn't support
+#
+# This depends only on the documented public API of the `URI' dist,
+# not on internal `_'-prefixed subclasses such as `URI::_server'
+#
+# <https://metacpan.org/pod/URI::imap> exists, but it's not in
+# common distros.
+#
+# RFC 2192 also describes ";TYPE=<list_type>"
+package PublicInbox::URIimap;
+use strict;
+use URI::Split qw(uri_split uri_join); # part of URI
+use URI::Escape qw(uri_unescape);
+
+my %default_ports = (imap => 143, imaps => 993);
+
+# returns undef unless $url starts with imap:// or imaps://
+sub new {
+	my ($class, $url) = @_;
+	$url =~ m!\Aimaps?://! ? bless \$url, $class : undef;
+}
+
+# returns a new object: lowercase scheme + host, default port dropped
+sub canonical {
+	my ($self) = @_;
+	# no #frag in RFC 5092 from what I can tell, so it is not rejoined
+	my ($scheme, $auth, $path, $query) = uri_split($$self);
+	$scheme = lc($scheme);
+	$path =~ s!\A/+!/!; # excessive leading slash
+	# userinfo ($1) is kept verbatim, only the host ($2) is lowercased
+	$auth =~ s{\A(.*@)?(.*?)(?::([0-9]+))?\z}{
+		my ($ret, $port) = (($1 // '').lc($2), $3);
+		my $std = !defined($port) || $port == $default_ports{$scheme};
+		$std ? $ret : "$ret:$port";
+	}e;
+	ref($self)->new(uri_join($scheme, $auth, $path, $query));
+}
+
+# unescaped host without userinfo, port, or IPv6 brackets
+sub host {
+	my ($self) = @_;
+	my (undef, $auth) = uri_split($$self);
+	$auth =~ s!\A.*@!!; # split at the last `@', like ->canonical
+	$auth =~ s!:[0-9]+\z!!;
+	$auth =~ s!\A\[(.*)\]\z!$1!; # IPv6
+	uri_unescape($auth);
+}
+
+# raw (still percent-encoded) path, may be used for globbing
+sub path {
+	my ($self) = @_;
+	my (undef, undef, $path) = uri_split($$self);
+	$path =~ s!\A/+!!;
+	$path =~ s/;.*\z//; # ;UIDVALIDITY=nz-number
+	$path eq '' ? undef : $path;
+}
+
+# percent-decoded ->path, undef if the URL names no mailbox
+sub mailbox {
+	my ($self) = @_;
+	my $path = path($self);
+	defined($path) ? uri_unescape($path) : undef;
+}
+
+# TODO: UIDVALIDITY, search, and other params
+
+# explicit port as a number, else the default for the scheme
+sub port {
+	my ($self) = @_;
+	my ($scheme, $auth) = uri_split($$self);
+	$auth =~ /:([0-9]+)\z/ ? $1 + 0 : $default_ports{lc($scheme)};
+}
+
+# raw authority: [userinfo@]host[:port]
+sub authority { (uri_split(${$_[0]}))[1] }
+
+# userinfo (authority up to the last `@', still escaped), undef if none
+sub _userinfo {
+	my (undef, $auth) = uri_split(${$_[0]});
+	$auth =~ s/\@[^\@]*\z// ? $auth : undef; # drop host:port
+}
+
+# NB: accessors "return undef" (not a bare return) so that they always
+# yield exactly one value when called in list context
+sub user {
+	my ($self) = @_;
+	my $user = _userinfo($self) // return undef;
+	$user =~ s/;.*\z//; # drop ;AUTH=...
+	$user =~ s/:.*\z//; # drop password
+	$user eq '' ? undef : uri_unescape($user); # e.g. only ";AUTH=..."
+}
+
+sub password {
+	my ($self) = @_;
+	my $pass = _userinfo($self) // return undef;
+	$pass =~ s/;.*\z//; # drop ;AUTH=...
+	$pass =~ s/\A[^:]+:// ? uri_unescape($pass) : undef; # drop ->user
+}
+
+# value of the ";AUTH=..." userinfo parameter, undef if not given
+sub auth {
+	my ($self) = @_;
+	my $userinfo = _userinfo($self) // return undef;
+	$userinfo =~ /;AUTH=(.+)\z/i ? uri_unescape($1) : undef;
+}
+
+sub scheme { (uri_split(${$_[0]}))[0] }
+
+sub as_string { ${$_[0]} }
+
+1;
diff --git a/t/uri_imap.t b/t/uri_imap.t
new file mode 100644
index 00000000000..a2e86a7ec9c
--- /dev/null
+++ b/t/uri_imap.t
@@ -0,0 +1,65 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+require_mods 'URI::Split';
+use_ok 'PublicInbox::URIimap';
+
+is(PublicInbox::URIimap->new('https://example.com/'), undef,
+	'invalid scheme ignored');
+
+my $uri = PublicInbox::URIimap->new('imaps://EXAMPLE.com/');
+is($uri->host, 'EXAMPLE.com', 'host ok');
+is($uri->canonical->host, 'example.com', 'host canonicalized');
+is($uri->canonical->as_string, 'imaps://example.com/', 'URI canonicalized');
+is($uri->port, 993, 'imaps port');
+is($uri->auth, undef, 'no AUTH= without userinfo');
+is($uri->user, undef, 'no user without userinfo');
+
+$uri = PublicInbox::URIimap->new('imaps://foo@0/');
+is($uri->host, '0', 'numeric host');
+is($uri->user, 'foo', 'user extracted');
+
+$uri = PublicInbox::URIimap->new('imap://0/INBOX.sub#frag')->canonical;
+is($uri->as_string, 'imap://0/INBOX.sub', 'no fragment');
+is($uri->scheme, 'imap', 'imap scheme');
+
+$uri = PublicInbox::URIimap->new('imaps://;AUTH=ANONYMOUS@0/');
+is($uri->auth, 'ANONYMOUS', 'AUTH=ANONYMOUS accepted');
+
+$uri = PublicInbox::URIimap->new('imaps://bar%40example.com;AUTH=99%25@0/');
+is($uri->auth, '99%', 'decoded AUTH');
+is($uri->user, 'bar@example.com', 'decoded user');
+is($uri->mailbox, undef, 'mailbox is undef');
+
+$uri = PublicInbox::URIimap->new('imaps://ipv6@[::1]');
+is($uri->host, '::1', 'IPv6 host');
+is($uri->mailbox, undef, 'mailbox is undef');
+
+$uri = PublicInbox::URIimap->new('imaps://0:666/INBOX');
+is($uri->port, 666, 'port read');
+is($uri->mailbox, 'INBOX', 'mailbox read');
+$uri = PublicInbox::URIimap->new('imaps://0/INBOX.sub');
+is($uri->mailbox, 'INBOX.sub', 'dotted mailbox read');
+is($uri->scheme, 'imaps', 'imaps scheme');
+
+is(PublicInbox::URIimap->new('imap://0:143/')->canonical->as_string,
+	'imap://0/', 'default imap port dropped');
+is(PublicInbox::URIimap->new('imaps://0:993/')->canonical->as_string,
+	'imaps://0/', 'default imaps port dropped');
+
+$uri = PublicInbox::URIimap->new('imap://NSA:Hunter2@0/INBOX');
+is($uri->user, 'NSA', 'user before password');
+is($uri->password, 'Hunter2', 'password extracted');
+
+$uri = PublicInbox::URIimap->new('imap://0/%');
+is($uri->mailbox, '%', "RFC 2192 '%' supported");
+is(PublicInbox::URIimap->new('imap://0/%25')->mailbox, '%', 'decoded %25');
+$uri = PublicInbox::URIimap->new('imap://0/*');
+is($uri->mailbox, '*', "RFC 2192 '*' supported");
+
+# TODO: support UIDVALIDITY and other params
+
+done_testing;

  parent reply	other threads:[~2020-06-27 10:04 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-27 10:03 [PATCH 00/34] watch: add IMAP and NNTP support Eric Wong
2020-06-27 10:03 ` [PATCH 01/34] inboxwritable: ensure ssoma.lock exists on init Eric Wong
2020-06-27 10:03 ` [PATCH 02/34] inbox: warn on ->on_inbox_unlock exception Eric Wong
2020-06-27 10:03 ` [PATCH 03/34] IMAPTracker: Add a helper to track our place in reading imap mailboxes Eric Wong
2020-06-27 10:03 ` [PATCH 04/34] imaptracker: use ~/.local/share/public-inbox/imap.sqlite3 Eric Wong
2020-06-27 10:03 ` [PATCH 05/34] watchmaildir: hoist out compile_watchheaders Eric Wong
2020-06-27 10:03 ` [PATCH 06/34] watchmaildir: fix check for spam vs ham inbox conflicts Eric Wong
2020-06-27 10:03 ` Eric Wong [this message]
2020-06-27 10:03 ` [PATCH 08/34] watch: preliminary IMAP support Eric Wong
2020-06-27 10:03 ` [PATCH 09/34] kqnotify|fake_inotify: detect Maildir write ops Eric Wong
2020-06-27 10:03 ` [PATCH 10/34] watch: remove Filesys::Notify::Simple dependency Eric Wong
2020-06-27 10:03 ` [PATCH 11/34] watch: use signalfd for Maildir watching Eric Wong
2020-06-27 19:05   ` Kyle Meyer
2020-06-27 22:32     ` Eric Wong
2020-06-27 10:03 ` [PATCH 12/34] ds: remove fields.pm usage Eric Wong
2020-06-27 10:03 ` [PATCH 13/34] watch: wire up IMAP IDLE reapers to DS Eric Wong
2020-06-27 10:03 ` [PATCH 14/34] watch: support IMAP polling Eric Wong
2020-06-27 10:03 ` [PATCH 15/34] config: support ->urlmatch method for -watch Eric Wong
2020-06-27 10:03 ` [PATCH 16/34] watch: stop importers before forking Eric Wong
2020-06-27 10:03 ` [PATCH 17/34] watch: use UID SEARCH to avoid empty UID FETCH Eric Wong
2020-06-27 10:03 ` [PATCH 18/34] ds: add_timer: allow passing arg to callback Eric Wong
2020-06-27 10:03 ` [PATCH 19/34] imaptracker: add {url} field to reduce args Eric Wong
2020-06-27 10:03 ` [PATCH 20/34] imaptracker: drop {dbname} field Eric Wong
2020-06-27 10:03 ` [PATCH 21/34] watch: avoid long transaction to IMAPTracker Eric Wong
2020-06-27 10:03 ` [PATCH 22/34] watch: support imap.fetchBatchSize parameter Eric Wong
2020-06-27 10:03 ` [PATCH 23/34] watch: imap: be quiet about disconnecting on quit Eric Wong
2020-06-27 10:03 ` [PATCH 24/34] watch: support multiple watch: directives per-inbox Eric Wong
2020-06-27 10:03 ` [PATCH 25/34] watch: remove {mdir} array Eric Wong
2020-06-27 10:03 ` [PATCH 26/34] watch: just use ->urlmatch Eric Wong
2020-06-27 10:03 ` [PATCH 27/34] testcommon: $ENV{TAIL} supports non-@ARGV redirects Eric Wong
2020-06-27 10:03 ` [PATCH 28/34] watch: add NNTP support Eric Wong
2020-06-27 19:06   ` Kyle Meyer
2020-06-27 10:03 ` [PATCH 29/34] watch: show user-specified URL consistently Eric Wong
2020-06-27 10:03 ` [PATCH 30/34] watch: enable autoflush for STDOUT and STDERR Eric Wong
2020-06-27 10:03 ` [PATCH 31/34] watch: use our own "git credential" wrapper Eric Wong
2020-06-27 10:03 ` [PATCH 32/34] watch: support ~/.netrc via Net::Netrc Eric Wong
2020-06-27 10:03 ` [PATCH 33/34] imaptracker: use flock(2) around writes Eric Wong
2020-06-27 10:04 ` [PATCH 34/34] watch: simplify internal structures Eric Wong
2020-06-29 10:34 ` [PATCH 0/5] watch: Maildir fixes Eric Wong
2020-06-29 10:34   ` [PATCH 1/5] watch: check for duplicates in ->over before spamcheck Eric Wong
2020-06-29 10:34   ` [PATCH 2/5] watch: show path for warnings from spam messages Eric Wong
2020-06-29 10:34   ` [PATCH 3/5] watch: ensure SIGCHLD works in forked children Eric Wong
2020-06-29 10:34   ` [PATCH 4/5] spawn: unblock SIGCHLD in subprocess Eric Wong
2020-07-07  6:17     ` [PATCH 6/5] t/spawn: fix test reliability Eric Wong
2020-06-29 10:34   ` [PATCH 5/5] watch: make waitpid() synchronous for Maildir scans Eric Wong
2020-06-29 10:37     ` Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200627100400.9871-8-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).