From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 969FA1F8EC for ; Sat, 27 Jun 2020 10:04:01 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 07/34] URI IMAP support Date: Sat, 27 Jun 2020 10:03:33 +0000 Message-Id: <20200627100400.9871-8-e@yhbt.net> In-Reply-To: <20200627100400.9871-1-e@yhbt.net> References: <20200627100400.9871-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We'll be supporting the IMAP URL scheme described in RFC 5092 for -watch, so add this module to fill in what the `URI' package lacks. --- MANIFEST | 2 + lib/PublicInbox/URIimap.pm | 113 +++++++++++++++++++++++++++++++++++++ t/uri_imap.t | 65 +++++++++++++++++++++ 3 files changed, 180 insertions(+) create mode 100644 lib/PublicInbox/URIimap.pm create mode 100644 t/uri_imap.t diff --git a/MANIFEST b/MANIFEST index 158d7ca2d8e..ffd79c1f1b3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -181,6 +181,7 @@ lib/PublicInbox/Syscall.pm lib/PublicInbox/TLS.pm lib/PublicInbox/TestCommon.pm lib/PublicInbox/Tmpfile.pm +lib/PublicInbox/URIimap.pm lib/PublicInbox/Unsubscribe.pm lib/PublicInbox/UserContent.pm lib/PublicInbox/V2Writable.pm @@ -335,6 +336,7 @@ t/spamcheck_spamc.t t/spawn.t t/thread-cycle.t t/time.t +t/uri_imap.t t/utf8.eml t/v1-add-remove-add.t t/v1reindex.t diff --git a/lib/PublicInbox/URIimap.pm b/lib/PublicInbox/URIimap.pm new file mode 100644 index 00000000000..56b6002a379 --- /dev/null +++ b/lib/PublicInbox/URIimap.pm @@ -0,0 +1,113 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +# cf. 
RFC 5092, which the `URI' package doesn't support +# +# This depends only on the documented public API of the `URI' dist, +# not on internal `_'-prefixed subclasses such as `URI::_server' +# +# The URI::imap module (on CPAN) exists, but it's not in +# common distros. +# +# RFC 2192 also describes ";TYPE=" (LIST or LSUB) for mailbox lists +package PublicInbox::URIimap; +use strict; +use URI::Split qw(uri_split uri_join); # part of URI +use URI::Escape qw(uri_unescape); + +my %default_ports = (imap => 143, imaps => 993); + +sub new { + my ($class, $url) = @_; + $url =~ m!\Aimaps?://! ? bless \$url, $class : undef; +} + +sub canonical { + my ($self) = @_; + + # no #frag in RFC 5092 from what I can tell + my ($scheme, $auth, $path, $query, $_frag) = uri_split($$self); + $path =~ s!\A/+!/!; # excessive leading slash + + # lowercase the host portion + $auth =~ s#\A(.*@)?(.*?)(?::([0-9]+))?\z# + my $ret = ($1//'').lc($2); + if (defined(my $port = $3)) { + if ($default_ports{lc($scheme)} != $port) { + $ret .= ":$port"; + } + } + $ret#ei; + + ref($self)->new(uri_join(lc($scheme), $auth, $path, $query)); +} + +sub host { + my ($self) = @_; + my (undef, $auth) = uri_split($$self); + $auth =~ s!\A.*?@!!; + $auth =~ s!:[0-9]+\z!!; + $auth =~ s!\A\[(.*)\]\z!$1!; # IPv6 + uri_unescape($auth); +} + +# unescaped, may be used for globbing +sub path { + my ($self) = @_; + my (undef, undef, $path) = uri_split($$self); + $path =~ s!\A/+!!; + $path =~ s/;.*\z//; # ;UIDVALIDITY=nz-number + $path eq '' ? undef : $path; +} + +sub mailbox { + my ($self) = @_; + my $path = path($self); + defined($path) ? uri_unescape($path) : undef; +} + +# TODO: UIDVALIDITY, search, and other params + +sub port { + my ($self) = @_; + my ($scheme, $auth) = uri_split($$self); + $auth =~ /:([0-9]+)\z/ ? 
$1 + 0 : $default_ports{lc($scheme)}; +} + +sub authority { + my ($self) = @_; + my (undef, $auth) = uri_split($$self); + $auth +} + +sub user { + my ($self) = @_; + my (undef, $auth) = uri_split($$self); + $auth =~ s/@.*\z// or return undef; # drop host:port + $auth =~ s/;.*\z//; # drop ;AUTH=... + $auth =~ s/:.*\z//; # drop password + uri_unescape($auth); +} + +sub password { + my ($self) = @_; + my (undef, $auth) = uri_split($$self); + $auth =~ s/@.*\z// or return undef; # drop host:port + $auth =~ s/;.*\z//; # drop ;AUTH=... + $auth =~ s/\A[^:]+:// ? uri_unescape($auth) : undef; # drop ->user +} + +sub auth { + my ($self) = @_; + my (undef, $auth) = uri_split($$self); + $auth =~ s/@.*\z//; # drop host:port + $auth =~ /;AUTH=(.+)\z/i ? uri_unescape($1) : undef; +} + +sub scheme { + my ($self) = @_; + (uri_split($$self))[0]; +} + +sub as_string { ${$_[0]} } + +1; diff --git a/t/uri_imap.t b/t/uri_imap.t new file mode 100644 index 00000000000..a2e86a7ec9c --- /dev/null +++ b/t/uri_imap.t @@ -0,0 +1,65 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use Test::More; +use PublicInbox::TestCommon; +require_mods 'URI::Split'; +use_ok 'PublicInbox::URIimap'; + +is(PublicInbox::URIimap->new('https://example.com/'), undef, + 'invalid scheme ignored'); + +my $uri = PublicInbox::URIimap->new('imaps://EXAMPLE.com/'); +is($uri->host, 'EXAMPLE.com', 'host ok'); +is($uri->canonical->host, 'example.com', 'host canonicalized'); +is($uri->canonical->as_string, 'imaps://example.com/', 'URI canonicalized'); +is($uri->port, 993, 'imaps port'); +is($uri->auth, undef); +is($uri->user, undef); + +$uri = PublicInbox::URIimap->new('imaps://foo@0/'); +is($uri->host, '0', 'numeric host'); +is($uri->user, 'foo', 'user extracted'); + +$uri = PublicInbox::URIimap->new('imap://0/INBOX.sub#frag')->canonical; +is($uri->as_string, 'imap://0/INBOX.sub', 'no fragment'); +is($uri->scheme, 'imap'); + +$uri = 
PublicInbox::URIimap->new('imaps://;AUTH=ANONYMOUS@0/'); +is($uri->auth, 'ANONYMOUS', 'AUTH=ANONYMOUS accepted'); + +$uri = PublicInbox::URIimap->new('imaps://bar%40example.com;AUTH=99%25@0/'); +is($uri->auth, '99%', 'decoded AUTH'); +is($uri->user, 'bar@example.com', 'decoded user'); +is($uri->mailbox, undef, 'mailbox is undef'); + +$uri = PublicInbox::URIimap->new('imaps://ipv6@[::1]'); +is($uri->host, '::1', 'IPv6 host'); +is($uri->mailbox, undef, 'mailbox is undef'); + +$uri = PublicInbox::URIimap->new('imaps://0:666/INBOX'); +is($uri->port, 666, 'port read'); +is($uri->mailbox, 'INBOX'); +$uri = PublicInbox::URIimap->new('imaps://0/INBOX.sub'); +is($uri->mailbox, 'INBOX.sub'); +is($uri->scheme, 'imaps'); + +is(PublicInbox::URIimap->new('imap://0:143/')->canonical->as_string, + 'imap://0/'); +is(PublicInbox::URIimap->new('imaps://0:993/')->canonical->as_string, + 'imaps://0/'); + +$uri = PublicInbox::URIimap->new('imap://NSA:Hunter2@0/INBOX'); +is($uri->user, 'NSA'); +is($uri->password, 'Hunter2'); + +$uri = PublicInbox::URIimap->new('imap://0/%'); +is($uri->mailbox, '%', "RFC 2192 '%' supported"); +$uri = PublicInbox::URIimap->new('imap://0/%25'); +$uri = PublicInbox::URIimap->new('imap://0/*'); +is($uri->mailbox, '*', "RFC 2192 '*' supported"); + +# TODO: support UIDVALIDITY and other params + +done_testing;