From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 708F91FBC9 for ; Wed, 10 Jun 2020 07:05:21 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 12/82] imap: support fetch for BODYSTRUCTURE and BODY Date: Wed, 10 Jun 2020 07:04:09 +0000 Message-Id: <20200610070519.18252-13-e@yhbt.net> In-Reply-To: <20200610070519.18252-1-e@yhbt.net> References: <20200610070519.18252-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: I'm not sure which clients use these, but it could be useful down the line. --- lib/PublicInbox/Eml.pm | 7 +-- lib/PublicInbox/IMAP.pm | 106 +++++++++++++++++++++++++++++++++++++++- t/imapd.t | 16 +++++- 3 files changed, 123 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 6f6874cd237..d2bd3915545 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -235,11 +235,11 @@ sub mp_descend ($$) { # $arg - user-supplied arg (think pthread_create) # $once - unref body scalar during iteration sub each_part { - my ($self, $cb, $arg, $once) = @_; + my ($self, $cb, $arg, $once, $all) = @_; my $p = mp_descend($self, $once // 0) or return $cb->([$self, 0, 0], $arg); - $cb->([$self, 0, 0], $arg) if $self->{-call_cb}; # rare + $cb->([$self, 0, 0], $arg) if ($all || $self->{-call_cb}); # rare $p = [ $p, 0 ]; my @s; # our virtual stack @@ -255,7 +255,8 @@ sub each_part { (my $nxt = mp_descend($sub, $nr))) { push(@s, $p) if scalar @{$p->[0]}; $p = [ $nxt, @idx, 0 ]; - $cb->([$sub, $depth, @idx], $arg) if $sub->{-call_cb}; + ($all || $sub->{-call_cb}) and + $cb->([$sub, $depth, @idx], $arg); } else { # a leaf node $cb->([$sub, $depth, @idx], $arg); } diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index ca9a0ea7d42..6c5e0290925 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -18,6 +18,7 @@ use base qw(PublicInbox::DS); use fields qw(imapd logged_in ibx long_cb -login_tag -idle_tag -idle_max); use PublicInbox::Eml; +use PublicInbox::EmlContentFoo qw(parse_content_disposition); use PublicInbox::DS qw(now); use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use Text::ParseWords qw(parse_line); @@ -248,6 +249,105 @@ sub eml_envelope ($) { ) . ')'; } +sub _esc_hash ($) { + my ($hash) = @_; + if ($hash && scalar keys %$hash) { + $hash = [ %$hash ]; # flatten hash into 1-dimensional array + '(' . join(' ', map { _esc($_) } @$hash) . ')'; + } else { + 'NIL'; + } +} + +sub body_disposition ($) { + my ($eml) = @_; + my $cd = $eml->header_raw('Content-Disposition') or return 'NIL'; + $cd = parse_content_disposition($cd); + my $buf = '('._esc($cd->{type}); + $buf .= ' ' . _esc_hash(delete $cd->{attributes}); + $buf .= ')'; +} + +sub body_leaf ($$;$) { + my ($eml, $structure, $hold) = @_; + my $buf = ''; + $eml->{is_submsg} and # parent was a message/(rfc822|news|global) + $buf .= eml_envelope($eml). ' '; + my $ct = $eml->ct; + $buf .= '('._esc($ct->{type}).' '; + $buf .= _esc($ct->{subtype}); + $buf .= ' ' . _esc_hash(delete $ct->{attributes}); + $buf .= ' ' . _esc($eml->header_raw('Content-ID')); + $buf .= ' ' . _esc($eml->header_raw('Content-Description')); + my $cte = $eml->header_raw('Content-Transfer-Encoding') // '7bit'; + $buf .= ' ' . _esc($cte); + $buf .= ' ' . $eml->{imap_body_len}; + $buf .= ' '.($eml->body_raw =~ tr/\n/\n/) if lc($ct->{type}) eq 'text'; + + # for message/(rfc822|global|news), $hold[0] should have envelope + $buf .= ' ' . (@$hold ? join('', @$hold) : 'NIL') if $hold; + + if ($structure) { + $buf .= ' '._esc($eml->header_raw('Content-MD5')); + $buf .= ' '. body_disposition($eml); + $buf .= ' '._esc($eml->header_raw('Content-Language')); + $buf .= ' '._esc($eml->header_raw('Content-Location')); + } + $buf .= ')'; +} + +sub body_parent ($$$) { + my ($eml, $structure, $hold) = @_; + my $ct = $eml->ct; + my $type = lc($ct->{type}); + if ($type eq 'multipart') { + my $buf = '('; + $buf .= @$hold ? join('', @$hold) : 'NIL'; + $buf .= ' '._esc($ct->{subtype}); + if ($structure) { + $buf .= ' '._esc_hash(delete $ct->{attributes}); + $buf .= ' '.body_disposition($eml); + $buf .= ' '._esc($eml->header_raw('Content-Language')); + $buf .= ' '._esc($eml->header_raw('Content-Location')); + } + $buf .= ')'; + @$hold = ($buf); + } else { # message/(rfc822|global|news) + @$hold = (body_leaf($eml, $structure, $hold)); + } +} + +# this is gross, but we need to process the parent part AFTER +# the child parts are done +sub bodystructure_prep { + my ($p, $q) = @_; + my ($eml, $depth) = @$p; # ignore idx + # set length here, as $eml->{bdy} gets deleted for message/rfc822 + $eml->{imap_body_len} = length($eml->body_raw); + push @$q, $eml, $depth; +} + +# for FETCH BODY and FETCH BODYSTRUCTURE +sub fetch_body ($;$) { + my ($eml, $structure) = @_; + my @q; + $eml->each_part(\&bodystructure_prep, \@q, 0, 1); + my $cur_depth = 0; + my @hold; + do { + my ($part, $depth) = splice(@q, -2); + my $is_mp_parent = $depth == ($cur_depth - 1); + $cur_depth = $depth; + + if ($is_mp_parent) { + body_parent($part, $structure, \@hold); + } else { + unshift @hold, body_leaf($part, $structure); + } + } while (@q); + join('', @hold); +} + sub uid_fetch_cb { # called by git->cat_async my ($bref, $oid, $type, $size, $fetch_m_arg) = @_; my ($self, undef, $ibx, undef, undef, $msgs, $want) = @$fetch_m_arg; @@ -286,7 +386,11 @@ sub uid_fetch_cb { # called by git->cat_async $self->msg_more(" $f {".length($$bref)."}\r\n"); $self->msg_more($$bref); } - # TODO BODY/BODYSTRUCTURE, specific headers + $want->{BODYSTRUCTURE} and + $self->msg_more(' BODYSTRUCTURE '.fetch_body($eml, 1)); + $want->{BODY} and + $self->msg_more(' BODY '.fetch_body($eml)); + $self->msg_more(")\r\n"); } diff --git a/t/imapd.t b/t/imapd.t index c31ac12f941..31154bdf3e7 100644 --- a/t/imapd.t +++ b/t/imapd.t @@ -7,7 +7,7 @@ use Time::HiRes (); use PublicInbox::TestCommon; use PublicInbox::Config; use PublicInbox::Spawn qw(which); -require_mods(qw(DBD::SQLite Mail::IMAPClient)); +require_mods(qw(DBD::SQLite Mail::IMAPClient Mail::IMAPClient::BodyStructure)); my $level = '-Lbasic'; SKIP: { @@ -190,6 +190,11 @@ for my $r ('1:*', '1') { $ret = $mic->fetch_hash($r, 'FLAGS') or BAIL_OUT "FETCH $@"; is_deeply($ret->{1}->{FLAGS}, '', 'no flags'); + + my $bs = $mic->get_bodystructure($r) or BAIL_OUT("bodystructure: $@"); + ok($bs, 'got a bodystructure'); + is(lc($bs->bodytype), 'text', '->bodytype'); + is(lc($bs->bodyenc), '8bit', '->bodyenc'); } # Mail::IMAPClient ->compress creates cyclic reference: @@ -217,10 +222,12 @@ $pi_config->each_inbox(sub { my $ng = $ibx->{newsgroup}; my $mic = Mail::IMAPClient->new(%mic_opt); ok($mic && $mic->login && $mic->IsAuthenticated, "authed $name"); + my $uidnext = $mic->uidnext($ng); # we'll fetch BODYSTRUCTURE on this + ok($uidnext, 'got uidnext for later fetch'); is_deeply([$mic->has_capability('IDLE')], ['IDLE'], "IDLE capa $name"); ok(!$mic->idle, "IDLE fails w/o SELECT/EXAMINE $name"); ok($mic->examine($ng), "EXAMINE $ng succeeds"); - ok($mic->idle, "IDLE succeeds on $ng"); + ok(my $idle_tag = $mic->idle, "IDLE succeeds on $ng"); open(my $fh, '<', 't/data/message_embed.eml') or BAIL_OUT("open: $!"); run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or @@ -280,6 +287,11 @@ $pi_config->each_inbox(sub { ok(@res = $mic->idle_data(11), "IDLE succeeds on $ng after HUP"); is(grep(/\A\* [0-9] EXISTS\b/, @res), 1, 'got EXISTS message'); ok((Time::HiRes::time() - $t0) < 10, 'IDLE client notified'); + ok($mic->done($idle_tag), 'IDLE DONE'); + my $bs = $mic->get_bodystructure($uidnext); + ok($bs, 'BODYSTRUCTURE ok for deeply nested'); + $ret = $mic->fetch_hash($uidnext, 'BODY') or BAIL_OUT "FETCH $@"; + ok($ret->{$uidnext}->{BODY}, 'got something in BODY'); }); $td->kill;