From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E66901FA19 for ; Tue, 27 Apr 2021 11:07:53 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/5] lei q + lcat: support --format=text output Date: Tue, 27 Apr 2021 11:07:53 +0000 Message-Id: <20210427110753.24609-6-e@80x24.org> In-Reply-To: <20210427110753.24609-1-e@80x24.org> References: <20210427110753.24609-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This is mainly for "lei lcat" where it's the default, but I find it useful anyways compared to the JSON view. Colors are loaded from ~/.config/lei/config, and fall back to using diff colors from a normal git config (e.g. ~/.gitconfig). 
--- MANIFEST | 1 + lib/PublicInbox/Hval.pm | 2 +- lib/PublicInbox/LeiLcat.pm | 2 +- lib/PublicInbox/LeiToMail.pm | 63 ++++++++- lib/PublicInbox/LeiViewText.pm | 237 +++++++++++++++++++++++++++++++++ lib/PublicInbox/ViewDiff.pm | 4 +- 6 files changed, 301 insertions(+), 8 deletions(-) create mode 100644 lib/PublicInbox/LeiViewText.pm diff --git a/MANIFEST b/MANIFEST index d3b46f8b..5933ddf4 100644 --- a/MANIFEST +++ b/MANIFEST @@ -218,6 +218,7 @@ lib/PublicInbox/LeiSucks.pm lib/PublicInbox/LeiTag.pm lib/PublicInbox/LeiToMail.pm lib/PublicInbox/LeiUp.pm +lib/PublicInbox/LeiViewText.pm lib/PublicInbox/LeiXSearch.pm lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index eab4738e..00b3c8b4 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -34,7 +34,7 @@ my %escape_sequence = ( "\x7f" => '\\x7f', # DEL ); -my %xhtml_map = ( +our %xhtml_map = ( '"' => '&#34;', '&' => '&#38;', "'" => '&#39;', diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm index f10452be..87729acf 100644 --- a/lib/PublicInbox/LeiLcat.pm +++ b/lib/PublicInbox/LeiLcat.pm @@ -109,7 +109,7 @@ sub lei_lcat { $opt->{sort} //= 'relevance'; $mset_opt{relevance} = 1; $lei->{mset_opt} = \%mset_opt; - $opt->{'format'} //= 'mboxrd' unless defined($opt->{output}); + $opt->{'format'} //= 'text' unless defined($opt->{output}); if ($lei->{opt}->{stdin}) { return $lei->fail(<<'') if @argv; no args allowed on command-line with --stdin diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 8b2f82dc..fa3af710 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -309,6 +309,26 @@ sub _imap_write_cb ($$) { } } +sub _text_write_cb ($$) { + my ($self, $lei) = @_; + my $dedupe = $lei->{dedupe}; + $dedupe->prepare_dedupe if $dedupe; + my $lvt = $lei->{lvt}; + my $ovv = $lei->{ovv}; + $lei->{1} // die "no stdout ($ovv->{dst})"; # redirected earlier + $lei->{1}->autoflush(1); + binmode 
$lei->{1}, ':utf8'; + my $lse = $lei->{lse}; # may be undef + sub { # for git_to_mail + my ($bref, $smsg, $eml) = @_; + $lse->xsmsg_vmd($smsg) if $lse; + $eml //= PublicInbox::Eml->new($bref); # copy bref + return if $dedupe && $dedupe->is_dup($eml, $smsg); + my $lk = $ovv->lock_for_scope; + $lei->out(${$lvt->eml_to_text($smsg, $eml)}, "\n"); + } +} + sub write_cb { # returns a callback for git_to_mail my ($self, $lei) = @_; # _mbox_write_cb, _maildir_write_cb or _imap_write_cb @@ -329,8 +349,6 @@ sub new { $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/'; } elsif (substr($fmt, 0, 4) eq 'mbox') { require PublicInbox::MboxReader; - (-d $dst || (-e _ && !-w _)) and die - "$dst exists and is not a writable file\n"; $self->can("eml2$fmt") or die "bad mbox format: $fmt\n"; $self->{base_type} = 'mbox'; } elsif ($fmt =~ /\Aimaps?\z/) { # TODO .onion support @@ -347,9 +365,23 @@ sub new { $dst = $lei->{ovv}->{dst} = $$uri; # canonicalized $lei->{net} = $net; $self->{base_type} = 'imap'; + } elsif ($fmt eq 'text') { + require PublicInbox::LeiViewText; + $lei->{lvt} = PublicInbox::LeiViewText->new($lei); + $self->{base_type} = 'text'; } else { die "bad mail --format=$fmt\n"; } + if ($self->{base_type} =~ /\A(?:text|mbox)\z/) { + (-d $dst || (-e _ && !-w _)) and die + "$dst exists and is not a writable file\n"; + } + if ($self->{base_type} eq 'text') { + my @err = map { + defined($lei->{opt}->{$_}) ? "--$_" : (); + } (qw(mua save)); + die "@err incompatible with $fmt\n" if @err; + } $self->{dst} = $dst; $lei->{dedupe} = $lei->{lss} // do { my $dd_cls = 'PublicInbox::'. 
@@ -429,6 +461,29 @@ sub _do_augment_imap { } } +sub _pre_augment_text { + my ($self, $lei) = @_; + my $dst = $lei->{ovv}->{dst}; + my $out; + my $devfd = $lei->path_to_fd($dst) // die "bad $dst"; + if ($devfd >= 0) { + $out = $lei->{$devfd}; + } else { # normal-looking path + if (-p $dst) { + open $out, '>', $dst or die "open($dst): $!"; + } elsif (-f _ || !-e _) { + # text allows augment, HTML/Atom won't + my $mode = $lei->{opt}->{augment} ? '>>' : '>'; + open $out, $mode, $dst or die "open($mode, $dst): $!"; + } else { + die "$dst is not a file or FIFO\n"; + } + } + $lei->{ovv}->ovv_out_lk_init if !$lei->{ovv}->{lock_path}; + $lei->{1} = $out; + undef; +} + sub _pre_augment_mbox { my ($self, $lei) = @_; my $dst = $lei->{ovv}->{dst}; @@ -523,8 +578,8 @@ sub pre_augment { # fast (1 disk seek), runs in same process as post_augment sub do_augment { # slow, runs in wq worker my ($self, $lei) = @_; # _do_augment_maildir, _do_augment_mbox, or _do_augment_imap - my $m = "_do_augment_$self->{base_type}"; - $self->$m($lei); + my $m = $self->can("_do_augment_$self->{base_type}") or return; + $m->($self, $lei); } # fast (spawn compressor or mkdir), runs in same process as pre_augment diff --git a/lib/PublicInbox/LeiViewText.pm b/lib/PublicInbox/LeiViewText.pm new file mode 100644 index 00000000..6f5fca49 --- /dev/null +++ b/lib/PublicInbox/LeiViewText.pm @@ -0,0 +1,237 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# PublicInbox::Eml to (optionally colorized) text converter for terminals +# the non-HTML counterpart to PublicInbox::View +package PublicInbox::LeiViewText; +use strict; +use v5.10.1; +use PublicInbox::MsgIter qw(msg_part_text); +use PublicInbox::ContentHash qw(git_sha); +use PublicInbox::MID qw(references); +use PublicInbox::View; +use PublicInbox::Hval; +use PublicInbox::ViewDiff; +use PublicInbox::Spawn qw(popen_rd); +use Term::ANSIColor; + +sub _xs { + # xhtml_map works since we don't search for HTML ([&<>'"]) + $_[0] =~ 
s/([\x7f\x00-\x1f])/$PublicInbox::Hval::xhtml_map{$1}/sge; +} + +my %DEFAULT_COLOR = ( + # mutt names, loaded from ~/.config/lei/config + quoted => 'blue', + hdrdefault => 'cyan', + status => 'bright_cyan', # smsg stuff + + # git names and defaults, falls back to ~/.gitconfig + new => 'green', + old => 'red', + meta => 'bold', + frag => 'cyan', + func => undef, + context => undef, +); + +sub my_colored { + my ($self, $slot) = @_; # $_[2] = buffer + my $val = $self->{"color.$slot"} //= + $self->{-leicfg}->{"color.$slot"} // + $self->{-gitcfg}->{"color.diff.$slot"} // + $self->{-gitcfg}->{"diff.color.$slot"} // + $DEFAULT_COLOR{$slot}; + $val = $val->[-1] if ref($val) eq 'ARRAY'; + if (defined $val) { + # git doesn't use "_", Term::ANSIColor does + $val =~ s/\Abright([^_])/bright_$1/i; + ${$self->{obuf}} .= Term::ANSIColor::colored($_[2], lc $val); + } else { + ${$self->{obuf}} .= $_[2]; + } +} + +sub uncolored { ${$_[0]->{obuf}} .= $_[2] } + +sub new { + my ($cls, $lei) = @_; + my $self = bless { %{$lei->{opt}}, -colored => \&uncolored }, $cls; + return $self unless $self->{color} || -t $lei->{1}; + my $cmd = [ qw(git config -z --includes -l) ]; + my ($r, $pid) = popen_rd($cmd, undef, { 2 => $lei->{2} }); + my $cfg = PublicInbox::Config::config_fh_parse($r, "\0", "\n"); + waitpid($pid, 0); + if ($?) 
{ + $lei->err("# git-config failed, no color (non-fatal)"); + return $self; + } + $self->{-colored} = \&my_colored; + $self->{-gitcfg} = $cfg; + $self->{-leicfg} = $lei->{cfg}; + $self; +} + +sub hdr_buf ($$) { + my ($self, $eml) = @_; + my $hbuf = ''; + for my $f (qw(From To Cc)) { + for my $v ($eml->header($f)) { + next if $v !~ /\S/; + PublicInbox::View::fold_addresses($v); + _xs($v); + $hbuf .= "$f: $v\n"; + } + } + for my $f (qw(Subject Date Newsgroups Message-ID X-Message-ID)) { + for my $v ($eml->header($f)) { + _xs($v); + $hbuf .= "$f: $v\n"; + } + } + if (my @irt = $eml->header_raw('In-Reply-To')) { + for my $v (@irt) { + _xs($v); + $hbuf .= "In-Reply-To: $v\n"; + } + } else { + my $refs = references($eml); + if (defined(my $irt = pop @$refs)) { + _xs($irt); + $hbuf .= "In-Reply-To: <$irt>\n"; + } + if (@$refs) { + my $max = $self->{-max_cols}; + $hbuf .= 'References: ' . + join("\n\t", map { '<'._xs($_).'>' } @$refs) . + ">\n"; + } + } + $self->{-colored}->($self, 'hdrdefault', $hbuf .= "\n"); +} + +sub attach_note ($$$$;$) { + my ($self, $ct, $p, $fn, $err) = @_; + my ($part, $depth, $idx) = @$p; + my $obuf = $self->{obuf}; + my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...) + $$obuf .= <{-smsg}->{blob} // ''; + $blob .= ':' if $blob ne ''; + $$obuf .= "[-- Attachment $blob$idx "; + _xs($ct); + my $size = length($part->body); + my $ts = "Type: $ct, Size: $size bytes"; + my $d = $part->header('Content-Description') // $fn // ''; + _xs($d); + $$obuf .= $d eq '' ? 
"$ts --]\n" : "$d --]\n[-- $ts --]\n"; + hdr_buf($self, $part) if $part->{is_submsg}; +} + +sub flush_text_diff ($$) { + my ($self, $cur) = @_; + my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $$cur); + undef $$cur; # free memory + my $dctx; + my $obuf = $self->{obuf}; + my $colored = $self->{-colored}; + while (defined(my $x = shift @top)) { + if (scalar(@top) >= 4 && + $top[1] =~ $PublicInbox::ViewDiff::IS_OID && + $top[0] =~ $PublicInbox::ViewDiff::IS_OID) { + splice(@top, 0, 4); + $dctx = 1; + $colored->($self, 'meta', $x); + } elsif ($dctx) { + # Quiet "Complex regular subexpression recursion limit" + # warning. Perl will truncate matches upon hitting + # that limit, giving us more (and shorter) scalars than + # would be ideal, but otherwise it's harmless. + # + # We could replace the `+' metacharacter with `{1,100}' + # to limit the matches ourselves to 100, but we can + # let Perl do it for us, quietly. + no warnings 'regexp'; + + for my $s (split(/((?:(?:^\+[^\n]*\n)+)| + (?:(?:^-[^\n]*\n)+)| + (?:^@@ [^\n]+\n))/xsm, $x)) { + if (!defined($dctx)) { + ${$self->{obuf}} .= $s; + } elsif ($s =~ s/\A(@@ \S+ \S+ @@\s*)//) { + $colored->($self, 'frag', $1); + $colored->($self, 'func', $s); + } elsif ($s =~ /\A\+/) { + $colored->($self, 'new', $s); + } elsif ($s =~ /\A-- $/sm) { # email sig starts + $dctx = undef; + ${$self->{obuf}} .= $s; + } elsif ($s =~ /\A-/) { + $colored->($self, 'old', $s); + } else { + $colored->($self, 'context', $s); + } + } + } else { + ${$self->{obuf}} .= $x; + } + } +} + +sub add_text_buf { # callback for Eml->each_part + my ($p, $self) = @_; + my ($part, $depth, $idx) = @$p; + my $ct = $part->content_type || 'text/plain'; + my $fn = $part->filename; + my ($s, $err) = msg_part_text($part, $ct); + return attach_note($self, $ct, $p, $fn) unless defined $s; + hdr_buf($self, $part) if $part->{is_submsg}; + $s =~ s/\r\n/\n/sg; + _xs($s); + $s .= "\n" unless substr($s, -1, 1) eq "\n"; + my $diff = ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ 
/ms); + my @sections = PublicInbox::MsgIter::split_quotes($s); + undef $s; # free memory + if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) { + # badly-encoded message with $err? tell the world about it! + attach_note($self, $ct, $p, $fn, $err); + ${$self->{obuf}} .= "\n"; + } + my $colored = $self->{-colored}; + for my $cur (@sections) { + if ($cur =~ /\A>/) { + $colored->($self, 'quoted', $cur); + } elsif ($diff) { + flush_text_diff($self, \$cur); + } else { + ${$self->{obuf}} .= $cur; + } + undef $cur; # free memory + } +} + +# returns a scalar ref to the rendered text; dereference it for $lei->out or print +sub eml_to_text { + my ($self, $smsg, $eml) = @_; + local $Term::ANSIColor::EACHLINE = "\n"; + $self->{obuf} = \(my $obuf = ''); + $self->{-smsg} = $smsg; + $self->{-max_cols} = ($self->{columns} //= 80) - 8; # for header wrap + my @h = (); + for my $f (qw(blob pct)) { + push @h, "$f:$smsg->{$f}" if defined $smsg->{$f}; + } + @h = ("# @h\n") if @h; + for my $f (qw(kw L)) { + my $v = $smsg->{$f} or next; + push @h, "# $f:".join(',', @$v)."\n" if @$v; + } + $self->{-colored}->($self, 'status', join('', @h)); + hdr_buf($self, $eml); + $eml->each_part(\&add_text_buf, $self, 1); + delete $self->{obuf}; +} + +1; diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 8fe7261f..e9a7bf69 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -30,7 +30,7 @@ my $DIFFSTAT_COMMENT = my $NULL_TO_BLOB = qr/^(index $OID_NULL\.\.)($OID_BLOB)\b/ms; my $BLOB_TO_NULL = qr/^index ($OID_BLOB)(\.\.$OID_NULL)\b/ms; my $BLOB_TO_BLOB = qr/^index ($OID_BLOB)\.\.($OID_BLOB)/ms; -my $EXTRACT_DIFFS = qr/( +our $EXTRACT_DIFFS = qr/( (?: # begin header stuff, don't capture filenames, here, # but instead wait for the --- and +++ lines. 
(?:^diff\x20--git\x20$FN\x20$FN$LF) @@ -41,7 +41,7 @@ my $EXTRACT_DIFFS = qr/( ^index\x20($OID_BLOB)\.\.($OID_BLOB)$ANY*$LF ^---\x20($FN)$LF ^\+{3}\x20($FN)$LF)/msx; -my $IS_OID = qr/\A$OID_BLOB\z/s; +our $IS_OID = qr/\A$OID_BLOB\z/s; # link to line numbers in blobs sub diff_hunk ($$$$) {