From 4b9be88e2465ffcbf91901f06e19a749f6bdf818 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 27 Apr 2021 11:07:52 +0000 Subject: lei lcat: extract Message-IDs from URLs and show them It's a wrapper around "lei q" which extracts Message-IDs from URLs, "<$MSGID>", "id:$MSGID" and attempts to display the local version of the message. Its main purpose is to extract Message-IDs out of commonly-understood URLs to save users bandwidth and time by displaying the message locally. When reading from stdin, it will discard things it doesn't understand, so you can just pipe an entire "Link: $URL" line to it and it'll attempt to pluck the Message-ID out of the URL. --- lib/PublicInbox/LeiLcat.pm | 125 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 lib/PublicInbox/LeiLcat.pm (limited to 'lib/PublicInbox/LeiLcat.pm') diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm new file mode 100644 index 00000000..f10452be --- /dev/null +++ b/lib/PublicInbox/LeiLcat.pm @@ -0,0 +1,125 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# lcat: local cat, display a local message by Message-ID or blob, +# extracting from URL necessary +# "lei lcat " +package PublicInbox::LeiLcat; +use strict; +use v5.10.1; +use PublicInbox::LeiViewText; +use URI::Escape qw(uri_unescape); +use URI; +use PublicInbox::MID qw($MID_EXTRACT); + +sub lcat_redispatch { + my ($lei, $out, $op_p) = @_; + my $l = bless { %$lei }, ref($lei); + delete $l->{sock}; + $l->{''} = $op_p; # daemon only + eval { + $l->qerr("# updating $out"); + up1($l, $out); + $l->qerr("# $out done"); + }; + $l->err($@) if $@; +} + +sub extract_1 ($$) { + my ($lei, $x) = @_; + if ($x =~ m!\b([a-z]+?://\S+)!i) { + my $u = $1; + $u =~ s/[\>\]\)\,\.\;]+\z//; + $u = URI->new($u); + my $p = $u->path; + my $term; + if ($p =~ m!([^/]+\@[^/]+)!) { # common msgid pattern + $term = 'mid:'.uri_unescape($1); + + # is it a URL which returns the full thread? + if ($u->scheme =~ /\Ahttps?/i && + $p =~ m!/(?:T/?|t/?|t\.mbox\.gz|t\.atom)\b!) { + + $lei->{mset_opt}->{threads} = 1; + } + } elsif ($u->scheme =~ /\Ahttps?/i && + # some msgids don't have '@', see if it looks like + # a public-inbox URL: + $p =~ m!/([^/]+)/(raw|t/?|T/?| + t\.mbox\.gz|t\.atom)\z!x) { + $lei->{mset_opt}->{threads} = 1 if $2 && $2 ne 'raw'; + $term = 'mid:'.uri_unescape($1); + } + $term; + } elsif ($x =~ $MID_EXTRACT) { # <$MSGID> + "mid:$1"; + } elsif ($x =~ /\b((?:m|mid):\S+)/) { # our own prefixes (and mairix) + $1; + } elsif ($x =~ /\bid:(\S+)/) { # notmuch convention + "mid:$1"; + } else { + undef; + } +} + +sub extract_all { + my ($lei, @argv) = @_; + my $strict = !$lei->{opt}->{stdin}; + my @q; + for my $x (@argv) { + if (my $term = extract_1($lei,$x)) { + push @q, $term; + } elsif ($strict) { + return $lei->fail(<<""); +could not extract Message-ID from $x + + } + } + @q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv"); +} + +sub _stdin { # PublicInbox::InputPipe::consume callback for --stdin + my ($lei) = @_; # $_[1] = $rbuf + if (defined($_[1])) { + $_[1] eq '' and return eval { + if (my $dfd = $lei->{3}) { + chdir($dfd) or return $lei->fail("fchdir: $!"); + } + my @argv = split(/\s+/, $lei->{mset_opt}->{qstr}); + $lei->{mset_opt}->{qstr} = extract_all($lei, @argv) + or return; + $lei->_start_query; + }; + $lei->{mset_opt}->{qstr} .= $_[1]; + } else { + $lei->fail("error reading stdin: $!"); + } +} + +sub lei_lcat { + my ($lei, @argv) = @_; + my $lxs = $lei->lxs_prepare or return; + $lei->ale->refresh_externals($lxs); + my $sto = $lei->_lei_store(1); + $lei->{lse} = $sto->search; + my $opt = $lei->{opt}; + my %mset_opt = map { $_ => $opt->{$_} } qw(threads limit offset); + $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0; + $mset_opt{limit} //= 10000; + $opt->{sort} //= 'relevance'; + $mset_opt{relevance} = 1; + $lei->{mset_opt} = \%mset_opt; + $opt->{'format'} //= 'mboxrd' unless defined($opt->{output}); + if ($lei->{opt}->{stdin}) { + return $lei->fail(<<'') if @argv; +no args allowed on command-line with --stdin + + require PublicInbox::InputPipe; + PublicInbox::InputPipe::consume($lei->{0}, \&_stdin, $lei); + return; + } + $lei->{mset_opt}->{qstr} = extract_all($lei, @argv) or return; + $lei->_start_query; +} + +1; -- cgit v1.2.3-24-ge0c7