From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id AB4BD1FA19 for ; Tue, 26 Oct 2021 10:35:58 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 7/9] lei p2q: use LeiInput for multi-patch series Date: Tue, 26 Oct 2021 10:35:55 +0000 Message-Id: <20211026103557.2738-8-e@80x24.org> In-Reply-To: <20211026103557.2738-1-e@80x24.org> References: <20211026103557.2738-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The LeiInput backend now allows p2q to work like any other command which reads .eml, .patch, mbox*, Maildir, IMAP, and NNTP input. Running "git format-patch --stdout -1 $COMMIT" remains supported. This is intended to allow lower memory use while parsing "git log --pretty=mboxrd -p" output. Previously, the entire output of "git log" would be slurped into memory at once. The intended use is to allow easy(-ish :P) searching for unapplied patches as documented in the new example in the manpage. --- Documentation/lei-p2q.pod | 6 ++ lib/PublicInbox/LEI.pm | 4 +- lib/PublicInbox/LeiInput.pm | 30 ++++++++-- lib/PublicInbox/LeiP2q.pm | 115 ++++++++++++++++++------------------ 4 files changed, 89 insertions(+), 66 deletions(-) diff --git a/Documentation/lei-p2q.pod b/Documentation/lei-p2q.pod index 2e0b1ab676ed..4bc5d25f8ef0 100644 --- a/Documentation/lei-p2q.pod +++ b/Documentation/lei-p2q.pod @@ -77,6 +77,12 @@ Suppress feedback messages. # to view results on a remote HTTP(S) public-inbox instance $BROWSER https://example.com/pub-inbox/?q=$(lei p2q --uri $COMMIT_OID) + # to view unapplied patches for a given $FILE from the past year: + echo \( rt:last.year.. AND dfn:$FILE \) AND NOT \( \ + $(git log -p --pretty=mboxrd --since=last.year $FILE | + lei p2q -F mboxrd ) + \) | lei q -o /tmp/unapplied + =head1 CONTACT Feedback welcome via plain-text mail to L diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 93a7b426de3c..f9d24f29c87d 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -274,9 +274,9 @@ our %CMD = ( # sorted in order of importance/use: 'one-time conversion from URL or filesystem to another format', qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!), @net_opt, @c_opt ], -'p2q' => [ 'FILE|COMMIT_OID|--stdin', +'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin', "use a patch to generate a query for `lei q --stdin'", - qw(stdin| want|w=s@ uri debug), @c_opt ], + qw(stdin| in-format|F=s want|w=s@ uri debug), @net_opt, @c_opt ], 'config' => [ '[...]', sub { 'git-config(1) wrapper for '._config_path($_[0]); }, qw(config-file|system|global|file|f=s), # for conflict detection diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 2621fc1f9d05..540681e3ff6b 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -64,6 +64,11 @@ sub input_mbox_cb { # base MboxReader callback $self->input_eml_cb($eml); } +sub input_net_cb { # imap_each, nntp_each cb + my ($url, $uid, $kw, $eml, $self) = @_; + $self->input_eml_cb($eml); +} + # import a single file handle of $name # Subclass must define ->input_eml_cb and ->input_mbox_cb sub input_fh { @@ -108,10 +113,10 @@ sub handle_http_input ($$@) { grep(/\A--compressed\z/, @$curl) or $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); eval { $self->input_fh('mboxrd', $fh, $url, @args) }; - my $err = $@; + my @err = ($@ ? $@ : ()); $ar->join; - $? || $err and - $lei->child_error($?, "@$cmd failed".$err ? " $err" : ''); + push(@err, "\$?=$?") if $?; + $lei->child_error($?, "@$cmd failed: @err") if @err; } sub input_path_url { @@ -184,7 +189,17 @@ EOM $self, @args); } } elsif ($self->{missing_ok} && !-e $input) { # don't ->fail - $self->folder_missing("$ifmt:$input"); + if ($lei->{cmd} eq 'p2q') { + my $fp = [ qw(git format-patch --stdout -1), $input ]; + my $rdr = { 2 => $lei->{2} }; + my $fh = popen_rd($fp, undef, $rdr); + eval { $self->input_fh('eml', $fh, $input, @args) }; + my @err = ($@ ? $@ : ()); + close($fh) or push @err, "\$?=$?"; + $lei->child_error($?, "@$fp failed: @err") if @err; + } else { + $self->folder_missing("$ifmt:$input"); + } } else { $lei->fail("$ifmt_pfx$input unsupported (TODO)"); } @@ -330,9 +345,12 @@ $input is `eml', not --in-format=$in_fmt } push @md, $input; } elsif ($self->{missing_ok} && !-e $input) { - # for lei rm-watch - $may_sync and $input = 'maildir:'. + if ($lei->{cmd} eq 'p2q') { + # will run "git format-patch" + } elsif ($may_sync) { # for lei rm-watch + $input = 'maildir:'. $lei->abs_path($input); + } } else { return $lei->fail("Unable to handle $input") } diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm index 08ec81c5295e..09ec0a079bb9 100644 --- a/lib/PublicInbox/LeiP2q.pm +++ b/lib/PublicInbox/LeiP2q.pm @@ -1,16 +1,16 @@ -# Copyright (C) 2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # front-end for the "lei patch-to-query" sub-command package PublicInbox::LeiP2q; use strict; use v5.10.1; -use parent qw(PublicInbox::IPC); +use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::Eml; use PublicInbox::Smsg; use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Git qw(git_unquote); -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::OnDestroy; use URI::Escape qw(uri_escape_utf8); my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!; @@ -28,8 +28,16 @@ sub xphrase ($) { } ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g); } +sub add_qterm ($$@) { + my ($self, $p, @v) = @_; + for (@v) { + $self->{qseen}->{"$p\0$_"} //= + push(@{$self->{qterms}->{$p}}, $_); + } +} + sub extract_terms { # eml->each_part callback - my ($p, $lei) = @_; + my ($p, $self) = @_; my $part = $p->[0]; # ignore $depth and @idx; my $ct = $part->content_type || 'text/plain'; my ($s, undef) = msg_part_text($part, $ct); @@ -38,7 +46,7 @@ sub extract_terms { # eml->each_part callback # TODO: b: nq: q: for (split(/\n/, $s)) { if ($in_diff && s/^ //) { # diff context - push @{$lei->{qterms}->{dfctx}}, xphrase($_); + add_qterm($self, 'dfctx', xphrase($_)); } elsif (/^-- $/) { # email signature begins $in_diff = undef; } elsif (m!^diff --git $FN $FN!) { @@ -46,21 +54,21 @@ sub extract_terms { # eml->each_part callback $in_diff = 1; } elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) { my ($oa, $ob) = ($1, $2); - push @{$lei->{qterms}->{dfpre}}, $oa; - push @{$lei->{qterms}->{dfpost}}, $ob; + add_qterm($self, 'dfpre', $oa); + add_qterm($self, 'dfpost', $ob); # who uses dfblob? } elsif (m!^(?:---|\+{3}) ($FN)!) { next if $1 eq '/dev/null'; my $fn = (split(m!/!, git_unquote($1.''), 2))[1]; - push @{$lei->{qterms}->{dfn}}, xphrase($fn); + add_qterm($self, 'dfn', xphrase($fn)); } elsif ($in_diff && s/^\+//) { # diff added - push @{$lei->{qterms}->{dfb}}, xphrase($_); + add_qterm($self, 'dfb', xphrase($_)); } elsif ($in_diff && s/^-//) { # diff removed - push @{$lei->{qterms}->{dfa}}, xphrase($_); + add_qterm($self, 'dfa', xphrase($_)); } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) { # traditional diff w/o -p } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) { - push @{$lei->{qterms}->{dfhh}}, xphrase($1); + add_qterm($self, 'dfhh', xphrase($1)); } elsif (/^(?:dis)similarity index/ || /^(?:old|new) mode/ || /^(?:deleted|new) file mode/ || @@ -92,53 +100,43 @@ my %pfx2smsg = ( rt => [ qw(ts) ], # ditto... ); -sub do_p2q { # via wq_do - my ($self) = @_; - my $lei = $self->{lei}; - my $want = $lei->{opt}->{want} // [ qw(dfpost7) ]; - my @want = split(/[, ]+/, "@$want"); - for (@want) { - /\A(?:(d|dt|rt):)?([0-9]+)(\.(?:day|weeks)s?)?\z/ or next; - my ($pfx, $n, $unit) = ($1, $2, $3); - $n *= 86400 * ($unit =~ /week/i ? 7 : 1); - $_ = [ $pfx, $n ]; - } - my $smsg = bless {}, 'PublicInbox::Smsg'; - my $in = $self->{0}; - my @cmd; - unless ($in) { - my $input = $self->{input}; - my $devfd = $lei->path_to_fd($input) // return; - if ($devfd >= 0) { - $in = $lei->{$devfd}; - } elsif (-e $input) { - open($in, '<', $input) or - return $lei->fail("open < $input: $!"); - } else { - @cmd = (qw(git format-patch --stdout -1), $input); - $in = popen_rd(\@cmd, undef, { 2 => $lei->{2} }); +sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh + my ($self, $eml) = @_; + my $diff_want = $self->{diff_want} // do { + my $want = $self->{lei}->{opt}->{want} // [ qw(dfpost7) ]; + my @want = split(/[, ]+/, "@$want"); + for (@want) { + /\A(?:(d|dt|rt):)?([0-9]+)(\.(?:day|weeks)s?)?\z/ + or next; + my ($pfx, $n, $unit) = ($1, $2, $3); + $n *= 86400 * ($unit =~ /week/i ? 7 : 1); + $_ = [ $pfx, $n ]; } + $self->{want_order} = \@want; + $self->{diff_want} = +{ map { $_ => 1 } @want }; }; - my $str = do { local $/; <$in> }; - @cmd && !close($in) and return $lei->fail("E: @cmd failed: $?"); - my $eml = PublicInbox::Eml->new(\$str); - $lei->{diff_want} = +{ map { $_ => 1 } @want }; + my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); while (my ($pfx, $fields) = each %pfx2smsg) { - next unless $lei->{diff_want}->{$pfx}; + next unless $diff_want->{$pfx}; for my $f (@$fields) { my $v = $smsg->{$f} // next; - push @{$lei->{qterms}->{$pfx}}, xphrase($v); + add_qterm($self, $pfx, xphrase($v)); } } - $eml->each_part(\&extract_terms, $lei, 1); + $eml->each_part(\&extract_terms, $self, 1); +} + +sub emit_query { + my ($self) = @_; + my $lei = $self->{lei}; if ($lei->{opt}->{debug}) { my $json = ref(PublicInbox::Config->json)->new; $json->utf8->canonical->pretty; - print { $lei->{2} } $json->encode($lei->{qterms}); + print { $lei->{2} } $json->encode($self->{qterms}); } my (@q, %seen); - for my $pfx (@want) { + for my $pfx (@{$self->{want_order}}) { if (ref($pfx) eq 'ARRAY') { my ($p, $t_range) = @$pfx; # TODO @@ -148,7 +146,7 @@ sub do_p2q { # via wq_do } else { my $plusminus = ($pfx =~ s/\A([\+\-])//) ? $1 : ''; my $end = ($pfx =~ s/([0-9\*]+)\z//) ? $1 : ''; - my $x = delete($lei->{qterms}->{$pfx}) or next; + my $x = delete($self->{qterms}->{$pfx}) or next; my $star = $end =~ tr/*//d ? '*' : ''; my $min_len = ($end || 0) + 0; @@ -181,24 +179,25 @@ sub do_p2q { # via wq_do } sub lei_p2q { # the "lei patch-to-query" entry point - my ($lei, $input) = @_; - my $self = bless {}, __PACKAGE__; - if ($lei->{opt}->{stdin}) { - $self->{0} = delete $lei->{0}; # guard from _lei_atfork_child - } else { - $self->{input} = $input; - } - my ($op_c, $ops) = $lei->workers_start($self, 1); + my ($lei, @inputs) = @_; + $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin}; + my $self = bless { missing_ok => 1 }, __PACKAGE__; + $self->prepare_inputs($lei, \@inputs) or return; + my $ops = {}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; + (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; - $self->wq_io_do('do_p2q', []); - $self->wq_close; + net_merge_all_done($self) unless $lei->{auth}; $lei->wait_wq_events($op_c, $ops); } sub ipc_atfork_child { my ($self) = @_; - $self->{lei}->_lei_atfork_child; - $self->SUPER::ipc_atfork_child; + PublicInbox::LeiInput::input_only_atfork_child($self); + PublicInbox::OnDestroy->new($$, \&emit_query, $self); } +no warnings 'once'; +*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; + 1;