From mboxrd@z Thu Jan 1 00:00:00 1970 From: Rasmus Villemoes Subject: [PATCH 2/2] git-send-email: Cache generated message-ids, use them when prompting Date: Wed, 21 Aug 2013 19:04:22 +0000 Message-ID: <1377111862-13199-3-git-send-email-rv@rasmusvillemoes.dk> References: <1376701126-5759-1-git-send-email-rv@rasmusvillemoes.dk> <1377111862-13199-1-git-send-email-rv@rasmusvillemoes.dk> Cc: Rasmus Villemoes To: gitster@pobox.com, sandals@crustytoothpaste.net, git@vger.kernel.org X-From: git-owner@vger.kernel.org Wed Aug 21 21:05:27 2013 Return-path: Envelope-to: gcvg-git-2@plane.gmane.org Received: from vger.kernel.org ([209.132.180.67]) by plane.gmane.org with esmtp (Exim 4.69) (envelope-from ) id 1VCDig-0002IP-BY for gcvg-git-2@plane.gmane.org; Wed, 21 Aug 2013 21:05:26 +0200 Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752615Ab3HUTFU (ORCPT ); Wed, 21 Aug 2013 15:05:20 -0400 Received: from mail-ee0-f52.google.com ([74.125.83.52]:39220 "EHLO mail-ee0-f52.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751969Ab3HUTFT (ORCPT ); Wed, 21 Aug 2013 15:05:19 -0400 Received: by mail-ee0-f52.google.com with SMTP id c41so462054eek.39 for ; Wed, 21 Aug 2013 12:05:17 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20120113; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=kR/K/a6Co0NeCKqFAPY9FgbwRJhBqYUtUBIBNCM2MSQ=; b=bAnL0zvW462OcQok4IvxgF/0kKjILnvVfGOMP+/7+srqxvhUXYMLReAVA4/O2ysQ+f FuWxfBszD9mj1gv7mYh3eRnBNDZ3u1/g9bcDzjUWby6Cu9pKgHIUfU2ImAO/7p/rCD/I uGgy78ePorGL6x5kN5wBayw9uRRpVQVnyv3lz5Jy9RuV3xVGGFchI5rXvzxDnKwl5wEy UYsyWSzfq+06+QNk4fzKWbE+kl1x7IixPN43xXG/7p3op0lpugCZdCS1KxKY/EFPqm4o 9DBBGwyQsIg3Mh/RhaSdbIZs2vEIzCClTciGvnIgFUJi+dJTkdkUWCglDxoffyTwjz5n TqnQ== X-Gm-Message-State: ALoCoQlfgsekWYWZJxW4W8/SK989RO410GU/CaFhug+5M+FlM7krf83V6wDZ+jhiV0oyxzo5u2eW X-Received: by 10.15.48.197 with SMTP id h45mr12532982eew.0.1377111917663; Wed, 21 Aug 2013 
12:05:17 -0700 (PDT) Received: from villemoes-sl500.decode.is (wildmoose.dk. [83.169.18.19]) by mx.google.com with ESMTPSA id b45sm11933719eef.4.1969.12.31.16.00.00 (version=TLSv1.1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Wed, 21 Aug 2013 12:05:16 -0700 (PDT) X-Mailer: git-send-email 1.8.4.rc3.2.g61bff3f In-Reply-To: <1377111862-13199-1-git-send-email-rv@rasmusvillemoes.dk> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org Archived-At: Allow the user to specify a file (sendemail.msgidcachefile) in which to store the message-ids generated by git-send-email, along with time and subject information. When prompting for a Message-ID to be used in In-Reply-To, that file can be used to generate a list of options. When composing v2 or v3 of a patch or patch series, this avoids the need to get one's MUA to display the Message-ID of the earlier email (which is cumbersome in some MUAs) and then copy-paste that. Listing all previously sent emails is useless, so currently only the 10 most "relevant" emails are listed. "Relevant" is based on a simple scoring, which might need to be revised: Count the words in the old subject which also appear in the subject of the first email to be sent; add a bonus if the old email was first in a batch (that is, [00/74] is more likely to be relevant than [43/74]). Resort to comparing timestamps (newer is more relevant) when the scores tie. To limit disk usage, the oldest half of the cached entries are expunged when the cache file exceeds sendemail.msgidcachemaxsize (default 100kB). This also ensures that we will never have to read, score, and sort 1000s of entries on each invocation of git-send-email. 
Signed-off-by: Rasmus Villemoes --- git-send-email.perl | 133 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 6 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index ac3b02d..5094267 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -203,6 +203,7 @@ my ($validate, $confirm); my (@suppress_cc); my ($auto_8bit_encoding); my ($compose_encoding); +my ($msgid_cache_file, $msgid_cache_maxsize); my ($debug_net_smtp) = 0; # Net::SMTP, see send_message() @@ -237,6 +238,8 @@ my %config_settings = ( "from" => \$sender, "assume8bitencoding" => \$auto_8bit_encoding, "composeencoding" => \$compose_encoding, + "msgidcachefile" => \$msgid_cache_file, + "msgidcachemaxsize" => \$msgid_cache_maxsize, ); my %config_path_settings = ( @@ -796,11 +799,23 @@ sub expand_one_alias { @bcclist = expand_aliases(@bcclist); @bcclist = validate_address_list(sanitize_address_list(@bcclist)); +if ($compose && $compose > 0) { + @files = ($compose_filename . ".final", @files); +} + if ($thread && !defined $initial_reply_to && $prompting) { + my @choices = (); + if ($msgid_cache_file) { + my $first_subject = get_patch_subject($files[0]); + $first_subject =~ s/^GIT: //; + @choices = msgid_cache_getmatches($first_subject, 10); + @choices = map {[$_->{id}, sprintf "[%s] %s", format_2822_time($_->{epoch}), $_->{subject}]} @choices; + } $initial_reply_to = ask( "Message-ID to be used as In-Reply-To for the first email (if any)? ", default => "", - valid_re => qr/\@.*\./, confirm_only => 1); + valid_re => qr/\@.*\./, confirm_only => 1, + choices => \@choices); } if (defined $initial_reply_to) { $initial_reply_to =~ s/^\s* 0) { - @files = ($compose_filename . 
".final", @files); -} - # Variables we set as part of the loop over files our ($message_id, %mail, $subject, $reply_to, $references, $message, $needs_confirm, $message_num, $ask_default); @@ -1136,7 +1147,7 @@ sub send_message { my $to = join (",\n\t", @recipients); @recipients = unique_email_list(@recipients,@cc,@bcclist); @recipients = (map { extract_valid_address_or_die($_) } @recipients); - my $date = format_2822_time($time++); + my $date = format_2822_time($time); my $gitversion = '@@GIT_VERSION@@'; if ($gitversion =~ m/..GIT_VERSION../) { $gitversion = Git::version(); @@ -1477,6 +1488,11 @@ foreach my $t (@files) { my $message_was_sent = send_message(); + if ($message_was_sent && $msgid_cache_file && !$dry_run) { + msgid_cache_this($message_id, $message_num == 1 ? 1 : 0, , $time, $subject); + } + $time++; + # set up for the next message if ($thread && $message_was_sent && ($chain_reply_to || !defined $reply_to || length($reply_to) == 0 || @@ -1521,6 +1537,8 @@ sub cleanup_compose_files { $smtp->quit if $smtp; +msgid_cache_write() if $msgid_cache_file && !$dry_run; + sub unique_email_list { my %seen; my @emails; @@ -1569,3 +1587,106 @@ sub body_or_subject_has_nonascii { } return 0; } + +my @msgid_new_entries; +sub msgid_cache_this { + my $msgid = shift; + my $first = shift; + my $epoch = shift; + my $subject = shift; + # Make sure there are no tabs which will confuse us, and save + # some valuable horizontal real-estate by removing redundant + # whitespace. + if ($subject) { + $subject =~ s/^\s+|\s+$//g; + $subject =~ s/\s+/ /g; + } + # Replace undef or the empty string by an actual string. 
+ $subject = '(none)' if (!defined $subject || $subject eq ''); + + push @msgid_new_entries, {id => $msgid, first => $first, subject => $subject, epoch => $epoch}; +} + + +# For now, use a simple tab-separated format: +# +# $id\t$wasfirst\t$unixtime\t$subject\n +sub msgid_cache_read { + my $fh; + my $line; + my @entries; + if (not open ($fh, '<', $msgid_cache_file)) { + # A non-existing cache file is ok, but should we warn if errno != ENOENT? + return (); + } + while ($line = <$fh>) { + chomp($line); + my ($id, $first, $epoch, $subject) = split /\t/, $line; + push @entries, {id=>$id, first=>$first, epoch=>$epoch, subject=>$subject}; + } + close($fh); + return @entries; +} + +sub msgid_cache_getmatches { + my ($first_subject, $maxentries) = @_; + my @list = msgid_cache_read(); + + # We need to find the message-ids which are most likely to be + # useful. There are probably better ways to do this, but for + # now we simply count how many words in the old subject also + # appear in $first_subject. + my %words = map {$_ => 1} msgid_subject_words($first_subject); + for my $item (@list) { + # Emails which were first in a batch are more likely + # to be used for followups (cf. the example in "man + # git-send-email"), so give those a head start. + my $score = $item->{first} ? 3 : 0; + for (msgid_subject_words($item->{subject})) { + $score++ if exists $words{$_}; + } + $item->{score} = $score; + } + @list = sort {$b->{score} <=> $a->{score} || + $b->{epoch} <=> $a->{epoch}} @list; + @list = @list[0 .. $maxentries-1] if (@list > $maxentries); + return @list; +} + +sub msgid_subject_words { + my $subject = shift; + # Ignore initial "[PATCH 02/47]" + $subject =~ s/^\s*\[.*?\]//; + my @words = split /\s+/, $subject; + # Ignore short words. 
+ @words = grep { length > 3 } @words; + return @words; +} + +sub msgid_cache_write { + msgid_cache_do_write(1, \@msgid_new_entries); + + if (defined $msgid_cache_maxsize && $msgid_cache_maxsize =~ m/^\s*([0-9]+)\s*([kKmMgG]?)$/) { + my %SI = ('' => 1, 'k' => 1e3, 'm' => 1e6, 'g' => 1e9); + $msgid_cache_maxsize = $1 * $SI{lc($2)}; + } + else { + $msgid_cache_maxsize = 100000; + } + if (-s $msgid_cache_file > $msgid_cache_maxsize) { + my @entries = msgid_cache_read(); + splice @entries, 0, int(@entries/2); + msgid_cache_do_write(0, \@entries); + } +} + +sub msgid_cache_do_write { + my $append = shift; + my $entries = shift; + my $fh; + if (not open($fh, $append ? '>>' : '>', $msgid_cache_file)) { + die "cannot open $msgid_cache_file for writing: $!"; + } + printf $fh "%s\t%d\t%s\t%s\n", $_->{id}, $_->{first}, $_->{epoch}, $_->{subject} for (@$entries); + close($fh); +} -- 1.7.9.5