about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/IMAPD.pm        |  2
-rw-r--r-- lib/PublicInbox/LeiQuery.pm     |  2
-rw-r--r-- lib/PublicInbox/Reply.pm        |  9
-rw-r--r-- lib/PublicInbox/Search.pm       |  6
-rw-r--r-- lib/PublicInbox/SearchIdx.pm    | 75
-rw-r--r-- lib/PublicInbox/SearchThread.pm | 13
6 files changed, 70 insertions, 37 deletions
diff --git a/lib/PublicInbox/IMAPD.pm b/lib/PublicInbox/IMAPD.pm
index 661d6537..d8814324 100644
--- a/lib/PublicInbox/IMAPD.pm
+++ b/lib/PublicInbox/IMAPD.pm
@@ -94,7 +94,7 @@ sub imapd_refresh_finalize {
         }
 }
 
-sub imapd_refresh_step { # pi_cfg->iterate_start cb
+sub imapd_refresh_step { # PublicInbox::ConfigIter cb
         my ($pi_cfg, $section, $imapd) = @_;
         if (defined($section)) {
                 return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 51ee3d9c..c998e5c0 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -185,7 +185,7 @@ sub _complete_q {
 # FIXME: Getopt::Long doesn't easily let us support support options with
 # '.' in them (e.g. --http1.1)
 # TODO: should we depend on "-c http.*" options for things which have
-# analogues in git(1)? that would reduce likelyhood of conflicts with
+# analogues in git(1)? that would reduce likelihood of conflicts with
 # our other CLI options
 # Note: some names are renamed to avoid potential conflicts,
 # see %lei2curl in lib/PublicInbox/LeiCurl.pm
diff --git a/lib/PublicInbox/Reply.pm b/lib/PublicInbox/Reply.pm
index d96fadfc..592dfb62 100644
--- a/lib/PublicInbox/Reply.pm
+++ b/lib/PublicInbox/Reply.pm
@@ -1,10 +1,10 @@
-# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # For reply instructions and address generation in WWW UI
 package PublicInbox::Reply;
 use strict;
-use warnings;
+use v5.10.1;
 use URI::Escape qw/uri_escape_utf8/;
 use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href);
 use PublicInbox::Address;
@@ -81,7 +81,6 @@ sub mailto_arg_link {
                 # no $subj for $href below
         } else {
                 push @arg, "--to=$to";
-                $to = uri_escape_utf8($to);
                 $subj = uri_escape_utf8($subj);
         }
         my @cc = sort values %$cc;
@@ -106,6 +105,10 @@ sub mailto_arg_link {
         # anyways.
         return (\@arg, '', $reply_to_all) if $obfs;
 
+        # keep `@' instead of using `%40' for RFC 6068
+        utf8::encode($to);
+        $to =~ s!([^A-Za-z0-9\-\._~\@])!$URI::Escape::escapes{$1}!ge;
+
         # order matters, Subject is the least important header,
         # so it is last in case it's lost/truncated in a copy+paste
         my $href = "mailto:$to?In-Reply-To=$irt${cc}&Subject=$subj";
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 523003b3..b6141f68 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # based on notmuch, but with no concept of folders, files or flags
 #
@@ -118,9 +118,10 @@ my %bool_pfx_external = (
         dfpre => 'XDFPRE',
         dfpost => 'XDFPOST',
         dfblob => 'XDFPRE XDFPOST',
+        patchid => 'XDFID',
 );
 
-my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST';
+my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID';
 my %prob_prefix = (
         # for mairix compatibility
         s => 'S',
@@ -178,6 +179,7 @@ EOF
         'dfpre:' => 'match pre-image git blob ID',
         'dfpost:' => 'match post-image git blob ID',
         'dfblob:' => 'match either pre or post-image git blob ID',
+        'patchid:' => "match `git patch-id --stable' output",
         'rt:' => <<EOF,
 match received time, like `d:' if sender's clock was correct
 EOF
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 85fae4ad..cbfe7816 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -18,6 +18,7 @@ use PublicInbox::MsgIter;
 use PublicInbox::IdxStack;
 use Carp qw(croak carp);
 use POSIX qw(strftime);
+use Fcntl qw(SEEK_SET);
 use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn);
@@ -35,9 +36,8 @@ our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff :
         # assume a typical 64-bit system has 8x more RAM than a
         # typical 32-bit system:
         (($Config{ptrsize} >= 8 ? 8192 : 1024) * 1024);
-
 use constant DEBUG => !!$ENV{DEBUG};
-
+my $BASE85 = qr/\A[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+\z/;
 my $xapianlevels = qr/\A(?:full|medium)\z/;
 my $hex = '[a-f0-9]';
 my $OID = $hex .'{40,}';
@@ -236,8 +236,8 @@ sub index_old_diff_fn {
 
         # no renames or space support for traditional diffs,
         # find the number of leading common paths to strip:
-        my @fa = split('/', $fa);
-        my @fb = split('/', $fb);
+        my @fa = split(m'/', $fa);
+        my @fb = split(m'/', $fb);
         while (scalar(@fa) && scalar(@fb)) {
                 $fa = join('/', @fa);
                 $fb = join('/', @fb);
@@ -257,38 +257,59 @@ sub index_diff ($$$) {
         my ($self, $txt, $doc) = @_;
         my %seen;
         my $in_diff;
-        my @xnq;
-        my $xnq = \@xnq;
-        foreach (split(/\n/, $txt)) {
-                if ($in_diff && s/^ //) { # diff context
+        my $xnq = [];
+        my @l = split(/\n/, $$txt);
+        undef $$txt;
+        while (defined($_ = shift @l)) {
+                if ($in_diff && /^GIT binary patch/) {
+                        push @$xnq, $_;
+                        while (@l && $l[0] =~ /^literal /) {
+                                # TODO allow searching by size range?
+                                # allows searching by exact size via:
+                                # "literal $SIZE"
+                                push @$xnq, shift(@l);
+
+                                # skip base85 and empty lines
+                                while (@l && ($l[0] =~ /$BASE85/o ||
+                                                $l[0] !~ /\S/)) {
+                                        shift @l;
+                                }
+                                # loop hits trailing "literal 0\nHcmV?d00001\n"
+                        }
+                } elsif ($in_diff && s/^ //) { # diff context
                         index_diff_inc($self, $_, 'XDFCTX', $xnq);
                 } elsif (/^-- $/) { # email signature begins
                         $in_diff = undef;
-                } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) {
-                        # wait until "---" and "+++" to capture filenames
+                } elsif (m!^diff --git ("?[^/]+/.+) ("?[^/]+/.+)\z!) {
+                        # capture filenames here for binary diffs:
+                        my ($fa, $fb) = ($1, $2);
+                        push @$xnq, $_;
                         $in_diff = 1;
-                        push @xnq, $_;
+                        $fa = (split(m'/', git_unquote($fa), 2))[1];
+                        $fb = (split(m'/', git_unquote($fb), 2))[1];
+                        $seen{$fa}++ or index_diff_inc($self, $fa, 'XDFN', $xnq);
+                        $seen{$fb}++ or index_diff_inc($self, $fb, 'XDFN', $xnq);
                 # traditional diff:
                 } elsif (m/^diff -(.+) (\S+) (\S+)$/) {
                         my ($opt, $fa, $fb) = ($1, $2, $3);
-                        push @xnq, $_;
+                        push @$xnq, $_;
                         # only support unified:
                         next unless $opt =~ /[uU]/;
                         $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb,
                                                         $xnq);
                 } elsif (m!^--- ("?[^/]+/.+)!) {
                         my $fn = $1;
-                        $fn = (split('/', git_unquote($fn), 2))[1];
+                        $fn = (split(m'/', git_unquote($fn), 2))[1];
                         $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq);
                         $in_diff = 1;
                 } elsif (m!^\+\+\+ ("?[^/]+/.+)!)  {
                         my $fn = $1;
-                        $fn = (split('/', git_unquote($fn), 2))[1];
+                        $fn = (split(m'/', git_unquote($fn), 2))[1];
                         $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq);
                         $in_diff = 1;
                 } elsif (/^--- (\S+)/) {
-                        $in_diff = $1;
-                        push @xnq, $_;
+                        $in_diff = $1; # old diff filename
+                        push @$xnq, $_;
                 } elsif (defined $in_diff && /^\+\+\+ (\S+)/) {
                         $in_diff = index_old_diff_fn($self, \%seen, $in_diff,
                                                         $1, $xnq);
@@ -314,19 +335,19 @@ sub index_diff ($$$) {
                                 /^(?:dis)?similarity index / ||
                                 /^\\ No newline at end of file/ ||
                                 /^Binary files .* differ/) {
-                        push @xnq, $_;
+                        push @$xnq, $_;
                 } elsif ($_ eq '') {
                         # possible to be in diff context, some mail may be
                         # stripped by MUA or even GNU diff(1).  "git apply"
                         # treats a bare "\n" as diff context, too
                 } else {
-                        push @xnq, $_;
+                        push @$xnq, $_;
                         warn "non-diff line: $_\n" if DEBUG && $_ ne '';
                         $in_diff = undef;
                 }
         }
 
-        index_text($self, join("\n", @xnq), 1, 'XNQ');
+        index_text($self, join("\n", @$xnq), 1, 'XNQ');
 }
 
 sub index_xapian { # msg_iter callback
@@ -349,6 +370,20 @@ sub index_xapian { # msg_iter callback
         defined $s or return;
         $_[0]->[0] = $part = undef; # free memory
 
+        if ($s =~ /^(?:diff|---|\+\+\+) /ms) {
+                open(my $fh, '+>:utf8', undef) or die "open: $!";
+                open(my $eh, '+>', undef) or die "open: $!";
+                $fh->autoflush(1);
+                print $fh $s or die "print: $!";
+                sysseek($fh, 0, SEEK_SET) or die "sysseek: $!";
+                my $id = ($self->{ibx} // $self->{eidx})->git->qx(
+                                                [qw(patch-id --stable)],
+                                                {}, { 0 => $fh, 2 => $eh });
+                $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1);
+                seek($eh, 0, SEEK_SET) or die "seek: $!";
+                while (<$eh>) { warn $_ }
+        }
+
         # split off quoted and unquoted blocks:
         my @sections = PublicInbox::MsgIter::split_quotes($s);
         undef $s; # free memory
@@ -358,7 +393,7 @@ sub index_xapian { # msg_iter callback
                 } else {
                         # does it look like a diff?
                         if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
-                                index_diff($self, $txt, $doc);
+                                index_diff($self, \$txt, $doc);
                         } else {
                                 index_text($self, $txt, 1, 'XNQ');
                         }
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index f07dd696..00ae9fac 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -38,13 +38,13 @@ sub thread {
                 # TODO: move this to a more appropriate place, breaks tests
                 # if we do it during psgi_cull
                 delete $_->{num};
-
-                PublicInbox::SearchThread::Msg::cast($_);
+                bless $_, 'PublicInbox::SearchThread::Msg';
                 if (exists $id_table{$_->{mid}}) {
                         $_->{children} = [];
                         push @imposters, $_; # we'll deal with them later
                         undef;
                 } else {
+                        $_->{children} = {}; # will become arrayref later
                         $id_table{$_->{mid}} = $_;
                         defined($_->{references});
                 }
@@ -108,13 +108,6 @@ sub ghost {
         }, __PACKAGE__;
 }
 
-# give a existing smsg the methods of this class
-sub cast {
-        my ($smsg) = @_;
-        $smsg->{children} = {};
-        bless $smsg, __PACKAGE__;
-}
-
 sub topmost {
         my ($self) = @_;
         my @q = ($self);
@@ -174,7 +167,7 @@ sub order_children {
         while (defined($cur = shift @q)) {
                 # the {children} hashref here...
                 my @c = grep { !$seen{$_}++ && visible($_, $ibx) }
-                        values %{$cur->{children}};
+                        values %{delete $cur->{children}};
                 $ordersub->(\@c) if scalar(@c) > 1;
                 $cur->{children} = \@c; # ...becomes an arrayref
                 push @q, @c;