about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2021-03-21 15:50:46 +0600
committer: Eric Wong <e@80x24.org> 2021-03-21 10:35:11 +0000
commit: 26f6047fff20159085e28bd1e8a6e83a1e952fe0 (patch)
tree: 2f80f7218f27dbc5cf737e32f6de7a61d6d93e5b /lib/PublicInbox
parent: 47d21e78be57a8f4615be5872e08ec4539423d1f (diff)
download: public-inbox-26f6047fff20159085e28bd1e8a6e83a1e952fe0.tar.gz
This will let us tie keywords from remote externals
to those which only exist in local externals.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/ContentHash.pm 15
-rw-r--r-- lib/PublicInbox/LeiDedupe.pm 9
-rw-r--r-- lib/PublicInbox/LeiXSearch.pm 6
3 files changed, 19 insertions, 11 deletions
diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index 4dbe7b50..112b1ea6 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -8,9 +8,9 @@
 # See L<public-inbox-v2-format(5)> manpage for more details.
 package PublicInbox::ContentHash;
 use strict;
-use warnings;
-use base qw/Exporter/;
-our @EXPORT_OK = qw/content_hash content_digest/;
+use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(content_hash content_digest git_sha);
 use PublicInbox::MID qw(mids references);
 use PublicInbox::MsgIter;
 
@@ -94,4 +94,13 @@ sub content_hash ($) {
         content_digest($_[0])->digest;
 }
 
+sub git_sha ($$) {
+        my ($n, $eml) = @_;
+        my $dig = Digest::SHA->new($n);
+        my $buf = $eml->as_string;
+        $dig->add('blob '.length($buf)."\0");
+        $dig->add($buf);
+        $dig;
+}
+
 1;
diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
index 5fec9384..a62b3a7c 100644
--- a/lib/PublicInbox/LeiDedupe.pm
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -3,7 +3,7 @@
 package PublicInbox::LeiDedupe;
 use strict;
 use v5.10.1;
-use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::ContentHash qw(content_hash git_sha);
 use Digest::SHA ();
 
 # n.b. mutt sets most of these headers not sure about Bytes
@@ -18,12 +18,7 @@ sub _regen_oid ($) {
                 push @stash, [ $k, \@v ];
                 $eml->header_set($k); # restore below
         }
-        my $dig = Digest::SHA->new(1); # XXX SHA256 later
-        my $buf = $eml->as_string;
-        $dig->add('blob '.length($buf)."\0");
-        $dig->add($buf);
-        undef $buf;
-
+        my $dig = git_sha(1, $eml);
         for my $kv (@stash) { # restore stashed headers
                 my ($k, @v) = @$kv;
                 $eml->header_set($k, @v);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 17171a7f..b6aaf3e1 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -18,6 +18,7 @@ use PublicInbox::MID qw(mids);
 use PublicInbox::Smsg;
 use PublicInbox::Eml;
 use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR);
+use PublicInbox::ContentHash qw(git_sha);
 
 sub new {
         my ($class) = @_;
@@ -207,10 +208,13 @@ sub query_mset { # non-parallel for non-"--threads" users
 
 sub each_remote_eml { # callback for MboxReader->mboxrd
         my ($eml, $self, $lei, $each_smsg) = @_;
-        if ($self->{import_sto} && !$lei->{ale}->xoids_for($eml, 1)) {
+        my $xoids = $lei->{ale}->xoids_for($eml, 1);
+        if ($self->{import_sto} && !$xoids) {
                 $self->{import_sto}->ipc_do('add_eml', $eml);
         }
         my $smsg = bless {}, 'PublicInbox::Smsg';
+        $smsg->{blob} = $xoids ? (keys(%$xoids))[0]
+                                : git_sha(1, $eml)->hexdigest;
         $smsg->populate($eml);
         $smsg->parse_references($eml, mids($eml));
         $smsg->{$_} //= '' for qw(from to cc ds subject references mid);