about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-01-21 19:46:16 +0000
committerEric Wong <e@80x24.org>2021-01-22 16:18:01 -0400
commit0bc18178bc8cd08a28befbb6b3d2e7cccfc62589 (patch)
tree3c11d8c580bf1be71c8b13ce4f365dcaf9fd9125 /lib
parentaa680c4c254aaaa531d263ae7a85e5015d8a2f6a (diff)
downloadpublic-inbox-0bc18178bc8cd08a28befbb6b3d2e7cccfc62589.tar.gz
From_ lines are shown when mbox* variants are output to stdout,
making {oid} and {pct} information visible without risking being
propagated to other importer processes if they were in
lei-specific X-* headers.

Maildirs already had OIDs in the filename, now they gain Xapian
{pct} in case anybody cares.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/LeiOverview.pm9
-rw-r--r--lib/PublicInbox/LeiToMail.pm60
2 files changed, 38 insertions, 31 deletions
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 47d9eb31..7a4fa857 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -224,8 +224,9 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
                 my $git_dir = $git->{git_dir};
                 sub {
                         my ($smsg, $mitem) = @_;
-                        $l2m->wq_do('write_mail', \@io, $git_dir,
-                                        $smsg->{blob}, $lei_ipc, $smsg->{kw});
+                        $smsg->{pct} = get_pct($mitem) if $mitem;
+                        $l2m->wq_do('write_mail', \@io, $git_dir, $smsg,
+                                        $lei_ipc);
                 }
         } elsif ($l2m) {
                 my $wcb = $l2m->write_cb($lei);
@@ -234,8 +235,8 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
                 my $g2m = $l2m->can('git_to_mail');
                 sub {
                         my ($smsg, $mitem) = @_;
-                        $git->cat_async($smsg->{blob}, $g2m,
-                                        [ $wcb, $smsg->{kw} ]);
+                        $smsg->{pct} = get_pct($mitem) if $mitem;
+                        $git->cat_async($smsg->{blob}, $g2m, [ $wcb, $smsg ]);
                 };
         } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
                 my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 1be0b09c..3dcce9e7 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -32,14 +32,14 @@ my %kw2status = (
 );
 
 sub _mbox_hdr_buf ($$$) {
-        my ($eml, $type, $kw) = @_;
+        my ($eml, $type, $smsg) = @_;
         $eml->header_set($_) for (qw(Lines Bytes Content-Length));
 
         # Messages are always 'O' (non-\Recent in IMAP), it saves
         # MUAs the trouble of rewriting the mbox if no other
         # changes are made
         my %hdr = (Status => [ 'O' ]); # set Status, X-Status
-        for my $k (@$kw) {
+        for my $k (@{$smsg->{kw} // []}) {
                 if (my $ent = $kw2status{$k}) {
                         push @{$hdr{$ent->[0]}}, $ent->[1];
                 } else { # X-Label?
@@ -53,9 +53,11 @@ sub _mbox_hdr_buf ($$$) {
 
         # fixup old bug from import (pre-a0c07cba0e5d8b6a)
         $$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+        my $ident = $smsg->{blob} // 'lei';
+        if (defined(my $pct = $smsg->{pct})) { $ident .= "=$pct" }
 
         substr($$buf, 0, 0, # prepend From line
-                "From lei\@$type Thu Jan  1 00:00:00 1970$eml->{crlf}");
+                "From $ident\@$type Thu Jan  1 00:00:00 1970$eml->{crlf}");
         $buf;
 }
 
@@ -71,8 +73,8 @@ sub _print_full {
 }
 
 sub eml2mboxrd ($;$) {
-        my ($eml, $kw) = @_;
-        my $buf = _mbox_hdr_buf($eml, 'mboxrd', $kw);
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxrd', $smsg);
         if (my $bdy = delete $eml->{bdy}) {
                 $$bdy =~ s/^(>*From )/>$1/gm;
                 $$buf .= $eml->{crlf};
@@ -84,8 +86,8 @@ sub eml2mboxrd ($;$) {
 }
 
 sub eml2mboxo {
-        my ($eml, $kw) = @_;
-        my $buf = _mbox_hdr_buf($eml, 'mboxo', $kw);
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxo', $smsg);
         if (my $bdy = delete $eml->{bdy}) {
                 $$bdy =~ s/^From />From /gm;
                 $$buf .= $eml->{crlf};
@@ -108,8 +110,8 @@ sub _mboxcl_common ($$$) {
 
 # mboxcl still escapes "From " lines
 sub eml2mboxcl {
-        my ($eml, $kw) = @_;
-        my $buf = _mbox_hdr_buf($eml, 'mboxcl', $kw);
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxcl', $smsg);
         my $crlf = $eml->{crlf};
         if (my $bdy = delete $eml->{bdy}) {
                 $$bdy =~ s/^From />From /gm;
@@ -121,8 +123,8 @@ sub eml2mboxcl {
 
 # mboxcl2 has no "From " escaping
 sub eml2mboxcl2 {
-        my ($eml, $kw) = @_;
-        my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $kw);
+        my ($eml, $smsg) = @_;
+        my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $smsg);
         my $crlf = $eml->{crlf};
         if (my $bdy = delete $eml->{bdy}) {
                 _mboxcl_common($buf, $bdy, $crlf);
@@ -140,10 +142,11 @@ sub git_to_mail { # git->cat_async callback
                         warn "unexpected type=$type for $oid\n";
                 }
         }
-        if ($size > 0) {
-                my ($write_cb, $kw) = @$arg;
-                $write_cb->($bref, $oid, $kw);
+        my ($write_cb, $smsg) = @$arg;
+        if ($smsg->{blob} ne $oid) {
+                die "BUG: expected=$smsg->{blob} got=$oid";
         }
+        $write_cb->($bref, $smsg) if $size > 0;
 }
 
 sub reap_compress { # dwaitpid callback
@@ -247,11 +250,11 @@ sub _mbox_write_cb ($$) {
         my $dedupe = $lei->{dedupe};
         $dedupe->prepare_dedupe;
         sub { # for git_to_mail
-                my ($buf, $oid, $kw) = @_;
+                my ($buf, $smsg) = @_;
                 return unless $out;
                 my $eml = PublicInbox::Eml->new($buf);
-                if (!$dedupe->is_dup($eml, $oid)) {
-                        $buf = $eml2mbox->($eml, $kw);
+                if (!$dedupe->is_dup($eml, $smsg->{blob})) {
+                        $buf = $eml2mbox->($eml, $smsg);
                         my $lk = $ovv->lock_for_scope;
                         eval { $write->($out, $buf) };
                         if ($@) {
@@ -283,12 +286,15 @@ sub _augment_file { # _maildir_each_file cb
 sub _unlink { unlink($_[0]) }
 
 sub _buf2maildir {
-        my ($dst, $buf, $oid, $kw) = @_;
+        my ($dst, $buf, $smsg) = @_;
+        my $kw = $smsg->{kw} // [];
         my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw));
         my $rand = ''; # chosen by die roll :P
         my ($tmp, $fh, $final);
+        my $common = $smsg->{blob};
+        if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" }
         do {
-                $tmp = $dst.'tmp/'.$rand."oid=$oid";
+                $tmp = $dst.'tmp/'.$rand.$common;
         } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY) &&
                 $! == EEXIST && ($rand = int(rand 0x7fffffff).','));
         if (print $fh $$buf and close($fh)) {
@@ -299,14 +305,14 @@ sub _buf2maildir {
                 $dst .= 'cur/';
                 $rand = '';
                 do {
-                        $final = $dst.$rand."oid=$oid:2,$sfx";
+                        $final = $dst.$rand.$common.':2,'.$sfx;
                 } while (!link($tmp, $final) && $! == EEXIST &&
                         ($rand = int(rand 0x7fffffff).','));
                 unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
         } else {
                 my $err = $!;
                 unlink($tmp);
-                die "Error writing $oid to $dst: $err";
+                die "Error writing $smsg->{blob} to $dst: $err";
         }
 }
 
@@ -316,12 +322,12 @@ sub _maildir_write_cb ($$) {
         $dedupe->prepare_dedupe;
         my $dst = $lei->{ovv}->{dst};
         sub { # for git_to_mail
-                my ($buf, $oid, $kw) = @_;
-                return _buf2maildir($dst, $buf, $oid, $kw) if !$dedupe;
+                my ($buf, $smsg) = @_;
+                return _buf2maildir($dst, $buf, $smsg) if !$dedupe;
                 my $eml = PublicInbox::Eml->new($$buf); # copy buf
-                return if $dedupe->is_dup($eml, $oid);
+                return if $dedupe->is_dup($eml, $smsg->{blob});
                 undef $eml;
-                _buf2maildir($dst, $buf, $oid, $kw);
+                _buf2maildir($dst, $buf, $smsg);
         }
 }
 
@@ -447,7 +453,7 @@ sub post_augment { # fast (spawn compressor or mkdir), runs in main daemon
 }
 
 sub write_mail { # via ->wq_do
-        my ($self, $git_dir, $oid, $lei, $kw) = @_;
+        my ($self, $git_dir, $smsg, $lei) = @_;
         my $not_done = delete $self->{4}; # write end of {each_smsg_done}
         my $wcb = $self->{wcb} //= do { # first message
                 my %sig = $lei->atfork_child_wq($self);
@@ -456,7 +462,7 @@ sub write_mail { # via ->wq_do
                 $self->write_cb($lei);
         };
         my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir);
-        $git->cat_async($oid, \&git_to_mail, [ $wcb, $kw, $not_done ]);
+        $git->cat_async($smsg->{blob}, \&git_to_mail, [$wcb, $smsg, $not_done]);
 }
 
 sub ipc_atfork_prepare {