about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-04-05 09:34:09 +0000
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-04-05 10:27:13 +0000
commit e194adb858f2fdb220aa43f3ce398ff9e77cc1d3 (patch)
tree fa0bd1e9c1552e3e166573f9b998d5d520980d1f /lib
parent a699c1ea84e9bb2ebd76a1bf1094b686bc520bda (diff)
download public-inbox-e194adb858f2fdb220aa43f3ce398ff9e77cc1d3.tar.gz
Sorting large msets is a waste when it comes to mboxes
since MUAs should thread and sort them as the user desires.

This forces us to rework each of the mbox download mechanisms
to be more independent of each other, but might make things
easier to reason about.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Mbox.pm 139
-rw-r--r-- lib/PublicInbox/Search.pm 6
2 files changed, 83 insertions, 62 deletions
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index c66ccaa7..c5e1cb9c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -138,13 +138,24 @@ sub thread_mbox {
         my ($ctx, $srch, $sfx) = @_;
         eval { require IO::Compress::Gzip };
         return sub { need_gzip(@_) } if $@;
-        my $prev = 0;
+        my $mid = $ctx->{mid};
+        my $msgs = $srch->get_thread($mid, 0);
+        return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
+        my $prev = $msgs->[-1]->{num};
+        my $i = 0;
         my $cb = sub {
-                my $msgs = $srch->get_thread($ctx->{mid}, $prev);
-                $prev = $msgs->[-1]->{num} if scalar(@$msgs);
-                $msgs;
+                while (1) {
+                        if (my $smsg = $msgs->[$i++]) {
+                                return $smsg;
+                        }
+                        # refill result set
+                        $msgs = $srch->get_thread($mid, $prev);
+                        return unless @$msgs;
+                        $prev = $msgs->[-1]->{num};
+                        $i = 0;
+                }
         };
-        PublicInbox::MboxGz->response($ctx, $cb);
+        PublicInbox::MboxGz->response($ctx, $cb, $msgs->[0]->subject);
 }
 
 sub emit_range {
@@ -159,22 +170,55 @@ sub emit_range {
         mbox_all($ctx, $query);
 }
 
+sub mbox_all_ids {
+        my ($ctx) = @_;
+        my $prev = 0;
+        my $ids = $ctx->{-inbox}->mm->ids_after(\$prev) or return
+                [404, [qw(Content-Type text/plain)], ["No results found\n"]];
+        my $i = 0;
+        my $over = $ctx->{srch}->{over_ro};
+        my $cb = sub {
+                do {
+                        while ((my $num = $ids->[$i++])) {
+                                my $smsg = $over->get_art($num) or next;
+                                return $smsg;
+                        }
+                        $ids = $ctx->{-inbox}->mm->ids_after(\$prev);
+                        $i = 0;
+                } while (@$ids);
+                undef;
+        };
+        return PublicInbox::MboxGz->response($ctx, $cb, 'all');
+}
+
 sub mbox_all {
         my ($ctx, $query) = @_;
 
         eval { require IO::Compress::Gzip };
         return sub { need_gzip(@_) } if $@;
-        if ($query eq '') {
-                my $prev = 0;
-                my $cb = sub { $ctx->{-inbox}->mm->ids_after(\$prev) };
-                return PublicInbox::MboxGz->response($ctx, $cb, 'all');
-        }
-        my $opts = { offset => 0 };
+        return mbox_all_ids($ctx) if $query eq '';
+        my $opts = { mset => 2 };
         my $srch = $ctx->{srch};
+        my $mset = $srch->query($query, $opts);
+        $opts->{offset} = $mset->size or
+                        return [404, [qw(Content-Type text/plain)],
+                                ["No results found\n"]];
+        my $i = 0;
         my $cb = sub { # called by MboxGz->getline
-                my $msgs = $srch->query($query, $opts);
-                $opts->{offset} += scalar @$msgs;
-                $msgs;
+                while (1) {
+                        while (my $mi = (($mset->items)[$i++])) {
+                                my $doc = $mi->get_document;
+                                my $smsg = $srch->retry_reopen(sub {
+                                        PublicInbox::SearchMsg->load_doc($doc);
+                                }) or next;
+                                return $smsg;
+                        }
+                        # refill result set
+                        $mset = $srch->query($query, $opts);
+                        my $size = $mset->size or return;
+                        $opts->{offset} += $size;
+                        $i = 0;
+                }
         };
         PublicInbox::MboxGz->response($ctx, $cb, 'results-'.$query);
 }
@@ -206,7 +250,6 @@ sub new {
                 gz => IO::Compress::Gzip->new(\$buf, Time => 0),
                 cb => $cb,
                 ctx => $ctx,
-                msgs => [],
         }, $class;
 }
 
@@ -214,60 +257,34 @@ sub response {
         my ($class, $ctx, $cb, $fn) = @_;
         my $body = $class->new($ctx, $cb);
         # http://www.iana.org/assignments/media-types/application/gzip
-        $body->{hdr} = [ 'Content-Type', 'application/gzip' ];
-        $body->{fn} = $fn;
-        my $hdr = $body->getline; # fill in Content-Disposition filename
-        [ 200, $hdr, $body ];
-}
-
-sub set_filename ($$) {
-        my ($fn, $msg) = @_;
-        return to_filename($fn) if defined($fn);
-
-        PublicInbox::Mbox::subject_fn($msg);
+        my @h = qw(Content-Type application/gzip);
+        if ($fn) {
+                $fn = to_filename($fn);
+                push @h, 'Content-Disposition', "inline; filename=$fn.mbox.gz";
+        }
+        [ 200, \@h, $body ];
 }
 
 # called by Plack::Util::foreach or similar
 sub getline {
         my ($self) = @_;
         my $ctx = $self->{ctx} or return;
-        my $ibx = $ctx->{-inbox};
-        my $gz = $self->{gz};
-        my $msgs = $self->{msgs};
-        do {
-                # work on existing result set
-                while (defined(my $smsg = shift @$msgs)) {
-                        # ids_after may return integers
-                        ref($smsg) or
-                                $smsg = $ctx->{srch}->{over_ro}->get_art($smsg);
-
-                        my $msg = eval { $ibx->msg_by_smsg($smsg) } or next;
-                        $msg = Email::Simple->new($msg);
-                        $gz->write(PublicInbox::Mbox::msg_str($ctx, $msg,
-                                                                $smsg->mid));
-
-                        # use subject of first message as subject
-                        if (my $hdr = delete $self->{hdr}) {
-                                my $fn = set_filename($self->{fn}, $msg);
-                                push @$hdr, 'Content-Disposition',
-                                                "inline; filename=$fn.mbox.gz";
-                                return $hdr;
-                        }
-                        my $bref = $self->{buf};
-                        if (length($$bref) >= 8192) {
-                                my $ret = $$bref; # copy :<
-                                ${$self->{buf}} = '';
-                                return $ret;
-                        }
-
-                        # be fair to other clients on public-inbox-httpd:
-                        return '';
+        while (my $smsg = $self->{cb}->()) {
+                my $msg = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
+                $msg = Email::Simple->new($msg);
+                $self->{gz}->write(PublicInbox::Mbox::msg_str($ctx, $msg,
+                                $smsg->{mid}));
+                my $bref = $self->{buf};
+                if (length($$bref) >= 8192) {
+                        my $ret = $$bref; # copy :<
+                        ${$self->{buf}} = '';
+                        return $ret;
                 }
 
-                # refill result set
-                $msgs = $self->{msgs} = $self->{cb}->();
-        } while (@$msgs);
-        $gz->close;
+                # be fair to other clients on public-inbox-httpd:
+                return '';
+        }
+        delete($self->{gz})->close;
         # signal that we're done and can return undef next call:
         delete $self->{ctx};
         ${delete $self->{buf}};
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 4e014f4e..9eb07284 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -219,7 +219,11 @@ sub _enquire_once {
         $enquire->set_query($query);
         $opts ||= {};
         my $desc = !$opts->{asc};
-        if ($opts->{relevance}) {
+        if (($opts->{mset} || 0) == 2) {
+                $enquire->set_docid_order(Search::Xapian::ENQ_ASCENDING());
+                $enquire->set_weighting_scheme(Search::Xapian::BoolWeight->new);
+                delete $self->{enquire};
+        } elsif ($opts->{relevance}) {
                 $enquire->set_sort_by_relevance_then_value(TS, $desc);
         } else {
                 $enquire->set_sort_by_value_then_relevance(TS, $desc);