user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] lei inspect: learn "num:" and "docid:" prefixes
Date: Thu, 17 Jun 2021 22:00:45 +0000	[thread overview]
Message-ID: <20210617220047.11225-2-e@80x24.org> (raw)
In-Reply-To: <20210617220047.11225-1-e@80x24.org>

"num:" is useful for inspecting Inbox-ish directories, while
"docid:" can be used for any Xapian DB (not just stuff managed
by our code).
---
 lib/PublicInbox/LeiInspect.pm | 73 +++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm
index eb2634b4..30714764 100644
--- a/lib/PublicInbox/LeiInspect.pm
+++ b/lib/PublicInbox/LeiInspect.pm
@@ -57,6 +57,75 @@ sub inspect_sync_folder ($$) {
 	$ent
 }
 
+sub inspect_docid ($$;$) { # ($lei, $docid, [$ent]) => $ent describing doc
+	my ($lei, $docid, $ent) = @_;
+	require PublicInbox::Search;
+	$ent //= {};
+	my $xdb;
+	if ($xdb = delete $ent->{xdb}) { # from inspect_num
+	} elsif (defined(my $dir = $lei->{opt}->{dir})) {
+		no warnings 'once';
+		$xdb = $PublicInbox::Search::X{Database}->new($dir);
+	} else {
+		$xdb = $lei->{lse}->xdb;
+	}
+	$xdb or return $lei->fail('no Xapian DB');
+	my $doc = $xdb->get_document($docid); # raises
+	my $data = $doc->get_data;
+	$ent->{docid} = $docid;
+	$ent->{data_length} = length($data);
+	$ent->{description} = $doc->get_description;
+	$ent->{$_} = $doc->$_ for (qw(termlist_count values_count));
+	my $cur = $doc->termlist_begin;
+	my $end = $doc->termlist_end;
+	for (; $cur != $end; $cur++) { # group term names by uppercase prefix
+		my $tn = $cur->get_termname;
+		my $term = ($tn =~ s/\A([A-Z]+)//) ? $1 : '';
+		warn "$tn no prefix! (???)" if $term eq ''; # never read a stale $1
+		push @{$ent->{terms}->{$term}}, $tn;
+	}
+	@$_ = sort(@$_) for values %{$ent->{terms} // {}};
+	$cur = $doc->values_begin;
+	$end = $doc->values_end;
+	for (; $cur != $end; $cur++) { # value slots, keyed by slot number
+		my $n = $cur->get_valueno;
+		my $v = $cur->get_value;
+		my $iv = PublicInbox::Search::sortable_unserialise($v);
+		$v = $iv + 0 if defined $iv;
+		# not using ->[$n] since we may have large gaps in $n
+		$ent->{'values'}->{$n} = $v;
+	}
+	$ent;
+}
+
+sub inspect_num ($$) { # ($lei, $num) => $ent via inspect_docid
+	my ($lei, $num) = @_;
+	my ($docid, $ibx);
+	my $ent = { num => $num };
+	if (defined(my $dir = $lei->{opt}->{dir})) {
+		my $num2docid = $lei->{lse}->can('num2docid');
+		if (-f "$dir/ei.lock") {
+			require PublicInbox::ExtSearch;
+			$ibx = PublicInbox::ExtSearch->new($dir);
+		} elsif (-f "$dir/inbox.lock" || -d "$dir/public-inbox") {
+			require PublicInbox::Inbox; # v2, v1
+			$ibx = bless { inboxdir => $dir }, 'PublicInbox::Inbox';
+		} # else: $dir is unrecognized and $ibx stays undef
+		$ent->{xdb} = ($ibx ? $ibx->xdb : undef) //
+			return $lei->fail("no Xapian DB for $dir");
+		$docid = $num2docid->($ibx, $num);
+	} else {
+		$ibx = $lei->{lse};
+		$lei->{lse}->xdb; # set {nshard} for num2docid
+		$docid = $lei->{lse}->num2docid($num);
+	}
+	if ($ibx && $ibx->over) { # attach the over.sqlite3 entry, if any
+		my $smsg = $ibx->over->get_art($num);
+		$ent->{smsg} = { %$smsg } if $smsg;
+	}
+	inspect_docid($lei, $docid, $ent);
+}
+
 sub inspect1 ($$$) {
 	my ($lei, $item, $more) = @_;
 	my $ent;
@@ -72,6 +141,10 @@ sub inspect1 ($$$) {
 		}
 	} elsif ($item =~ m!\A(?:maildir|mh):!i || -d $item) {
 		$ent = inspect_sync_folder($lei, $item);
+	} elsif ($item =~ m!\Adocid:([0-9]+)\z!) {
+		$ent = inspect_docid($lei, $1 + 0);
+	} elsif ($item =~ m!\Anum:([0-9]+)\z!) {
+		$ent = inspect_num($lei, $1 + 0);
 	} else { # TODO: more things
 		return $lei->fail("$item not understood");
 	}

  reply	other threads:[~2021-06-17 22:00 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-17 22:00 [PATCH 0/3] lei: internal bug fixups Eric Wong
2021-06-17 22:00 ` Eric Wong [this message]
2021-06-17 22:00 ` [PATCH 2/3] lei_input: prefix bare Maildir paths w/ "maildir:" Eric Wong
2021-06-17 22:00 ` [PATCH 3/3] lei/store: cull redundant docids based on blob OID Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210617220047.11225-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).