From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, RP_MATCHES_RCVD,URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 013AA20A47 for ; Thu, 28 Apr 2016 02:00:38 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/2] import: document API for public consumption Date: Thu, 28 Apr 2016 02:00:32 +0000 Message-Id: <20160428020033.6048-2-e@80x24.org> In-Reply-To: <20160428020033.6048-1-e@80x24.org> References: <20160428020033.6048-1-e@80x24.org> List-Id: This is probably trivial enough to be final? --- Documentation/include.mk | 2 +- lib/PublicInbox/Git.pm | 54 +++++++++++++++++++- lib/PublicInbox/Import.pm | 127 ++++++++++++++++++++++++++++++++++++++++++++-- t/import.t | 2 +- 4 files changed, 178 insertions(+), 7 deletions(-) diff --git a/Documentation/include.mk b/Documentation/include.mk index 4669ac5..9427887 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -6,7 +6,7 @@ RSYNC = rsync RSYNC_DEST = public-inbox.org:/srv/public-inbox/ docs := README COPYING INSTALL TODO $(shell git ls-files 'Documentation/*.txt') INSTALL = install -POD2MAN = pod2man +POD2MAN ?= pod2man POD2MAN_OPTS = -v --stderr -d 1994-10-02 -c 'public-inbox user manual' pod2man = $(POD2MAN) $(POD2MAN_OPTS) POD2TEXT = pod2text diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index c406c03..d821182 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -3,7 +3,7 @@ # # Used to read files from a git repository without excessive forking. # Used in our web interfaces as well as our -nntpd server. 
-# This is based on code in Git.pm which is GPLv2, but modified to avoid +# This is based on code in Git.pm which is GPLv2+, but modified to avoid # dependence on environment variables for compatibility with mod_perl. # There are also API changes to simplify our usage and data set. package PublicInbox::Git; @@ -134,3 +134,55 @@ sub cleanup { sub DESTROY { cleanup(@_) } 1; +__END__ +=pod + +=head1 NAME + +PublicInbox::Git - git wrapper + +=head1 VERSION + +version 1.0 + +=head1 SYNOPSIS + + use PublicInbox::Git; + chomp(my $git_dir = `git rev-parse --git-dir`); + $git_dir or die "GIT_DIR= must be specified\n"; + my $git = PublicInbox::Git->new($git_dir); + +=head1 DESCRIPTION + +Unstable API outside of the L method. +It requires L to be installed. + +=head1 METHODS + +=cut + +=head2 new + + my $git = PublicInbox::Git->new($git_dir); + +Initialize a new PublicInbox::Git object for use with L +This is the only public API method we support. Everything else +in this module is subject to change. 
+ +=head1 SEE ALSO + +L, L + +=head1 CONTACT + +All feedback welcome via plain-text mail to L + +The mail archives are hosted at L + +=head1 COPYRIGHT + +Copyright (C) 2016 all contributors L + +License: AGPL-3.0+ L + +=cut diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 8dd11d0..f9c05da 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -66,7 +66,7 @@ sub now2822 () { } # returns undef on non-existent -# (-1, msg) on mismatch +# ('MISMATCH', msg) on mismatch # (:MARK, msg) on success sub remove { my ($self, $mime) = @_; # mime = Email::MIME @@ -76,13 +76,13 @@ sub remove { my ($r, $w) = $self->gfi_start; my $tip = $self->{tip}; - return if $tip eq ''; + return ('MISSING', undef) if $tip eq ''; print $w "ls $tip $path\n" or wfail; local $/ = "\n"; my $check = <$r>; defined $check or die "EOF from fast-import / ls: $!"; - return if $check =~ /\Amissing /; + return ('MISSING', undef) if $check =~ /\Amissing /; $check =~ m!\A100644 blob ([a-f0-9]{40})\t!s or die "not blob: $check"; my $blob = $1; print $w "cat-blob $blob\n" or wfail; @@ -107,7 +107,7 @@ sub remove { my $cur = Email::MIME->new($buf); if ($cur->header('Subject') ne $mime->header('Subject') || $cur->body ne $mime->body) { - return (-1, $cur); + return ('MISMATCH', $cur); } my $ref = $self->{ref}; @@ -215,3 +215,122 @@ sub done { } 1; +__END__ +=pod + +=head1 NAME + +PublicInbox::Import - message importer for public-inbox + +=head1 VERSION + +version 1.0 + +=head1 SYNOPSIS + + use Email::MIME; + use PublicInbox::Git; + use PublicInbox::Import; + + chomp(my $git_dir = `git rev-parse --git-dir`); + $git_dir or die "GIT_DIR= must be specified\n"; + my $git = PublicInbox::Git->new($git_dir); + my @committer = ('inbox', 'inbox@example.org'); + my $im = PublicInbox::Import->new($git, @committer); + + # to add a message: + my $message = "From: \n". + "Subject: test message \n" . + "Date: Thu, 01 Jan 1970 00:00:00 +0000\n" . + "Message-ID: \n". 
+ "\ntest message"; + my $parsed = Email::MIME->new($message); + my $ret = $im->add($parsed); + if (!defined $ret) { + warn "duplicate: ", + $parsed->header_obj->header_raw('Message-ID'), "\n"; + } else { + print "imported at mark $ret\n"; + } + $im->done; + + # to remove a message + my $junk = Email::MIME->new($message); + my ($mark, $orig) = $im->remove($junk); + if ($mark eq 'MISSING') { + print "not found\n"; + } elsif ($mark eq 'MISMATCH') { + print "Message exists but does not match\n\n", + $orig->as_string, "\n",; + } else { + print "removed at mark $mark\n\n", + $orig->as_string, "\n"; + } + $im->done; + +=head1 DESCRIPTION + +An importer and remover for public-inboxes which takes L +messages as input and stores them in a ssoma repository as +documented in L, +except it does not allow duplicate Message-IDs. + +It requires L and L to be installed. + +=head1 METHODS + +=cut + +=head2 new + + my $im = PublicInbox::Import->new($git, @committer); + +Initialize a new PublicInbox::Import object. + +=head2 add + + my $parsed = Email::MIME->new($message); + $im->add($parsed); + +Adds a message to the git repository. This will acquire +C<$GIT_DIR/ssoma.lock> and start L if necessary. + +Messages added will not be visible to other processes until L +is called, but L may be called on them. + +=head2 remove + + my $junk = Email::MIME->new($message); + my ($code, $orig) = $im->remove($junk); + +Removes a message from the repository. On success, it returns +a ':'-prefixed numeric code representing the git-fast-import +mark and the original message as an Email::MIME object. +If the message could not be found, the code is "MISSING" +and the original message is undef. If there is a mismatch where +the "Message-ID" is matched but the subject or body does not match, +the returned code is "MISMATCH" and the conflicting message +is returned as orig. + +=head2 done + +Finalizes the L and unlocks the repository. +Calling this is required to finalize changes to a repository. 
+ +=head1 SEE ALSO + +L + +=head1 CONTACT + +All feedback welcome via plain-text mail to L + +The mail archives are hosted at L + +=head1 COPYRIGHT + +Copyright (C) 2016 all contributors L + +License: AGPL-3.0+ L + +=cut diff --git a/t/import.t b/t/import.t index 6918484..09c0036 100644 --- a/t/import.t +++ b/t/import.t @@ -57,7 +57,7 @@ is(undef, $im->remove($mime), 'remove is idempotent'); # mismatch on identical Message-ID $mime->header_set('Message-ID', ''); ($mark, $msg) = $im->remove($mime); -is($mark, -1, 'mark == -1 on mismatch'); +is($mark, 'MISMATCH', 'mark == MISMATCH on mismatch'); is($msg->header('Message-ID'), '', 'Message-ID matches'); isnt($msg->header('Subject'), $mime->header('Subject'), 'subject mismatch'); -- EW