about summary refs log tree commit homepage
path: root/lib/PublicInbox/NNTP.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/NNTP.pm')
-rw-r--r--lib/PublicInbox/NNTP.pm560
1 files changed, 560 insertions, 0 deletions
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
new file mode 100644
index 00000000..f1aaed48
--- /dev/null
+++ b/lib/PublicInbox/NNTP.pm
@@ -0,0 +1,560 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+package PublicInbox::NNTP;
+use strict;
+use warnings;
+use base qw(Danga::Socket);
+use fields qw(nntpd article ng);
+use PublicInbox::Msgmap;
+use PublicInbox::GitCatFile;
+use PublicInbox::MID qw(mid2path);
+use Email::Simple;
+use Data::Dumper qw(Dumper);
+use POSIX qw(strftime);
+use constant {
+        r501 => '501 command syntax error',
+};
+
+my @OVERVIEW = qw(Subject From Date Message-ID References Bytes Lines);
+my %OVERVIEW = map { $_ => 1 } @OVERVIEW;
+
+# disable commands with easy DoS potential:
+# LISTGROUP could get pretty bad, too...
+my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr);
+
+sub new {
+        my ($class, $sock, $nntpd) = @_;
+        my $self = fields::new($class);
+        $self->SUPER::new($sock);
+        $self->{nntpd} = $nntpd;
+        res($self, '201 server ready - post via email');
+        $self->watch_read(1);
+        $self;
+}
+
+# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
+sub process_line {
+        my ($self, $l) = @_;
+        my ($req, @args) = split(/\s+/, $l);
+        $req = lc($req);
+        $req = eval {
+                no strict 'refs';
+                $req = $DISABLED{$req} ? undef : *{'cmd_'.$req}{CODE};
+        };
+        return res($self, '500 command not recognized') unless $req;
+
+        my $res = eval { $req->($self, @args) };
+        my $err = $@;
+        if ($err && !$self->{closed}) {
+                chomp($l = Dumper(\$l));
+                warning('error from: ', $l, ' ', $err);
+                $res = '503 program fault - command not performed';
+        }
+        return 0 unless defined $res;
+        res($self, $res);
+}
+
+sub cmd_mode {
+        my ($self, $arg) = @_;
+        return r501 unless defined $arg;
+        $arg = uc $arg;
+        return r501 unless $arg eq 'READER';
+        '200 reader status acknowledged';
+}
+
+sub cmd_slave {
+        my ($self, @x) = @_;
+        return r501 if @x;
+        '202 slave status noted';
+}
+
+sub cmd_xgtitle {
+        my ($self, $wildmat) = @_;
+        more($self, '282 list of groups and descriptions follows');
+        list_newsgroups($self, $wildmat);
+        '.'
+}
+
+sub list_overview_fmt {
+        my ($self) = @_;
+        more($self, $_ . ':') foreach @OVERVIEW;
+}
+
+sub list_active {
+        my ($self, $wildmat) = @_;
+        wildmat2re($wildmat);
+        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
+                $ng->{name} =~ $wildmat or next;
+                group_line($self, $ng);
+        }
+}
+
+sub list_active_times {
+        my ($self, $wildmat) = @_;
+        wildmat2re($wildmat);
+        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
+                $ng->{name} =~ $wildmat or next;
+                my $c = eval { $ng->mm->created_at } || time;
+                more($self, "$ng->{name} $c $ng->{address}");
+        }
+}
+
+sub list_newsgroups {
+        my ($self, $wildmat) = @_;
+        wildmat2re($wildmat);
+        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
+                $ng->{name} =~ $wildmat or next;
+                my $d = $ng->description;
+                more($self, "$ng->{name} $d");
+        }
+}
+
+# LIST SUBSCRIPTIONS not supported
+sub cmd_list {
+        my ($self, $arg, $wildmat, @x) = @_;
+        if (defined $arg) {
+                $arg = lc $arg;
+                $arg =~ tr/./_/;
+                $arg = "list_$arg";
+                return '503 function not performed' if $DISABLED{$arg};
+                $arg = eval {
+                        no strict 'refs';
+                        *{$arg}{CODE};
+                };
+                return r501 unless $arg;
+                more($self, '215 information follows');
+                $arg->($self, $wildmat, @x);
+        } else {
+                more($self, '215 list of newsgroups follows');
+                foreach my $ng (values %{$self->{nntpd}->{groups}}) {
+                        group_line($self, $ng);
+                }
+        }
+        '.'
+}
+
+sub cmd_listgroup {
+        my ($self, $group) = @_;
+        if (defined $group) {
+                my $res = cmd_group($self, $group);
+                return $res if ($res !~ /\A211 /);
+                more($self, $res);
+        }
+
+        my $ng = $self->{ng} or return '412 no newsgroup selected';
+        # Ugh this can be silly expensive for big groups
+        $ng->mm->each_id_batch(sub {
+                my ($ary) = @_;
+                more($self, join("\r\n", @$ary));
+        });
+        '.'
+}
+
+sub parse_time {
+        my ($date, $time, $gmt) = @_;
+        use Time::Local qw();
+        my ($YY, $MM, $DD) = unpack('A2A2A2', $date);
+        my ($hh, $mm, $ss) = unpack('A2A2A2', $time);
+        if (defined $gmt) {
+                $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt\n";
+                $gmt = 1;
+        }
+        my @now = $gmt ? gmtime : localtime;
+        if ($YY > strftime('%y', @now)) {
+                my $cur_year = $now[5] + 1900;
+                $YY += int($cur_year / 1000) * 1000 - 100;
+        }
+
+        if ($gmt) {
+                Time::Local::timegm($ss, $mm, $hh, $DD, $MM - 1, $YY);
+        } else {
+                Time::Local::timelocal($ss, $mm, $hh, $DD, $MM - 1, $YY);
+        }
+}
+
+sub group_line {
+        my ($self, $ng) = @_;
+        my ($min, $max) = $ng->mm->minmax;
+        more($self, "$ng->{name} $max $min n") if defined $min && defined $max;
+}
+
+sub cmd_newgroups {
+        my ($self, $date, $time, $gmt, $dists) = @_;
+        my $ts = eval { parse_time($date, $time, $gmt) };
+        return r501 if $@;
+
+        # TODO dists
+        more($self, '231 list of new newsgroups follows');
+        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
+                my $c = eval { $ng->mm->created_at } || 0;
+                next unless $c > $ts;
+                group_line($self, $ng);
+        }
+        '.'
+}
+
+sub wildmat2re {
+        return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*');
+        my %keep;
+        my $salt = rand;
+        use Digest::SHA qw(sha1_hex);
+        my $tmp = $_[0];
+
+        $tmp =~ s#(?<!\\)\[(.+)(?<!\\)\]#
+                my $orig = $1;
+                my $key = sha1_hex($orig . $salt);
+                $orig =~ s/([^\w\-])+/\Q$1/g;
+                $keep{$key} = $orig;
+                $key
+                #gex;
+        my %map = ('*' => '.*', '?' => '.' );
+        $tmp =~ s#(?<!\\)([^\w\\])#$map{$1} || "\Q$1"#ge;
+        if (scalar %keep) {
+                $tmp =~ s#([a-f0-9]{40})#
+                        my $orig = $keep{$1};
+                        defined $orig ? $orig : $1;
+                        #ge;
+        }
+        $_[0] = qr/\A$tmp\z/;
+}
+
+sub ngpat2re {
+        return $_[0] = qr/\A\z/ unless defined $_[0];
+        my %map = ('*' => '.*', ',' => '|');
+        $_[0] =~ s!(.)!$map{$1} || "\Q$1"!ge;
+        $_[0] = qr/\A(?:$_[0])\z/;
+}
+
+sub cmd_newnews {
+        my ($self, $newsgroups, $date, $time, $gmt, $dists) = @_;
+        my $ts = eval { parse_time($date, $time, $gmt) };
+        return r501 if $@;
+        more($self, '230 list of new articles by message-id follows');
+        my ($keep, $skip) = split('!', $newsgroups, 2);
+        ngpat2re($keep);
+        ngpat2re($skip);
+        $ts .= '..';
+
+        my $opts = { asc => 1, limit => 1000 };
+        foreach my $ng (values %{$self->{nntpd}->{groups}}) {
+                $ng->{name} =~ $keep or next;
+                $ng->{name} =~ $skip and next;
+                my $srch = $ng->search or next;
+                $opts->{offset} = 0;
+
+                while (1) {
+                        my $res = $srch->query($ts, $opts);
+                        my $msgs = $res->{msgs};
+                        my $nr = scalar @$msgs or last;
+                        more($self, '<' .
+                                join(">\r\n<", map { $_->mid } @$msgs ).
+                                '>');
+                        $opts->{offset} += $nr;
+                }
+        }
+        '.';
+}
+
+sub cmd_group {
+        my ($self, $group) = @_;
+        my $no_such = '411 no such news group';
+        my $ng = $self->{nntpd}->{groups}->{$group} or return $no_such;
+
+        $self->{ng} = $ng;
+        my ($min, $max) = $ng->mm->minmax;
+        $min ||= 0;
+        $max ||= 0;
+        $self->{article} = $min;
+        my $est_size = $max - $min;
+        "211 $est_size $min $max $group";
+}
+
+sub article_adj {
+        my ($self, $off) = @_;
+        my $ng = $self->{ng} or return '412 no newsgroup selected';
+
+        my $n = $self->{article};
+        defined $n or return '420 no current article has been selected';
+
+        $n += $off;
+        my $mid = $ng->mm->mid_for($n);
+        unless ($mid) {
+                $n = $off > 0 ? 'next' : 'previous';
+                return "421 no $n article in this group";
+        }
+        $self->{article} = $n;
+        "223 $n <$mid> article retrieved - request text separately";
+}
+
+sub cmd_next { article_adj($_[0], 1) }
+sub cmd_last { article_adj($_[0], -1) }
+
+# We want to encourage using email and CC-ing everybody involved to avoid
+# the single-point-of-failure a single server provides.
+sub cmd_post {
+        my ($self) = @_;
+        my $ng = $self->{ng};
+        $ng ? "440 mailto:$ng->{address} to post" : '440 posting not allowed'
+}
+
+sub cmd_quit {
+        my ($self) = @_;
+        res($self, '205 closing connection - goodbye!');
+        $self->close;
+        undef;
+}
+
+sub art_lookup {
+        my ($self, $art, $set_headers) = @_;
+        my $ng = $self->{ng} or return '412 no newsgroup has been selected';
+        my ($n, $mid);
+        my $err;
+        if (defined $art) {
+                if ($art =~ /\A\d+\z/o) {
+                        $err = '423 no such article number in this group';
+                        $n = int($art);
+                        goto find_mid;
+                } elsif ($art =~ /\A<([^>]+)>\z/) {
+                        $err = '430 no such article found';
+                        $mid = $1;
+                        $n = $ng->mm->num_for($mid);
+                        defined $mid or return $err;
+                } else {
+                        return r501;
+                }
+        } else {
+                $err = '420 no current article has been selected';
+                $n = $self->{article};
+                defined $n or return $err;
+find_mid:
+                $mid = $ng->mm->mid_for($n);
+                defined $mid or return $err;
+        }
+
+        my $o = 'HEAD:' . mid2path($mid);
+        my $s = eval { Email::Simple->new($ng->gcf->cat_file($o)) };
+        return $err unless $s;
+        if ($set_headers) {
+                $s->header_set('Newsgroups', $ng->{name});
+                $s->header_set('Lines', $s->body =~ tr!\n!\n!);
+                $s->header_set('Xref', "$ng->{domain} $ng->{name}:$n");
+
+                # must be last
+                if ($set_headers == 2) {
+                        $s->header_set('Bytes', bytes::length($s->as_string));
+                        $s->body_set('');
+                }
+        }
+        [ $n, $mid, $s ];
+}
+
+sub simple_body_write {
+        my ($self, $s) = @_;
+        my $body = $s->body;
+        $s->body_set('');
+        $body =~ s/^\./../smg;
+        do_more($self, $body);
+        '.'
+}
+
+sub header_str {
+        my ($s) = @_;
+        my $h = $s->header_obj;
+        $h->header_set('Bytes');
+        $h->as_string
+}
+
+sub cmd_article {
+        my ($self, $art) = @_;
+        my $r = $self->art_lookup($art, 1);
+        return $r unless ref $r;
+        my ($n, $mid, $s) = @$r;
+        more($self, "220 $n <$mid> article retrieved - head and body follow");
+        do_more($self, header_str($s));
+        do_more($self, "\r\n");
+        simple_body_write($self, $s);
+}
+
+sub cmd_head {
+        my ($self, $art) = @_;
+        my $r = $self->art_lookup($art, 2);
+        return $r unless ref $r;
+        my ($n, $mid, $s) = @$r;
+        more($self, "221 $n <$mid> article retrieved - head follows");
+        do_more($self, header_str($s));
+        '.'
+}
+
+sub cmd_body {
+        my ($self, $art) = @_;
+        my $r = $self->art_lookup($art, 0);
+        return $r unless ref $r;
+        my ($n, $mid, $s) = @$r;
+        more($self, "222 $n <$mid> article retrieved - body follows");
+        simple_body_write($self, $s);
+}
+
+sub cmd_stat {
+        my ($self, $art) = @_;
+        my $r = $self->art_lookup($art, 0);
+        return $r unless ref $r;
+        my ($n, $mid, undef) = @$r;
+        "223 $n <$mid> article retrieved - request text separately";
+}
+
+sub cmd_ihave { '435 article not wanted - do not send it' }
+
+sub cmd_date { '111 '.strftime('%Y%m%d%H%M%S', gmtime(time)) }
+
+sub cmd_help {
+        my ($self) = @_;
+        more($self, '100 help text follows');
+        '.'
+}
+
+sub get_range {
+        my ($self, $range) = @_;
+        my $ng = $self->{ng} or return '412 no news group has been selected';
+        defined $range or return '420 No article(s) selected';
+        my ($beg, $end);
+        my ($min, $max) = $ng->mm->minmax;
+        if ($range =~ /\A(\d+)\z/) {
+                $beg = $end = $1;
+        } elsif ($range =~ /\A(\d+)-\z/) {
+                ($beg, $end) = ($1, $max);
+        } elsif ($range =~ /\A(\d+)-(\d+)\z/) {
+                ($beg, $end) = ($1, $2);
+        } else {
+                return r501;
+        }
+        $beg = $min if ($beg < $min);
+        $end = $max if ($end > $max);
+        return '420 No article(s) selected' if ($beg > $end);
+        [ $beg, $end ];
+}
+
+sub xhdr {
+        my ($r, $header) = @_;
+        $r = $r->[2]->header_obj->header($header);
+        defined $r or return;
+        $r =~ s/[\r\n\t]+/ /sg;
+        $r;
+}
+
+sub cmd_xhdr {
+        my ($self, $header, $range) = @_;
+        defined $self->{ng} or return '412 no news group currently selected';
+        unless (defined $range) {
+                defined($range = $self->{article}) or
+                        return '420 no current article has been selected';
+        }
+        if ($range =~ /\A<(.+)>\z/) { # Message-ID
+                my $r = $self->art_lookup($range, 2);
+                return $r unless ref $r;
+                more($self, '221 Header follows');
+                if (defined($r = xhdr($r, $header))) {
+                        more($self, "<$range> $r");
+                }
+        } else { # numeric range
+                my $r = get_range($self, $range);
+                return $r unless ref $r;
+                my ($beg, $end) = @$r;
+                more($self, '221 Header follows');
+                foreach my $i ($beg..$end) {
+                        $r = $self->art_lookup($i, 2);
+                        next unless ref $r;
+                        defined($r = xhdr($r, $header)) or next;
+                        more($self, "$i $r");
+                }
+        }
+        '.';
+}
+
+sub cmd_xover {
+        my ($self, $range) = @_;
+        my $r = get_range($self, $range);
+        return $r unless ref $r;
+        my ($beg, $end) = @$r;
+        more($self, "224 Overview information follows for $beg to $end");
+        foreach my $i ($beg..$end) {
+                my $r = $self->art_lookup($i, 2);
+                next unless ref $r;
+                more($self, join("\t", $r->[0],
+                                map {
+                                        my $h = xhdr($r, $_);
+                                        defined $h ? $h : '';
+                                } @OVERVIEW ));
+        }
+        '.';
+}
+
+sub res {
+        my ($self, $line) = @_;
+        do_write($self, $line . "\r\n");
+}
+
+sub more {
+        my ($self, $line) = @_;
+        do_more($self, $line . "\r\n");
+}
+
+sub do_write {
+        my ($self, $data) = @_;
+        my $done = $self->write($data);
+        die if $self->{closed};
+
+        # Do not watch for readability if we have data in the queue,
+        # instead re-enable watching for readability when we can
+        $self->watch_read(0) unless $done;
+
+        $done;
+}
+
+use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0;
+
+sub do_more {
+        my ($self, $data) = @_;
+        if (MSG_MORE && !scalar @{$self->{write_buf}}) {
+                my $n = send($self->{sock}, $data, MSG_MORE);
+                if (defined $n) {
+                        my $dlen = bytes::length($data);
+                        return 1 if $n == $dlen; # all done!
+                        $data = bytes::substr($data, $n, $dlen - $n);
+                }
+        }
+        $self->do_write($data);
+}
+
+# callbacks for by Danga::Socket
+
+sub event_hup { $_[0]->close }
+sub event_err { $_[0]->close }
+
+sub event_write {
+        my ($self) = @_;
+        # only continue watching for readability when we are done writing:
+        $self->write(undef) == 1 and $self->watch_read(1);
+}
+
+sub event_read {
+        my ($self) = @_;
+        use constant LINE_MAX => 512; # RFC 977 section 2.3
+        use Time::HiRes qw(gettimeofday tv_interval);
+        my $r = 1;
+        my $buf = $self->read(LINE_MAX) or return $self->close;
+        while ($r > 0 && $$buf =~ s/\A([^\r\n]+)\r?\n//) {
+                my $line = $1;
+                my $t0 = [ gettimeofday ];
+                $r = eval { $self->process_line($line) };
+                printf(STDERR "$line %0.6f\n",
+                        tv_interval($t0, [gettimeofday]));
+        }
+        return $self->close if $r < 0;
+        my $len = bytes::length($$buf);
+        return $self->close if ($len >= LINE_MAX);
+        $self->push_back_read($buf) if ($len);
+}
+
+sub warning { print STDERR @_, "\n" }
+
+1;