From dc03cabb5d167618797e9e8a6ec615bda7b0638b Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sat, 19 Sep 2020 09:37:11 +0000
Subject: gcf2: transparently retry on missing OID

Since we only get OIDs from trusted local data sources
(over.sqlite3), we can safely retry within the -gcf2 process
without worry about clients spamming us with requests for
invalid OIDs and triggering reopens.
---
 script/public-inbox-gcf2 | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'script')

diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2
index 51811698..d2d2ac8b 100755
--- a/script/public-inbox-gcf2
+++ b/script/public-inbox-gcf2
@@ -3,12 +3,33 @@
 # License: AGPL-3.0+
 eval { require PublicInbox::Gcf2 };
 die "libgit2 development package or Inline::C missing for $0: $@\n" if $@;
+my @dirs; # may get big (30K-100K)
 my $gcf2 = PublicInbox::Gcf2::new();
+use IO::Handle; # autoflush
+STDERR->autoflush(1);
+STDOUT->autoflush(1);
+
 while (<STDIN>) {
 	chomp;
 	if (m!\A/!) { # +/path/to/git-dir
+		push @dirs, $_;
 		$gcf2->add_alternate("$_/objects");
-	} else {
-		$gcf2->cat_oid(1, $_);
+	} elsif (!$gcf2->cat_oid(1, $_)) {
+		# retry once if missing. We only get unabbreviated OIDs
+		# from SQLite or Xapian DBs, here, so malicious clients
+		# can't trigger excessive retries:
+		my $oid = $_;
+		warn "I: $$ $oid missing, retrying...\n";
+
+		# clients may need to wait a bit for this:
+		$gcf2 = PublicInbox::Gcf2::new();
+		$gcf2->add_alternate("$_/objects") for @dirs;
+
+		if ($gcf2->cat_oid(1, $oid)) {
+			warn "I: $$ $oid found after retry\n";
+		} else {
+			warn "W: $$ $oid missing after retry\n";
+			print "$oid missing\n"; # mimic git-cat-file
+		}
 	}
 }
-- 
cgit v1.2.3-24-ge0c7