From 95cb3e48fc5c4e847cdc111c2c8c9f0b70bdea56 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 23 Nov 2020 23:32:29 +0000 Subject: extsearchidx: deduplicate alternates based on st_dev + st_ino This allows us to filter out duplicate alternates entries in case there are symlinks or bind mounts in play, as I (and perhaps some other users) tend to use symlinks and/or bind mounts heavily. --- lib/PublicInbox/ExtSearchIdx.pm | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 2cdc31cb..7ab0c4af 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -396,18 +396,28 @@ sub idx_init { # similar to V2Writable my $info_dir = "$ALL/objects/info"; my $alt = "$info_dir/alternates"; my $mode = 0644; - my (%old, @old, %new, @new); + my (@old, @new, %seen); # seen: st_dev + st_ino if (-e $alt) { open(my $fh, '<', $alt) or die "open $alt: $!"; $mode = (stat($fh))[2] & 07777; - while (<$fh>) { - push @old, $_ if !$old{$_}++; + while (my $line = <$fh>) { + chomp(my $d = $line); + if (my @st = stat($d)) { + next if $seen{"$st[0]\0$st[1]"}++; + } else { + warn "W: stat($d) failed (from $alt): $!\n"; + } + push @old, $line; } } for my $ibx (@{$self->{ibx_list}}) { my $line = $ibx->git->{git_dir} . "/objects\n"; - next if $old{$line}; - $new{$line} = 1; + chomp(my $d = $line); + if (my @st = stat($d)) { + next if $seen{"$st[0]\0$st[1]"}++; + } else { + warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n"; + } push @new, $line; } if (scalar @new) { -- cgit v1.2.3-24-ge0c7