about summary refs log tree commit homepage
path: root/lib/PublicInbox/OverIdx.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/OverIdx.pm')
-rw-r--r--lib/PublicInbox/OverIdx.pm7
1 files changed, 4 insertions, 3 deletions
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 2e3d4534..0c8a4d9e 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -243,12 +243,13 @@ sub link_refs {
         $tid;
 }
 
-# normalize subjects so they are suitable as pathnames for URLs
-# XXX: consider for removal
+# normalize subjects somewhat, they used to be ASCII-only but now
+# we use \w for UTF-8 support.  We may still drop it entirely and
+# rely on Xapian for subject matches...
 sub subject_path ($) {
         my ($subj) = @_;
         $subj = subject_normalized($subj);
-        $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
+        $subj =~ s![^\w\.~/\-]+!_!g;
         lc($subj);
 }