bug-gnulib@gnu.org mirror (unofficial)
 help / color / mirror / Atom feed
* [New module] Persistent Hash Array Mapped Tries (HAMTs)
@ 2020-10-09 21:07 Marc Nieper-Wißkirchen
  2020-10-10 14:35 ` Bruno Haible
  2020-10-10 14:54 ` Bruno Haible
  0 siblings, 2 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-09 21:07 UTC (permalink / raw)
  To: bug-gnulib

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

Hi,

after I have contributed two comparably trivial modules to Gnulib, I
would like to contribute a less trivial module this time. It
implements a persistent version of Phil Bagwell's HAMTs, which has
been popularized by Clojure. HAMTs can be used when a persistent
(functional/pure) version of a data structure akin to hash tables is
needed. For example, the dynamic environment of a (possibly
multi-threaded) Lisp or Scheme can be modeled with persistent HAMTs.

Please take a look at the attached patch.

Thank you,

Marc

[-- Attachment #2: 0001-hamt-New-module.patch --]
[-- Type: text/x-patch, Size: 32489 bytes --]

From 39a1ac1f78c8d00b1dfad4f260318a6fb5cf5584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Nieper-Wi=C3=9Fkirchen?= <marc@nieper-wisskirchen.de>
Date: Wed, 7 Oct 2020 09:53:48 +0200
Subject: [PATCH] hamt: New module.

This module provides (persistent) hash array mapped tries.

* MODULES.html.sh: Add hamt.
* lib/hamt.c, lib/hamt.h, modules/hamt, modules/hamt-tests,
tests/test-hamt.c: New files.
---
 MODULES.html.sh    |   1 +
 lib/hamt.c         | 812 +++++++++++++++++++++++++++++++++++++++++++++
 lib/hamt.h         |  97 ++++++
 modules/hamt       |  29 ++
 modules/hamt-tests |  11 +
 tests/test-hamt.c  | 148 +++++++++
 6 files changed, 1098 insertions(+)
 create mode 100644 lib/hamt.c
 create mode 100644 lib/hamt.h
 create mode 100644 modules/hamt
 create mode 100644 modules/hamt-tests
 create mode 100644 tests/test-hamt.c

diff --git a/MODULES.html.sh b/MODULES.html.sh
index a8a629e29..b48ca2bc4 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2028,6 +2028,7 @@ func_all_modules ()
   func_module hash-pjw
   func_module hash-pjw-bare
   func_module hash
+  func_module hamt
   func_module readline
   func_module readtokens
   func_module readtokens0
diff --git a/lib/hamt.c b/lib/hamt.c
new file mode 100644
index 000000000..4876d5809
--- /dev/null
+++ b/lib/hamt.c
@@ -0,0 +1,812 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include "hamt.h"
+
+#include <flexmember.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "count-one-bits.h"
+#include "verify.h"
+#include "xalloc.h"
+
+/* See: Phil Bagwell (2000). Ideal Hash Trees (Report). Infoscience
+   Department, École Polytechnique Fédérale de Lausanne.
+
+   http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf
+
+   We implement a persistent version of hash array mapped tires.  Each
+   updating operation returns a new hamt, which shares structure with
+   the original one.  If persistence is not needed, transient hash
+   tables are probably faster. */
+
+typedef
+#ifdef GL_HAMT_THREAD_SAFE
+_Atomic
+#endif
+size_t ref_counter;
+
+/* Hash values are of type size_t.  For each level of the trie, we use
+   5 bits (corresponding to lg2 of the width of a 32-bit word.  */
+#define MAX_DEPTH ((SIZE_WIDTH + 4) / 5)
+
+/***************/
+/* Entry Types */
+/***************/
+
+/* Leaf nodes are of type element.  Non-leaf nodes are either
+   subtries or, if at maximal depth, buckets.  */
+enum entry_type
+{
+  element_entry = 0,
+  subtrie_entry = 1,
+  bucket_entry = 2
+};
+
+/* Return the type an entry.  */
+static enum entry_type
+entry_type (const Hamt_entry *entry)
+{
+  return entry->ref_count & 3;
+}
+
+/********************/
+/* Reference Counts */
+/********************/
+
+/* Initialize the reference counter, storing its type.  */
+static void
+init_ref_counter (ref_counter *counter, enum entry_type type)
+{
+  *counter = 4 + type;
+}
+
+/* Increase the reference counter of an entry.  */
+static void
+inc_ref_counter (ref_counter *counter)
+{
+  *counter += 4;
+}
+
+/* Decrease the entry reference counter.  Return false if the entry
+   can be deleted.  */
+static bool
+dec_ref_counter (ref_counter *counter)
+{
+  *counter -= 4;
+  return *counter >= 4;
+}
+
+/**************/
+/* Structures */
+/**************/
+
+/* Different generations of a hamt share a function table.  */
+struct function_table
+{
+  Hamt_hasher *hasher;
+  Hamt_comparator *comparator;
+  Hamt_freer *freer;
+  ref_counter ref_count;
+};
+
+/* Different generations of a hamt share subtries.  A singleton
+   subtrie is modelled as a single element.  */
+struct subtrie
+{
+  ref_counter ref_count;
+  /* Nodes carry labels from 0 to 31.  The i-th bit in map is set if
+     the node labelled i is present.  */
+  uint32_t map;
+  Hamt_entry *nodes [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* Buckets are used when different elements have the same hash values.  */
+struct bucket
+{
+  ref_counter ref_counter;
+  size_t elt_count;
+  Hamt_entry *elts [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* A hamt consists of its function table and the root entry.  */
+struct hamt
+{
+  struct function_table *functions;
+  /* The root entry is NULL for an empty HAMT.  */
+  Hamt_entry *root;
+};
+
+/*******************/
+/* Function Tables */
+/*******************/
+
+/* Allocate and initialize a function table.  */
+static struct function_table *
+create_function_table (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                       Hamt_freer *freer)
+{
+  struct function_table *functions = XMALLOC (struct function_table);
+  functions->hasher = hasher;
+  functions->comparator = comparator;
+  functions->freer = freer;
+  functions->ref_count = 1;
+  return functions;
+}
+
+/* Increment the reference count and return the function table. */
+static struct function_table *
+copy_function_table (struct function_table *function_table)
+{
+  ++function_table->ref_count;
+  return function_table;
+}
+
+/* Decrease the reference count and free the function table if the
+   reference count drops to zero.  */
+static void
+free_function_table (struct function_table *function_table)
+{
+  if (--function_table->ref_count)
+    return;
+  free (function_table);
+}
+
+/************/
+/* Elements */
+/************/
+
+/* Return an element's hash.  */
+static size_t
+hash_element (const struct function_table *functions, const Hamt_entry *elt)
+{
+  return functions->hasher (elt);
+}
+
+/* Compare two elements.  */
+static bool
+compare_elements (const struct function_table *functions,
+                  const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return functions->comparator (elt1, elt2);
+}
+
+/* Free an element.  */
+static void
+free_element (const struct function_table *functions, Hamt_entry *elt)
+{
+  if (dec_ref_counter (&elt->ref_count))
+    return;
+  functions->freer (elt);
+}
+
+/* Return the initialized element.  */
+static Hamt_entry *
+init_element (Hamt_entry *elt)
+{
+  init_ref_counter (&elt->ref_count, element_entry);
+  return elt;
+}
+
+/***********/
+/* Buckets */
+/***********/
+
+/* Allocate a partially initialized bucket with a given number of elements.  */
+static struct bucket *
+alloc_bucket (size_t elt_count)
+{
+  struct bucket *bucket
+    = xmalloc (FLEXSIZEOF (struct bucket, elts,
+                           sizeof (Hamt_entry) * elt_count));
+  init_ref_counter (&bucket->ref_counter, bucket_entry);
+  bucket->elt_count = elt_count;
+  return bucket;
+}
+
+/***********/
+/* Entries */
+/***********/
+
+/* Calculate and return the number of nodes in a subtrie.  */
+static int
+trienode_count (const struct subtrie *subtrie)
+{
+  return count_one_bits_l (subtrie->map);
+}
+
+/* Allocate a partially initialized subtrie with a given number of nodes.  */
+static struct subtrie *
+alloc_subtrie (int node_count)
+{
+  struct subtrie *subtrie
+    = xmalloc (FLEXSIZEOF (struct subtrie, nodes,
+                           sizeof (Hamt_entry) * node_count));
+  init_ref_counter (&subtrie->ref_count, subtrie_entry);
+  return subtrie;
+}
+
+/* Return a conceptually copy of an entry.  */
+static Hamt_entry *
+copy_entry (Hamt_entry *entry)
+{
+  inc_ref_counter (&entry->ref_count);
+  return entry;
+}
+
+/* Return a new subtrie that has the j-th node replaced.  */
+static struct subtrie *
+replace_entry (struct subtrie *subtrie, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie);
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map;
+  for (int k = 0; k < n; ++k)
+    {
+      if (k == j)
+        new_subtrie->nodes [k] = entry;
+      else
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+    }
+  return new_subtrie;
+}
+
+/* Return a new subtrie that has an entry labelled i inserted at
+   the j-th position.  */
+static struct subtrie *
+insert_entry (struct subtrie *subtrie, int i, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie) + 1;
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map | (1 << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k > j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k - 1]);
+      else
+        new_subtrie->nodes [k] = entry;
+    }
+  return new_subtrie;
+}
+
+/* Return a new entry that has the entry labelled i removed from
+   position j.  */
+static Hamt_entry *
+remove_subtrie_entry (struct subtrie *subtrie, int i, int j)
+{
+  int n = trienode_count (subtrie) - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (subtrie->nodes [1]);
+      return copy_entry (subtrie->nodes [0]);
+    }
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map & ~(1 << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k >= j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k + 1]);
+    }
+  return (Hamt_entry *) new_subtrie;
+}
+
+/* Return a new entry that has the entry at position j removed.  */
+static Hamt_entry *
+remove_bucket_entry (struct bucket *bucket, int j)
+{
+  int n = bucket->elt_count - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (bucket->elts [1]);
+      return copy_entry (bucket->elts [0]);
+    }
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+      else if (k >= j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k + 1]);
+    }
+  return (Hamt_entry *) new_bucket;
+}
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create a new, empty hash array mapped trie.  */
+Hamt *
+hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+             Hamt_freer *freer)
+{
+  struct function_table *functions
+    = create_function_table (hasher, comparator, freer);
+  Hamt *hamt = XMALLOC (Hamt);
+  hamt->functions = functions;
+  hamt->root = NULL;
+  return hamt;
+}
+
+/* Return a copy of the hamt.  */
+Hamt *
+hamt_copy (Hamt *hamt)
+{
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = hamt->root == NULL ? NULL : copy_entry (hamt->root);
+  return new_hamt;
+}
+
+/* Free a bucket.  */
+static void
+free_bucket (struct function_table const *functions, struct bucket *bucket)
+{
+  if (dec_ref_counter (&bucket->ref_counter))
+    return;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    free_element (functions, elts [i]);
+  free (bucket);
+}
+
+/* Forward declaration.  */
+static void free_subtrie (struct function_table const *functions,
+                          struct subtrie *subtrie);
+
+/* Free an entry.  */
+static void
+free_entry (struct function_table const *functions, Hamt_entry *entry)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      free_element (functions, entry);
+      break;
+    case subtrie_entry:
+      free_subtrie (functions, (struct subtrie *) entry);
+      break;
+    case bucket_entry:
+      free_bucket (functions, (struct bucket *) entry);
+      break;
+    default:
+      assume (0);
+    }
+}
+
+/* Free a trie recursively.  */
+static void
+free_subtrie (struct function_table const *functions, struct subtrie *subtrie)
+{
+  if (dec_ref_counter (&subtrie->ref_count))
+    return;
+  int n = trienode_count (subtrie);
+  Hamt_entry **node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    free_entry (functions, *node_ptr++);
+  free (subtrie);
+}
+
+/* Free a hamt.  */
+void
+hamt_free (Hamt *hamt)
+{
+  if (hamt->root != NULL)
+    free_entry (hamt->functions, hamt->root);
+  free_function_table (hamt->functions);
+  free (hamt);
+}
+
+/**********/
+/* Lookup */
+/**********/
+
+/* Lookup an element in a bucket.  */
+static const Hamt_entry *
+bucket_lookup (const struct function_table *functions,
+               const struct bucket *bucket, const Hamt_entry *elt)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, elt, *elts))
+        return *elts;
+      ++elts;
+    }
+  return NULL;
+}
+
+/* Forward declaration.  */
+static const Hamt_entry *entry_lookup (const struct function_table *functions,
+                                       const Hamt_entry *entry,
+                                       const Hamt_entry *elt, size_t hash);
+
+/* Lookup an element in a bucket.  */
+static const Hamt_entry *
+subtrie_lookup (const struct function_table *functions,
+                const struct subtrie *subtrie, const Hamt_entry *elt,
+                size_t hash)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+
+  if (! (map & (1 << i)))
+    return NULL;
+
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  return entry_lookup (functions, subtrie->nodes [j], elt, hash >> 5);
+}
+
+/* Lookup an element in an entry.  */
+static const Hamt_entry *
+entry_lookup (const struct function_table *functions, const Hamt_entry *entry,
+              const Hamt_entry *elt, size_t hash)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, elt, entry))
+        return entry;
+      return NULL;
+    case subtrie_entry:
+      return subtrie_lookup (functions, (struct subtrie *) entry, elt, hash);
+    case bucket_entry:
+      return bucket_lookup (functions, (struct bucket *) entry, elt);
+    default:
+      assume (0);
+    }
+}
+
+/* If ELEMENT matches an entry in HAMT, return this entry.  Otherwise,
+   return NULL.  */
+const Hamt_entry *
+hamt_lookup (const Hamt *hamt, const Hamt_entry *elt)
+{
+  if (hamt->root == NULL)
+    return NULL;
+
+  return entry_lookup (hamt->functions, hamt->root, elt,
+                       hash_element (hamt->functions, elt));
+}
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+/* Create a bucket populated with two elements.  */
+static struct bucket *
+create_populated_bucket (Hamt_entry *elt1, Hamt_entry *elt2)
+{
+  struct bucket *bucket = alloc_bucket (2);
+  bucket->elts [0] = elt1;
+  bucket->elts [1] = elt2;
+  return bucket;
+}
+
+/* Create a chain of subtrie nodes so that the resulting trie is
+   populated with exactly two elements.  */
+static Hamt_entry *
+create_populated_subtrie (Hamt_entry *elt1, Hamt_entry *elt2, size_t hash1,
+                          size_t hash2, int depth)
+{
+  if (depth >= MAX_DEPTH)
+    return (Hamt_entry *) create_populated_bucket (elt1, elt2);
+
+  struct subtrie *subtrie;
+  int i1 = hash1 & 31;
+  int i2 = hash2 & 31;
+  if (i1 != i2)
+    {
+      subtrie = alloc_subtrie (2);
+      subtrie->map = (1 << i1) | (1 << i2);
+      if (i1 < i2)
+        {
+          subtrie->nodes [0] = elt1;
+          subtrie->nodes [1] = elt2;
+        }
+      else
+        {
+          subtrie->nodes [0] = elt2;
+          subtrie->nodes [1] = elt1;
+        }
+    }
+  else
+    {
+      subtrie = alloc_subtrie (1);
+      subtrie->map = 1 << i1;
+      subtrie->nodes [0]
+        = create_populated_subtrie (elt1, elt2, hash1 >> 5, hash2 >> 5,
+                                    depth + 1);
+    }
+  return (Hamt_entry *) subtrie;
+}
+
+/* Insert an element in a bucket if not already present.  */
+static struct bucket *
+bucket_insert (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, *elts))
+        {
+          *elt_ptr = *elts;
+          return bucket;
+        }
+      ++elts;
+    }
+  struct bucket *new_bucket = alloc_bucket (elt_count + 1);
+  new_bucket->elts [0] = init_element (*elt_ptr);
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      new_bucket->elts [i + 1] = copy_entry (bucket->elts [i]);
+    }
+  return new_bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_insert (const struct function_table *functions,
+                                 Hamt_entry *subtrie, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth);
+
+/* Insert an element in a subtrie if not already present.  */
+static struct subtrie *
+subtrie_insert (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1 << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_insert (functions, entry, elt_ptr, hash >> 5, depth + 1);
+      if (new_entry != entry)
+        return replace_entry (subtrie, j, new_entry);
+      return subtrie;
+    }
+  return insert_entry (subtrie, i, j, init_element (*elt_ptr));
+}
+
+/* Insert an element in an entry if not already present.  */
+static Hamt_entry *
+entry_insert (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          *elt_ptr = entry;
+          return entry;
+        }
+      return create_populated_subtrie
+        (init_element (*elt_ptr), copy_entry (entry), hash,
+         hash_element (functions, entry) >> (5 * depth), depth);
+    case subtrie_entry:
+      return (Hamt_entry *)
+        subtrie_insert (functions, (struct subtrie *) entry, elt_ptr, hash,
+                        depth);
+    case bucket_entry:
+      return (Hamt_entry *)
+        bucket_insert (functions, (struct bucket *) entry, elt_ptr);
+    default:
+      assume (0);
+    }
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return HAMT.  Otherwise, insert *ELT_PTR
+   into a copy of the HAMT and return the copy.  */
+Hamt *
+hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *new_entry;
+
+  if (hamt->root == NULL)
+    new_entry = init_element (*elt_ptr);
+  else
+    new_entry =  entry_insert (hamt->functions, hamt->root, elt_ptr,
+                               hash_element (hamt->functions, *elt_ptr), 0);
+
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/* Delete an element in a bucket if found.  */
+static Hamt_entry *
+bucket_delete (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, elts [i]))
+        {
+          *elt_ptr = elts [i];
+          return remove_bucket_entry (bucket, i);
+        }
+    }
+  return (Hamt_entry *) bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_delete (const struct function_table *functions,
+                                 Hamt_entry *entry, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth);
+
+/* Delete an element in a subtrie if found.  */
+static Hamt_entry *
+subtrie_delete (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1 << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_delete (functions, entry, elt_ptr, hash >> 5, depth + 1);
+      if (new_entry == NULL)
+        return remove_subtrie_entry (subtrie, i, j);
+      if (new_entry != entry)
+        return (Hamt_entry *) replace_entry (subtrie, j, new_entry);
+      return (Hamt_entry *) subtrie;
+    }
+  return (Hamt_entry *) subtrie;
+}
+
+/* Delete an element in an entry if found.  */
+static Hamt_entry *
+entry_delete (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          *elt_ptr = entry;
+          return NULL;
+        }
+      return entry;
+    case subtrie_entry:
+      return subtrie_delete (functions, (struct subtrie *) entry, elt_ptr, hash,
+                             depth);
+    case bucket_entry:
+      return bucket_delete (functions, (struct bucket *) entry, elt_ptr);
+    default:
+      assume (0);
+    }
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+element from the table, remove the element from a copy of the hamt and
+return the copy.  Otherwise, return HAMT.  */
+Hamt *
+hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  if (hamt->root == NULL)
+    return hamt;
+
+  Hamt_entry *new_entry
+    = entry_delete (hamt->functions, hamt->root, elt_ptr,
+                    hash_element (hamt->functions, *elt_ptr), 0);
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/*************/
+/* Iteration */
+/*************/
+
+/* Walk a bucket.  */
+static size_t
+bucket_do_while (const struct bucket *bucket, Hamt_processor *proc, void *data,
+                 bool *success)
+{
+  size_t cnt = 0;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      *success = proc (elts [i], data);
+      if (!success)
+        return cnt;
+      ++cnt;
+    }
+  return cnt;
+}
+
+/* Forward declaration.  */
+static size_t entry_do_while (const Hamt_entry *entry, Hamt_processor *proc,
+                              void *data, bool *success);
+
+/* Walk a subtrie.  */
+static size_t subtrie_do_while (const struct subtrie *subtrie,
+                                Hamt_processor *proc, void *data, bool *success)
+{
+  size_t cnt = 0;
+  int n = trienode_count (subtrie);
+  Hamt_entry *const *node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    {
+      cnt += entry_do_while (*node_ptr++, proc, data, success);
+      if (!success)
+        return cnt;
+    }
+  return cnt;
+}
+
+/* Walk an entry.  */
+static size_t
+entry_do_while (const Hamt_entry *entry, Hamt_processor *proc, void *data,
+                bool *success)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      *success = proc (entry, data);
+      return *success ? 1 : 0;
+    case subtrie_entry:
+      return subtrie_do_while ((struct subtrie *) entry, proc, data, success);
+    case bucket_entry:
+      return bucket_do_while ((struct bucket *) entry, proc, data, success);
+    default:
+      assume (0);
+    }
+}
+
+/* Call PROC for every entry of the hamt until it returns false.  The
+   first argument of PROC is the entry, the second entry is the value
+   of DATA as received.  Return the number of calls that returned
+   true.  */
+size_t
+hamt_do_while (const Hamt *hamt, Hamt_processor *proc, void *data)
+{
+  if (hamt->root == NULL)
+    return 0;
+
+  bool success = true;
+  return entry_do_while (hamt->root, proc, data, &success);
+}
diff --git a/lib/hamt.h b/lib/hamt.h
new file mode 100644
index 000000000..bd4a2454b
--- /dev/null
+++ b/lib/hamt.h
@@ -0,0 +1,97 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020. */
+
+#ifndef _GL_HAMT_H
+#define _GL_HAMT_H
+
+/* The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
+   is thread-safe as long as two threads do not simultaneously access
+   the same hamt.  */
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
+  && !defined (__STD_NO_ATOMICS__)
+# define GL_HAMT_THREAD_SAFE 1
+#else
+# define GL_HAMT_THREAD_SAFE 0
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* A hamt stores pointers to elements.  Each element has to be a
+   struct whose initial member is of the type Hamt_entry.  An element
+   is conceptually owned by a hamt as soon as it is inserted.  */
+typedef struct
+{
+#ifdef GL_HAMT_THREAD_SAFE
+  _Atomic
+#endif
+  size_t ref_count;
+} Hamt_entry;
+
+/* The public interface is documented in the file hamt.c.
+   Out-of-memory errors are handled by calling xalloc_die ().  */
+
+/*************************/
+/* Opaque Hamt Structure */
+/*************************/
+
+typedef struct hamt Hamt;
+
+/******************/
+/* Function Types */
+/******************/
+
+typedef size_t (Hamt_hasher) (const Hamt_entry *elt);
+typedef bool (Hamt_comparator) (const Hamt_entry *elt1, const Hamt_entry *elt2);
+typedef void (Hamt_freer) (Hamt_entry *elt);
+typedef bool (Hamt_processor) (const Hamt_entry *elt, void *data);
+
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+extern Hamt *hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                          Hamt_freer *freer)
+  _GL_ATTRIBUTE_NODISCARD;
+extern Hamt *hamt_copy (Hamt *hamt) _GL_ATTRIBUTE_NODISCARD;
+extern void hamt_free (Hamt *);
+
+/**********/
+/* Lookup */
+/**********/
+
+extern const Hamt_entry *hamt_lookup (const Hamt *hamt, const Hamt_entry *elt);
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+extern Hamt *hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+extern Hamt *hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/*************/
+/* Iteration */
+/*************/
+
+extern size_t hamt_do_while (const Hamt *hamt, Hamt_processor *proc,
+                             void *data);
+
+#endif /* _GL_HAMT_H */
diff --git a/modules/hamt b/modules/hamt
new file mode 100644
index 000000000..d73f09c2d
--- /dev/null
+++ b/modules/hamt
@@ -0,0 +1,29 @@
+Description:
+Persistent hash array mapped tries.
+
+Files:
+lib/hamt.h
+lib/hamt.c
+
+Depends-on:
+count-one-bits
+flexmember
+inttypes-incomplete
+stdbool
+stdint
+verify
+xalloc
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += hamt.c
+
+Include:
+"hamt.h"
+
+License:
+GPL
+
+Maintainer:
+Marc Nieper-Wisskirchen
diff --git a/modules/hamt-tests b/modules/hamt-tests
new file mode 100644
index 000000000..f4f0ea4e0
--- /dev/null
+++ b/modules/hamt-tests
@@ -0,0 +1,11 @@
+Files:
+tests/test-hamt.c
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-hamt
+check_PROGRAMS += test-hamt
diff --git a/tests/test-hamt.c b/tests/test-hamt.c
new file mode 100644
index 000000000..daac7b2a7
--- /dev/null
+++ b/tests/test-hamt.c
@@ -0,0 +1,148 @@
+/* Test of persistent hash array mapped trie implementation.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "hamt.h"
+#include "macros.h"
+#include "xalloc.h"
+
+typedef struct
+{
+  Hamt_entry entry;
+  int val;
+} Element;
+
+static int
+entry_value (const Hamt_entry *elt)
+{
+  return ((Element *) elt)->val;
+}
+
+static size_t
+hash_element (const Hamt_entry *elt)
+{
+  return entry_value (elt) & ~3; /* We drop the last bits so that we
+                                    can test hash collisions. */
+}
+
+static bool
+compare_element (const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return entry_value (elt1) == entry_value (elt2);
+}
+
+static void
+free_element (Hamt_entry *elt)
+{
+  free (elt);
+}
+
+static Hamt_entry *
+make_element (int n)
+{
+  Element *elt = XMALLOC (Element);
+  elt->val = n;
+  return &elt->entry;
+}
+
+static int sum = 0;
+static int flag;
+
+static bool
+proc (const Hamt_entry *elt, void *data)
+{
+  if (data == &flag)
+    {
+      sum += entry_value (elt);
+      return true;
+    }
+  if (sum > 0)
+    {
+      sum = 0;
+      return true;
+    }
+  return false;
+}
+
+int
+main (void)
+{
+  Hamt *hamt = hamt_create (hash_element, compare_element, free_element);
+
+  Hamt_entry *x5 = make_element (5);
+  Hamt_entry *p = x5;
+  Hamt *hamt1 = hamt_insert (hamt, &p);
+  ASSERT (hamt1 != hamt);
+  ASSERT (hamt_lookup (hamt, x5) == NULL);
+  ASSERT (hamt_lookup (hamt1, x5) == x5);
+
+  Hamt_entry *y5 = make_element (5);
+  p = y5;
+  Hamt *hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 == hamt1);
+  ASSERT (p == x5);
+  ASSERT (hamt_lookup (hamt1, y5) == x5);
+
+  Hamt_entry *z37 = make_element (37);
+  p = z37;
+  hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 != hamt1);
+  ASSERT (p == z37);
+  ASSERT (hamt_lookup (hamt1, z37) == NULL);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+
+  hamt_free (hamt);
+  hamt_free (hamt1);
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 2);
+  ASSERT (sum == 42);
+  ASSERT (hamt_do_while (hamt2, proc, NULL) == 1);
+  ASSERT (sum == 0);
+
+  p = y5;
+  hamt1 = hamt_delete (hamt2, &p);
+  ASSERT (hamt1 != hamt2);
+  ASSERT (p == x5);
+
+  ASSERT (hamt_lookup (hamt1, x5) == NULL);
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+
+  hamt_free (hamt1);
+  Hamt_entry *x4 = make_element (4);
+  hamt1 = hamt_insert (hamt2, &x4);
+  hamt_free (hamt2);
+  Hamt_entry *x6 = make_element (6);
+  hamt2 = hamt_insert (hamt1, &x6);
+  hamt_free (hamt1);
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum == 52);
+
+  hamt1 = hamt_delete (hamt2, &x4);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum = 52);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt1, proc, &flag) == 3);
+  ASSERT (sum  = 48);
+
+  hamt_free (hamt1);
+  hamt_free (hamt2);
+  free_element (y5);
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-09 21:07 [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
@ 2020-10-10 14:35 ` Bruno Haible
  2020-10-10 14:46   ` Marc Nieper-Wißkirchen
  2020-10-10 14:54 ` Bruno Haible
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-10 14:35 UTC (permalink / raw)
  To: bug-gnulib; +Cc: Marc Nieper-Wißkirchen

Hi Marc,

> It implements a persistent version of Phil Bagwell's HAMTs, which has
> been popularized by Clojure. HAMTs can be used when a persistent
> (functional/pure) version of a data structure akin to hash tables is
> needed. For example, the dynamic environment of a (possibly
> multi-threaded) Lisp or Scheme can be modeled with persistent HAMTs.

It is exciting to see such a datastructure with various application domains [1]
in Gnulib!

I haven't read through it line by line; nevertheless a couple of comments:

  - +/* The public interface is documented in the file hamt.c.

    This is not good. As a user of the module, I would want to read *only*
    hamt.h and know how to use the public interface. In other words, hamt.h
    should have the functions' comments.

  - "Each
    updating operation returns a new hamt, which shares structure with
    the original one."
    So, after calling hamt_insert or hamt_delete, which function should I call
    to delete the original Hamt without affecting the new one? (In order to
    avoid accumulating pieces of old Hamts in memory.)

  - "The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
    is thread-safe as long as two threads do not simultaneously access
    the same hamt."
    I don't understand this sentence. Isn't the guarantee that
    "is thread-safe as long as two threads do not simultaneously access
    the same hamt" trivial? And what you want to achieve through the use
    of _Atomic is that it is thread-safe *also* when two threads access
    the same hamt?

Bruno

[1] http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 14:35 ` Bruno Haible
@ 2020-10-10 14:46   ` Marc Nieper-Wißkirchen
  2020-10-10 17:34     ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-10 14:46 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Hi Bruno,

Am Sa., 10. Okt. 2020 um 16:35 Uhr schrieb Bruno Haible <bruno@clisp.org>:

> It is exciting to see such a datastructure with various application domains [1]
> in Gnulib!

I'm glad that you like the idea.

>
> I haven't read through it line by line; nevertheless a couple of comments:

A few more lines will follow before the final draft is done because I
have been adding some destructive update procedures in the meantime.

>   - +/* The public interface is documented in the file hamt.c.
>
>     This is not good. As a user of the module, I would want to read *only*
>     hamt.h and know how to use the public interface. In other words, hamt.h
>     should have the functions' comments.

I followed the example of the already existing "hash" module. But I
can easily move the public comments into the header file.

>   - "Each
>     updating operation returns a new hamt, which shares structure with
>     the original one."
>     So, after calling hamt_insert or hamt_delete, which function should I call
>     to delete the original Hamt without affecting the new one? (In order to
>     avoid accumulating pieces of old Hamts in memory.)

When you do, say,

new_hamt = hamt_insert (hamt, ...)

and new_hamt != hamt afterwards, you have to delete both hamt and
new_hamt with hamt_free to free all the memory. After you have freed
the first hamt, the second won't be affected.

>   - "The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
>     is thread-safe as long as two threads do not simultaneously access
>     the same hamt."
>     I don't understand this sentence. Isn't the guarantee that
>     "is thread-safe as long as two threads do not simultaneously access
>     the same hamt" trivial? And what you want to achieve through the use
>     of _Atomic is that it is thread-safe *also* when two threads access
>     the same hamt?

The guarantee is not trivial because two different hamts may share
some structure.

So... assume you have a hamt. You do some operations on it (or you do
hamt_copy) to get a new_hamt that shares structure. Now if thread 1
works on hamt and thread 2 on new_hamt, it is safe when
GL_HAMT_THREAD_SAFE is set.

If two threads access the same hamt, it is not guaranteed to be safe.
If you need this, get an (extremely shallow) copy through hamt_copy
first.

Marc


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-09 21:07 [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
  2020-10-10 14:35 ` Bruno Haible
@ 2020-10-10 14:54 ` Bruno Haible
  2020-10-10 15:01   ` Marc Nieper-Wißkirchen
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-10 14:54 UTC (permalink / raw)
  To: bug-gnulib; +Cc: Marc Nieper-Wißkirchen

Since you define GL_HAMT_THREAD_SAFE to either 0 or 1, you need
to test it through
  #if GL_HAMT_THREAD_SAFE
not
  #ifdef GL_HAMT_THREAD_SAFE

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 14:54 ` Bruno Haible
@ 2020-10-10 15:01   ` Marc Nieper-Wißkirchen
  2020-10-10 15:04     ` Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-10 15:01 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am Sa., 10. Okt. 2020 um 16:54 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Since you define GL_HAMT_THREAD_SAFE to either 0 or 1, you need
> to test it through
>   #if GL_HAMT_THREAD_SAFE
> not
>   #ifdef GL_HAMT_THREAD_SAFE
>
> Bruno

Oh, yes... thanks!


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 15:01   ` Marc Nieper-Wißkirchen
@ 2020-10-10 15:04     ` Marc Nieper-Wißkirchen
  2020-10-10 17:41       ` Bruno Haible
  2020-10-10 18:19       ` Paul Eggert
  0 siblings, 2 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-10 15:04 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib, Bruno Haible

Speaking of GL_HAMT_THREAD_SAFE:

Is there a special Gnulib way (or Autoconf macro) for the following test:

#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L &&
!defined (__STD_NO_ATOMICS__)

I am asking because there may be non-C11 compilers that nevertheless
understand _Atomic.

Am Sa., 10. Okt. 2020 um 17:01 Uhr schrieb Marc Nieper-Wißkirchen
<marc.nieper+gnu@gmail.com>:
>
> Am Sa., 10. Okt. 2020 um 16:54 Uhr schrieb Bruno Haible <bruno@clisp.org>:
> >
> > Since you define GL_HAMT_THREAD_SAFE to either 0 or 1, you need
> > to test it through
> >   #if GL_HAMT_THREAD_SAFE
> > not
> >   #ifdef GL_HAMT_THREAD_SAFE
> >
> > Bruno
>
> Oh, yes... thanks!


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 14:46   ` Marc Nieper-Wißkirchen
@ 2020-10-10 17:34     ` Bruno Haible
  0 siblings, 0 replies; 45+ messages in thread
From: Bruno Haible @ 2020-10-10 17:34 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> >   - +/* The public interface is documented in the file hamt.c.
> >
> >     This is not good. As a user of the module, I would want to read *only*
> >     hamt.h and know how to use the public interface. In other words, hamt.h
> >     should have the functions' comments.
> 
> I followed the example of the already existing "hash" module. But I
> can easily move the public comments into the header file.

Yes please. Here in Gnulib, we don't have a way to extract reference
documentation from the source code (we don't use doxygen or similar);
therefore a programmer who wants to use a Gnulib module must peruse
the .h file.

> >   - "Each
> >     updating operation returns a new hamt, which shares structure with
> >     the original one."
> >     So, after calling hamt_insert or hamt_delete, which function should I call
> >     to delete the original Hamt without affecting the new one? (In order to
> >     avoid accumulating pieces of old Hamts in memory.)
> 
> When you do, say,
> 
> new_hamt = hamt_insert (hamt, ...)
> 
> and new_hamt != hamt afterwards, you have to delete both hamt and
> new_hamt with hamt_free to free all the memory. After you have freed
> the first hamt, the second won't be affected.
> 
> >   - "The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
> >     is thread-safe as long as two threads do not simultaneously access
> >     the same hamt."
> >     I don't understand this sentence. Isn't the guarantee that
> >     "is thread-safe as long as two threads do not simultaneously access
> >     the same hamt" trivial? And what you want to achieve through the use
> >     of _Atomic is that it is thread-safe *also* when two threads access
> >     the same hamt?
> 
> The guarantee is not trivial because two different hamts may share
> some structure.
> 
> So... assume you have a hamt. You do some operations on it (or you do
> hamt_copy) to get a new_hamt that shares structure. Now if thread 1
> works on hamt and thread 2 on new_hamt, it is safe when
> GL_HAMT_THREAD_SAFE is set.
> 
> If two threads access the same hamt, it is not guaranteed to be safe.
> If you need this, get an (extremely shallow) copy through hamt_copy
> first.

I see. Thanks. These explanation would make good comments.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 15:04     ` Marc Nieper-Wißkirchen
@ 2020-10-10 17:41       ` Bruno Haible
  2020-10-10 17:49         ` Marc Nieper-Wißkirchen
  2020-10-10 18:19       ` Paul Eggert
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-10 17:41 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> Is there a special Gnulib way (or Autoconf macro) for the following test:
> 
> #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L &&
> !defined (__STD_NO_ATOMICS__)
> 
> I am asking because there may be non-C11 compilers that nevertheless
> understand _Atomic.

And there may be C11 compilers which don't support _Atomic and nevertheless
dont't define __STD_NO_ATOMICS__.

So, indeed it would be useful to have an Autoconf macro that tests whether
_Atomic can be used.

AFAICS, neither Autoconf, nor Gnulib, nor the autoconf-archive project
have a test for this.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 17:41       ` Bruno Haible
@ 2020-10-10 17:49         ` Marc Nieper-Wißkirchen
  0 siblings, 0 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-10 17:49 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am Sa., 10. Okt. 2020 um 19:41 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Hi Marc,
>
> > Is there a special Gnulib way (or Autoconf macro) for the following test:
> >
> > #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L &&
> > !defined (__STD_NO_ATOMICS__)
> >
> > I am asking because there may be non-C11 compilers that nevertheless
> > understand _Atomic.
>
> And there may be C11 compilers which don't support _Atomic and nevertheless
> dont't define __STD_NO_ATOMICS__.
>
> So, indeed it would be useful to have an Autoconf macro that tests whether
> _Atomic can be used.

If someone is able to provide such a macro for Gnulib, I would replace
my check with this macro.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 15:04     ` Marc Nieper-Wißkirchen
  2020-10-10 17:41       ` Bruno Haible
@ 2020-10-10 18:19       ` Paul Eggert
  2020-10-10 21:24         ` Marc Nieper-Wißkirchen
  2020-10-10 22:39         ` _Atomic Bruno Haible
  1 sibling, 2 replies; 45+ messages in thread
From: Paul Eggert @ 2020-10-10 18:19 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

On 10/10/20 8:04 AM, Marc Nieper-Wißkirchen wrote:
> #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L &&
> !defined (__STD_NO_ATOMICS__)
> 
> I am asking because there may be non-C11 compilers that nevertheless
> understand _Atomic.

I suggest not worrying about this problem until we run into it. Quite possibly 
any actual problem will be something like "_Atomic can be used for most things, 
but not for feature X" in which case we'll likely want the more fine-grained 
check anyway. Until we discover actual problems we can get by with

    #if __STDC_VERSION__ < 201112 || defined __STD_NO_ATOMICS__

which is a cleaner way of writing the negative of the above test. These days 
there should be no reason to check whether __STDC_VERSION__ is defined, 
generally it's clearer to use "<" instead of ">=" so that textual order reflects 
numeric order, and the parens after "defined" are better omitted.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 18:19       ` Paul Eggert
@ 2020-10-10 21:24         ` Marc Nieper-Wißkirchen
  2020-10-10 21:46           ` Marc Nieper-Wißkirchen
  2020-10-10 22:39         ` _Atomic Bruno Haible
  1 sibling, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-10 21:24 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am Sa., 10. Okt. 2020 um 20:19 Uhr schrieb Paul Eggert <eggert@cs.ucla.edu>:

>     #if __STDC_VERSION__ < 201112 || defined __STD_NO_ATOMICS__
>
> which is a cleaner way of writing the negative of the above test. These days
> there should be no reason to check whether __STDC_VERSION__ is defined,
> generally it's clearer to use "<" instead of ">=" so that textual order reflects
> numeric order, and the parens after "defined" are better omitted.

Thanks, I did the edit in my local copy.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 21:24         ` Marc Nieper-Wißkirchen
@ 2020-10-10 21:46           ` Marc Nieper-Wißkirchen
  2020-10-11  1:28             ` Bruno Haible
  2020-10-11 14:14             ` terminology Bruno Haible
  0 siblings, 2 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-10 21:46 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: Bruno Haible, Paul Eggert, bug-gnulib

[-- Attachment #1: Type: text/plain, Size: 1567 bytes --]

I have attached an improved version of the HAMT module to this email.

Apart from improving the comments (which includes moving some
documentation into the header file) and changing the things already
discussed, I added a few more tests and three procedures for in-place
updates.

For example, you can now do hamt_insert_x to destructively insert an
element into a hamt (instead of inserting the element into a
conceptual copy as you do with hamt_insert). Hamts that share
structure with the modified hamt are not effected. (The idea with the
_x prefix comes from Guile whose C interface uses _x where Scheme uses
!.)

NB: For those who fancy Scheme, hamts can be used to implement the
library (srfi 146 hash) of SRFI 146 ([1]). The destructive update
operations of this module can be used to implement the linear-update
procedures of (srfi 146 hash) efficiently.

--

[1] https://srfi.schemers.org/srfi-146/srfi-146.html

Am Sa., 10. Okt. 2020 um 23:24 Uhr schrieb Marc Nieper-Wißkirchen
<marc.nieper+gnu@gmail.com>:
>
> Am Sa., 10. Okt. 2020 um 20:19 Uhr schrieb Paul Eggert <eggert@cs.ucla.edu>:
>
> >     #if __STDC_VERSION__ < 201112 || defined __STD_NO_ATOMICS__
> >
> > which is a cleaner way of writing the negative of the above test. These days
> > there should be no reason to check whether __STDC_VERSION__ is defined,
> > generally it's clearer to use "<" instead of ">=" so that textual order reflects
> > numeric order, and the parens after "defined" are better omitted.
>
> Thanks, I did the edit in my local copy.

[-- Attachment #2: 0001-hamt-New-module.patch --]
[-- Type: text/x-patch, Size: 48218 bytes --]

From db21b3e975ad7526e0db531fd4936354d8f031ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Nieper-Wi=C3=9Fkirchen?= <marc@nieper-wisskirchen.de>
Date: Sat, 10 Oct 2020 23:38:21 +0200
Subject: [PATCH] hamt: New module.

This module provides (persistent) hash array mapped tries.
* MODULES.html.sh: Add hamt.
* lib/hamt.c: New file.
* lib/hamt.h: New file.
* modules/hamt: New file.
* modules/hamt-tests: New file.
* tests/test-hamt.c: New file.
---
 ChangeLog          |  11 +
 MODULES.html.sh    |   1 +
 lib/hamt.c         | 984 +++++++++++++++++++++++++++++++++++++++++++++
 lib/hamt.h         | 182 +++++++++
 modules/hamt       |  29 ++
 modules/hamt-tests |  11 +
 tests/test-hamt.c  | 352 ++++++++++++++++
 7 files changed, 1570 insertions(+)
 create mode 100644 lib/hamt.c
 create mode 100644 lib/hamt.h
 create mode 100644 modules/hamt
 create mode 100644 modules/hamt-tests
 create mode 100644 tests/test-hamt.c

diff --git a/ChangeLog b/ChangeLog
index 2aba2b0c7..b9e4f9970 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2020-10-10  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+	hamt: New module.
+	This module provides (persistent) hash array mapped tries.
+	* MODULES.html.sh: Add hamt.
+	* lib/hamt.c: New file.
+	* lib/hamt.h: New file.
+	* modules/hamt: New file.
+	* modules/hamt-tests: New file.
+	* tests/test-hamt.c: New file.
+
 2020-10-10  Bruno Haible  <bruno@clisp.org>
 
 	*-list, *-oset, *-omap: Avoid possible compiler warnings.
diff --git a/MODULES.html.sh b/MODULES.html.sh
index 7e7cdae3e..2907eb741 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2028,6 +2028,7 @@ func_all_modules ()
   func_module hash-pjw
   func_module hash-pjw-bare
   func_module hash
+  func_module hamt
   func_module readline
   func_module readtokens
   func_module readtokens0
diff --git a/lib/hamt.c b/lib/hamt.c
new file mode 100644
index 000000000..eb15e19c1
--- /dev/null
+++ b/lib/hamt.c
@@ -0,0 +1,984 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.  */
+
+#include <config.h>
+#include "hamt.h"
+
+#include <flexmember.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "count-one-bits.h"
+#include "verify.h"
+#include "xalloc.h"
+
+/* Reference counters can be shared between different threads if the
+   entry they belong to is shared between different threads.
+   Operations on them therefore have to be atomic so that no invalid
+   state is observable.
+
+   A thread must not modify an entry or its children (!) if its
+   reference count implies that the entry is shared by at least two
+   hamts.  */
+typedef
+#if GL_HAMT_THREAD_SAFE
+_Atomic
+#endif
+size_t ref_counter;
+
+/* Hash values are of type size_t.  For each level of the trie, we use
+   5 bits (corresponding to lg2 of the width of a 32-bit word.  */
+#define MAX_DEPTH ((SIZE_WIDTH + 4) / 5)
+
+/***************/
+/* Entry Types */
+/***************/
+
+/* Leaf nodes are of type element.  Non-leaf nodes are either subtries
+   or, if at maximal depth, buckets.  The entry type is stored in the
+   lower two bits of the reference counter, whereas reference counters
+   for entries are incremented and decremented in multiples of 4.  */
+enum entry_type
+{
+  element_entry = 0,
+  subtrie_entry = 1,
+  bucket_entry = 2
+};
+
+/* Return the type an entry.  */
+static enum entry_type
+entry_type (const Hamt_entry *entry)
+{
+  return entry->ref_count & 3;
+}
+
+/********************/
+/* Reference Counts */
+/********************/
+
+/* Initialize the reference counter, storing its type.  */
+static void
+init_ref_counter (ref_counter *counter, enum entry_type type)
+{
+  *counter = 4 + type;
+}
+
+/* Increase the reference counter of an entry.  */
+static void
+inc_ref_counter (ref_counter *counter)
+{
+  *counter += 4;
+}
+
+/* Decrease the entry reference counter.  Return false if the entry
+   can be deleted.  */
+static bool
+dec_ref_counter (ref_counter *counter)
+{
+  *counter -= 4;
+  return *counter >= 4;
+}
+
+/**************/
+/* Structures */
+/**************/
+
+/* Different generations of a hamt share a function table.  */
+struct function_table
+{
+  Hamt_hasher *hasher;
+  Hamt_comparator *comparator;
+  Hamt_freer *freer;
+  ref_counter ref_count;
+};
+
+/* Different generations of a hamt share subtries.  A singleton
+   subtrie is modelled as a single element.  */
+struct subtrie
+{
+  ref_counter ref_count;
+  /* Nodes carry labels from 0 to 31.  The i-th bit in MAP is set if
+     the node labelled i is present.  */
+  uint32_t map;
+  /* The length of the NODES array is the population count of MAP.
+     The order of the nodes corresponds to the order of the 1-bits in
+     MAP.  */
+  Hamt_entry *nodes [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* Buckets are used when different elements have the same hash values.  */
+struct bucket
+{
+  ref_counter ref_counter;
+  size_t elt_count;
+  Hamt_entry *elts [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* A hamt consists of its function table and the root entry.  */
+struct hamt
+{
+  struct function_table *functions;
+  /* The root entry is NULL for an empty HAMT.  */
+  Hamt_entry *root;
+};
+
+/*******************/
+/* Function Tables */
+/*******************/
+
+/* Allocate and initialize a function table.  */
+static struct function_table *
+create_function_table (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                       Hamt_freer *freer)
+{
+  struct function_table *functions = XMALLOC (struct function_table);
+  functions->hasher = hasher;
+  functions->comparator = comparator;
+  functions->freer = freer;
+  functions->ref_count = 1;
+  return functions;
+}
+
+/* Increment the reference count and return the function table. */
+static struct function_table *
+copy_function_table (struct function_table *function_table)
+{
+  ++function_table->ref_count;
+  return function_table;
+}
+
+/* Decrease the reference count and free the function table if the
+   reference count drops to zero.  */
+static void
+free_function_table (struct function_table *function_table)
+{
+  if (--function_table->ref_count)
+    return;
+  free (function_table);
+}
+
+/************/
+/* Elements */
+/************/
+
+/* Return an element's hash.  */
+static size_t
+hash_element (const struct function_table *functions, const Hamt_entry *elt)
+{
+  return functions->hasher (elt);
+}
+
+/* Compare two elements.  */
+static bool
+compare_elements (const struct function_table *functions,
+                  const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return functions->comparator (elt1, elt2);
+}
+
+/* Free an element.  */
+static void
+free_element (const struct function_table *functions, Hamt_entry *elt)
+{
+  if (dec_ref_counter (&elt->ref_count))
+    return;
+  functions->freer (elt);
+}
+
+/* Return the initialized element.  */
+static Hamt_entry *
+init_element (Hamt_entry *elt)
+{
+  init_ref_counter (&elt->ref_count, element_entry);
+  return elt;
+}
+
+/***********/
+/* Buckets */
+/***********/
+
+/* Allocate a partially initialized bucket with a given number of elements.  */
+static struct bucket *
+alloc_bucket (size_t elt_count)
+{
+  struct bucket *bucket
+    = xmalloc (FLEXSIZEOF (struct bucket, elts,
+                           sizeof (Hamt_entry) * elt_count));
+  init_ref_counter (&bucket->ref_counter, bucket_entry);
+  bucket->elt_count = elt_count;
+  return bucket;
+}
+
+/***********/
+/* Entries */
+/***********/
+
+/* Return true if the entry is shared between two or more hamts.
+   Otherwise, return false.
+
+   This procedure is used for destructive updates.  If an entry and
+   all its parents are not shared, it can be updated destructively
+   without effecting other hamts.  */
+static bool
+is_shared (const Hamt_entry *entry)
+{
+  return entry->ref_count >= 8;
+}
+
+/* Calculate and return the number of nodes in a subtrie.  */
+static int
+trienode_count (const struct subtrie *subtrie)
+{
+  return count_one_bits_l (subtrie->map);
+}
+
+/* Allocate a partially initialized subtrie with a given number of nodes.  */
+static struct subtrie *
+alloc_subtrie (int node_count)
+{
+  struct subtrie *subtrie
+    = xmalloc (FLEXSIZEOF (struct subtrie, nodes,
+                           sizeof (Hamt_entry) * node_count));
+  init_ref_counter (&subtrie->ref_count, subtrie_entry);
+  return subtrie;
+}
+
+/* Return a conceptually copy of an entry.  */
+static Hamt_entry *
+copy_entry (Hamt_entry *entry)
+{
+  inc_ref_counter (&entry->ref_count);
+  return entry;
+}
+
+/* Return a new bucket that has the j-th element replaced.  */
+static struct bucket *
+replace_bucket_element (struct bucket *bucket, int j, Hamt_entry *elt)
+{
+  int n = bucket->elt_count;
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    if (k == j)
+      new_bucket->elts [k] = elt;
+    else
+      new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+  return new_bucket;
+}
+
+/* Return a new subtrie that has the j-th node replaced.  */
+static struct subtrie *
+replace_entry (struct subtrie *subtrie, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie);
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map;
+  for (int k = 0; k < n; ++k)
+    if (k == j)
+      new_subtrie->nodes [k] = entry;
+    else
+      new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+  return new_subtrie;
+}
+
+/* Return a new subtrie that has an entry labelled i inserted at
+   the j-th position.  */
+static struct subtrie *
+insert_entry (struct subtrie *subtrie, int i, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie) + 1;
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map | (1 << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k > j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k - 1]);
+      else
+        new_subtrie->nodes [k] = entry;
+    }
+  return new_subtrie;
+}
+
+/* Return a new entry that has the entry labelled i removed from
+   position j.  */
+static Hamt_entry *
+remove_subtrie_entry (struct subtrie *subtrie, int i, int j)
+{
+  int n = trienode_count (subtrie) - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (subtrie->nodes [1]);
+      return copy_entry (subtrie->nodes [0]);
+    }
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map & ~(1 << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k >= j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k + 1]);
+    }
+  return (Hamt_entry *) new_subtrie;
+}
+
+/* Return a new entry that has the entry at position j removed.  */
+static Hamt_entry *
+remove_bucket_entry (struct bucket *bucket, int j)
+{
+  int n = bucket->elt_count - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (bucket->elts [1]);
+      return copy_entry (bucket->elts [0]);
+    }
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+      else if (k >= j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k + 1]);
+    }
+  return (Hamt_entry *) new_bucket;
+}
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create a new, empty hash array mapped trie.  */
+Hamt *
+hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+             Hamt_freer *freer)
+{
+  struct function_table *functions
+    = create_function_table (hasher, comparator, freer);
+  Hamt *hamt = XMALLOC (Hamt);
+  hamt->functions = functions;
+  hamt->root = NULL;
+  return hamt;
+}
+
+/* Return a copy of the hamt.  */
+Hamt *
+hamt_copy (Hamt *hamt)
+{
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = hamt->root == NULL ? NULL : copy_entry (hamt->root);
+  return new_hamt;
+}
+
+/* Free a bucket.  */
+static void
+free_bucket (struct function_table const *functions, struct bucket *bucket)
+{
+  if (dec_ref_counter (&bucket->ref_counter))
+    return;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    free_element (functions, elts [i]);
+  free (bucket);
+}
+
+/* Forward declaration.  */
+static void free_subtrie (struct function_table const *functions,
+                          struct subtrie *subtrie);
+
+/* Free an entry.  */
+static void
+free_entry (struct function_table const *functions, Hamt_entry *entry)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      free_element (functions, entry);
+      break;
+    case subtrie_entry:
+      free_subtrie (functions, (struct subtrie *) entry);
+      break;
+    case bucket_entry:
+      free_bucket (functions, (struct bucket *) entry);
+      break;
+    default:
+      assume (0);
+    }
+}
+
+/* Free a trie recursively.  */
+static void
+free_subtrie (struct function_table const *functions, struct subtrie *subtrie)
+{
+  if (dec_ref_counter (&subtrie->ref_count))
+    return;
+  int n = trienode_count (subtrie);
+  Hamt_entry **node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    free_entry (functions, *node_ptr++);
+  free (subtrie);
+}
+
+/* Free a hamt.  */
+void
+hamt_free (Hamt *hamt)
+{
+  if (hamt->root != NULL)
+    free_entry (hamt->functions, hamt->root);
+  free_function_table (hamt->functions);
+  free (hamt);
+}
+
+/**********/
+/* Lookup */
+/**********/
+
+/* Lookup an element in a bucket.  */
+static const Hamt_entry *
+bucket_lookup (const struct function_table *functions,
+               const struct bucket *bucket, const Hamt_entry *elt)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, elt, elts [i]))
+        return *elts;
+    }
+  return NULL;
+}
+
+/* Forward declaration.  */
+static const Hamt_entry *entry_lookup (const struct function_table *functions,
+                                       const Hamt_entry *entry,
+                                       const Hamt_entry *elt, size_t hash);
+
+/* Lookup an element in a bucket.  */
+static const Hamt_entry *
+subtrie_lookup (const struct function_table *functions,
+                const struct subtrie *subtrie, const Hamt_entry *elt,
+                size_t hash)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+
+  if (! (map & (1 << i)))
+    return NULL;
+
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  return entry_lookup (functions, subtrie->nodes [j], elt, hash >> 5);
+}
+
+/* Lookup an element in an entry.  */
+static const Hamt_entry *
+entry_lookup (const struct function_table *functions, const Hamt_entry *entry,
+              const Hamt_entry *elt, size_t hash)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, elt, entry))
+        return entry;
+      return NULL;
+    case subtrie_entry:
+      return subtrie_lookup (functions, (struct subtrie *) entry, elt, hash);
+    case bucket_entry:
+      return bucket_lookup (functions, (struct bucket *) entry, elt);
+    default:
+      assume (0);
+    }
+}
+
+/* If ELT matches an entry in HAMT, return this entry.  Otherwise,
+   return NULL.  */
+const Hamt_entry *
+hamt_lookup (const Hamt *hamt, const Hamt_entry *elt)
+{
+  if (hamt->root == NULL)
+    return NULL;
+
+  return entry_lookup (hamt->functions, hamt->root, elt,
+                       hash_element (hamt->functions, elt));
+}
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+/* Create a bucket populated with two elements.  */
+static struct bucket *
+create_populated_bucket (Hamt_entry *elt1, Hamt_entry *elt2)
+{
+  struct bucket *bucket = alloc_bucket (2);
+  bucket->elts [0] = elt1;
+  bucket->elts [1] = elt2;
+  return bucket;
+}
+
+/* Create a chain of subtrie nodes so that the resulting trie is
+   populated with exactly two elements.  */
+static Hamt_entry *
+create_populated_subtrie (Hamt_entry *elt1, Hamt_entry *elt2, size_t hash1,
+                          size_t hash2, int depth)
+{
+  if (depth >= MAX_DEPTH)
+    return (Hamt_entry *) create_populated_bucket (elt1, elt2);
+
+  struct subtrie *subtrie;
+  int i1 = hash1 & 31;
+  int i2 = hash2 & 31;
+  if (i1 != i2)
+    {
+      subtrie = alloc_subtrie (2);
+      subtrie->map = (1 << i1) | (1 << i2);
+      if (i1 < i2)
+        {
+          subtrie->nodes [0] = elt1;
+          subtrie->nodes [1] = elt2;
+        }
+      else
+        {
+          subtrie->nodes [0] = elt2;
+          subtrie->nodes [1] = elt1;
+        }
+    }
+  else
+    {
+      subtrie = alloc_subtrie (1);
+      subtrie->map = 1 << i1;
+      subtrie->nodes [0]
+        = create_populated_subtrie (elt1, elt2, hash1 >> 5, hash2 >> 5,
+                                    depth + 1);
+    }
+  return (Hamt_entry *) subtrie;
+}
+
+/* Insert or replace an element in a bucket.  */
+static struct bucket *
+bucket_insert (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr, bool replace, bool shared)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry **elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, elts [i]))
+        {
+          if (replace)
+            {
+              if (shared)
+                {
+                  struct bucket *new_bucket
+                    = replace_bucket_element (bucket, i,
+                                              init_element (*elt_ptr));
+                  *elt_ptr = elts [i];
+                  return new_bucket;
+                }
+              free_element (functions, elts [i]);
+              elts [i] = init_element (*elt_ptr);
+              return bucket;
+            }
+          *elt_ptr = *elt_ptr == elts [i] ? NULL : elts [i];
+          return bucket;
+        }
+    }
+  struct bucket *new_bucket = alloc_bucket (elt_count + 1);
+  new_bucket->elts [0] = init_element (*elt_ptr);
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      new_bucket->elts [i + 1] = copy_entry (bucket->elts [i]);
+    }
+  if (replace)
+    *elt_ptr = NULL;
+  return new_bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_insert (const struct function_table *functions,
+                                 Hamt_entry *subtrie, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth, bool replace,
+                                 bool shared);
+
+/* Insert or replace an element in a subtrie.  */
+static struct subtrie *
+subtrie_insert (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth, bool replace,
+                bool shared)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1 << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_insert (functions, entry, elt_ptr, hash >> 5, depth + 1,
+                        replace, shared);
+      if (new_entry != entry)
+        {
+          if (shared)
+            return replace_entry (subtrie, j, new_entry);
+          free_entry (functions, entry);
+          subtrie->nodes [j] = new_entry;
+        }
+      return subtrie;
+    }
+  Hamt_entry *entry = init_element (*elt_ptr);
+  if (replace)
+    *elt_ptr = NULL;
+  return insert_entry (subtrie, i, j, entry);
+}
+
+/* Insert or replace an element in an entry.
+
+   REPLACE is true if we want replace instead of insert semantics.
+   SHARED is false if a destructive update has been requested and none
+   of the parent nodes are shared.  If an entry cannot be inserted
+   because the same entry with respect to pointer equality is already
+   present, *ELT_PTR is set to NULL to mark this special case.  */
+static Hamt_entry *
+entry_insert (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth, bool replace,
+              bool shared)
+{
+  shared |= is_shared (entry);
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          if (replace)
+            {
+              if (shared)
+                {
+                  Hamt_entry *new_entry = init_element (*elt_ptr);
+                  *elt_ptr = entry;
+                  return new_entry;
+                }
+              return init_element (*elt_ptr);
+            }
+          *elt_ptr = *elt_ptr == entry ? NULL : entry;
+          return entry;
+        }
+      Hamt_entry *new_entry = init_element (*elt_ptr);
+      if (replace)
+        *elt_ptr = NULL;
+      return create_populated_subtrie (new_entry, copy_entry (entry), hash,
+                                       (hash_element (functions, entry)
+                                        >> (5 * depth)), depth);
+    case subtrie_entry:
+      return (Hamt_entry *)
+        subtrie_insert (functions, (struct subtrie *) entry, elt_ptr, hash,
+                        depth, replace, shared);
+    case bucket_entry:
+      return (Hamt_entry *)
+        bucket_insert (functions, (struct bucket *) entry, elt_ptr, replace,
+                       shared);
+    default:
+      assume (0);
+    }
+}
+
+/* Insert or replace an element in the root.  */
+static Hamt_entry *
+root_insert (const struct function_table *functions, Hamt_entry *root,
+             Hamt_entry **elt_ptr, bool replace, bool shared)
+{
+  if (root == NULL)
+    return init_element (*elt_ptr);
+
+ return entry_insert (functions, root, elt_ptr,
+                      hash_element (functions, *elt_ptr), 0, replace, shared);
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return HAMT.  Otherwise, insert *ELT_PTR
+   into a copy of the HAMT and return the copy.  */
+Hamt *
+hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *elt = *elt_ptr;
+  Hamt_entry *new_entry = root_insert (hamt->functions, hamt->root,
+                                       elt_ptr, false, true);
+  if (*elt_ptr == NULL)
+    *elt_ptr = elt;
+
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/* Insert *ELT_PTR into a copy of HAMT and return the copy.  If an
+   existing element was replaced, set *ELT_PTR to this element, and to
+   NULL otherwise. */
+Hamt *
+hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = root_insert (hamt->functions, hamt->root, elt_ptr, true,
+                                true);
+  return new_hamt;
+}
+
+/* Delete an element in a bucket if found.  */
+static Hamt_entry *
+bucket_delete (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, elts [i]))
+        {
+          *elt_ptr = elts [i];
+          return remove_bucket_entry (bucket, i);
+        }
+    }
+  *elt_ptr = NULL;
+  return (Hamt_entry *) bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_delete (const struct function_table *functions,
+                                 Hamt_entry *entry, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth, bool shared);
+
+/* Delete an element in a subtrie if found.  */
+static Hamt_entry *
+subtrie_delete (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth, bool shared)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1 << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_delete (functions, entry, elt_ptr, hash >> 5, depth + 1,
+                        shared);
+      if (new_entry == NULL)
+        return remove_subtrie_entry (subtrie, i, j);
+      if (new_entry != entry)
+        {
+          if (shared)
+            return (Hamt_entry *) replace_entry (subtrie, j, new_entry);
+          free_entry (functions, entry);
+          subtrie->nodes [j] = new_entry;
+        }
+      return (Hamt_entry *) subtrie;
+    }
+  *elt_ptr = NULL;
+  return (Hamt_entry *) subtrie;
+}
+
+/* Delete an element in an entry if found.
+
+   SHARED is false if a destructive update has been requested and none
+   of the parent nodes are shared.  If an entry cannot be
+   deleted, *ELT_PTR is set to NULL.  */
+static Hamt_entry *
+entry_delete (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth, bool shared)
+{
+  shared |= is_shared (entry);
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          *elt_ptr = entry;
+          return NULL;
+        }
+      *elt_ptr = NULL;
+      return entry;
+    case subtrie_entry:
+      return subtrie_delete (functions, (struct subtrie *) entry, elt_ptr, hash,
+                             depth, shared);
+    case bucket_entry:
+      return bucket_delete (functions, (struct bucket *) entry, elt_ptr);
+    default:
+      assume (0);
+    }
+}
+
+/* Delete an element in the root.  */
+static Hamt_entry *
+root_delete (const struct function_table *functions, Hamt_entry *root,
+             Hamt_entry **elt_ptr, bool shared)
+{
+  if (root == NULL)
+    return NULL;
+
+  return entry_delete (functions, root, elt_ptr,
+                       hash_element (functions, *elt_ptr), 0, shared);
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+element from the table, remove the element from a copy of the hamt and
+return the copy.  Otherwise, return HAMT.  */
+Hamt *
+hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *elt = *elt_ptr;
+  Hamt_entry *new_entry = root_delete (hamt->functions, hamt->root, elt_ptr,
+                                       true);
+  if (*elt_ptr == NULL)
+    *elt_ptr = elt;
+
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/*************/
+/* Iteration */
+/*************/
+
+/* Walk a bucket.  */
+static size_t
+bucket_do_while (const struct bucket *bucket, Hamt_processor *proc, void *data,
+                 bool *success)
+{
+  size_t cnt = 0;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      *success = proc (elts [i], data);
+      if (!success)
+        return cnt;
+      ++cnt;
+    }
+  return cnt;
+}
+
+/* Forward declaration.  */
+static size_t entry_do_while (const Hamt_entry *entry, Hamt_processor *proc,
+                              void *data, bool *success);
+
+/* Walk a subtrie.  */
+static size_t subtrie_do_while (const struct subtrie *subtrie,
+                                Hamt_processor *proc, void *data, bool *success)
+{
+  size_t cnt = 0;
+  int n = trienode_count (subtrie);
+  Hamt_entry *const *node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    {
+      cnt += entry_do_while (*node_ptr++, proc, data, success);
+      if (!success)
+        return cnt;
+    }
+  return cnt;
+}
+
+/* Walk an entry.  */
+static size_t
+entry_do_while (const Hamt_entry *entry, Hamt_processor *proc, void *data,
+                bool *success)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      *success = proc (entry, data);
+      return *success ? 1 : 0;
+    case subtrie_entry:
+      return subtrie_do_while ((struct subtrie *) entry, proc, data, success);
+    case bucket_entry:
+      return bucket_do_while ((struct bucket *) entry, proc, data, success);
+    default:
+      assume (0);
+    }
+}
+
+/* Call PROC for every entry of the hamt until it returns false.  The
+   first argument of PROC is the entry, the second argument is the value
+   of DATA as received.  Return the number of calls that returned
+   true.  */
+size_t
+hamt_do_while (const Hamt *hamt, Hamt_processor *proc, void *data)
+{
+  if (hamt->root == NULL)
+    return 0;
+
+  bool success = true;
+  return entry_do_while (hamt->root, proc, data, &success);
+}
+
+/***********************/
+/* Destructive Updates */
+/***********************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return false.  Otherwise, insert *ELT_PTR
+   destructively into the hamt and return true.  */
+bool
+hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *elt = *elt_ptr;
+  Hamt_entry *old_root = hamt->root;
+  hamt->root = root_insert (hamt->functions, old_root, elt_ptr, false, false);
+  if (old_root != hamt->root && old_root != NULL)
+    free_entry (hamt->functions, old_root);
+  if (*elt_ptr == NULL)
+    {
+      *elt_ptr = elt;
+      return false;
+    }
+  return *elt_ptr == elt;
+}
+
+/* Insert ELT destructively into HAMT.  If an existing element was
+   replaced, return true.  Otherwise, return false.  */
+bool
+hamt_replace_x (Hamt *hamt, Hamt_entry *elt)
+{
+  Hamt_entry *old_root = hamt->root;
+  hamt->root = root_insert (hamt->functions, old_root, &elt, true, false);
+  if (old_root != hamt->root && old_root != NULL)
+    free_entry (hamt->functions, old_root);
+  return elt != NULL;
+}
+
+/* If ELT matches an element already in HAMT, remove the element
+   destructively from the hamt and return true.  Otherwise, return
+   false.  */
+bool
+hamt_delete_x (Hamt *hamt, Hamt_entry *elt)
+{
+  Hamt_entry *old_root = hamt->root;
+  hamt->root = root_delete (hamt->functions, old_root, &elt, false);
+  if (old_root != hamt->root)
+    free_entry (hamt->functions, old_root);
+  return elt != NULL;
+}
diff --git a/lib/hamt.h b/lib/hamt.h
new file mode 100644
index 000000000..c37314f55
--- /dev/null
+++ b/lib/hamt.h
@@ -0,0 +1,182 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020. */
+
+/* This module provides a persistent version of hash array mapped
+   tries (hamts) that can be used in place of hash tables when pure
+   (functional) operations are needed.
+
+   A hash function and an equivalence predicate has to be provided for
+   the elements that can be inserted, replaced and deleted in a hamt.
+   A hamt cannot contain duplicates that compare equal.
+
+   Each non-destructive updating operation returns a new hamt, which
+   shares structure with the original one.  Destructive updates only
+   effect the hamt, on which the destructive operation is applied.
+   For example, given a hamt HAMT1, any non-destructive update
+   operation (e.g. hamt_insert) will result in a new hamt HAMT2.
+   Whatever further operations (destructive or not, including freeing
+   a hamt) are applied to HAMT1 won't change HAMT2 and vice versa.  To
+   free all the memory, hash_free has therefore to be applied to both
+   HAMT1 and HAMT2.
+
+   If persistence is not needed, transient hash tables are probably
+   faster.
+
+   See also: Phil Bagwell (2000). Ideal Hash Trees (Report). Infoscience
+   Department, École Polytechnique Fédérale de Lausanne.
+
+   http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf  */
+
+#ifndef _GL_HAMT_H
+#define _GL_HAMT_H
+
+/* The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
+   is thread-safe as long as two threads do not simultaneously access
+   the same hamt.  This is non-trivial as different hamts may share
+   some structure.  */
+#if __STDC_VERSION__ < 201112 || defined __STD_NO_ATOMICS__
+# define GL_HAMT_THREAD_SAFE 0
+#else
+# define GL_HAMT_THREAD_SAFE 1
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* A hamt stores pointers to elements.  Each element has to be a
+   struct whose initial member is of the type Hamt_entry.  An element
+   is conceptually owned by a hamt as soon as it is inserted.  It will
+   be automatically freed as soon as the last hamt containing it is
+   freed.  */
+typedef struct
+{
+#ifdef GL_HAMT_THREAD_SAFE
+  _Atomic
+#endif
+  size_t ref_count;
+} Hamt_entry;
+
+/*************************/
+/* Opaque Hamt Structure */
+/*************************/
+
+/* In user-code, hamts are accessed through pointers to the opaque
+   Hamt type.  Two hamts are said to be the same if and only if their
+   pointers are equal. */
+typedef struct hamt Hamt;
+
+/******************/
+/* Function Types */
+/******************/
+
+/* A hash function has to be pure, and two elements that compare equal
+   have to have the same hash value.  For a hash function to be a good
+   one, it is important that it uses all SIZE_WIDTH bits in
+   size_t.  */
+typedef size_t (Hamt_hasher) (const Hamt_entry *elt);
+
+/* A comparision function has to be pure, and two elements that have
+   equal pointers have to compare equal.  */
+typedef bool (Hamt_comparator) (const Hamt_entry *elt1, const Hamt_entry *elt2);
+
+/* A user-defined function that is called when the last hamt
+   containing a particular element is freed.  */
+typedef void (Hamt_freer) (Hamt_entry *elt);
+
+/* A processor function is called during walking of a hamt.  */
+typedef bool (Hamt_processor) (const Hamt_entry *elt, void *data);
+
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create and return a new and empty hash array mapped trie.  */
+extern Hamt *hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                          Hamt_freer *freer)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* Return a copy of HAMT, which is not the same in the sense above.
+   This procedure can be used, for example, so that two threads can
+   access the same data independently.  */
+extern Hamt *hamt_copy (Hamt *hamt) _GL_ATTRIBUTE_NODISCARD;
+
+/* Free the resources solely allocated by HAMT and all elements solely
+   contained in it.  */
+extern void hamt_free (Hamt *hamt);
+
+/**********/
+/* Lookup */
+/**********/
+
+/* If ELT matches an entry in HAMT, return this entry.  Otherwise,
+   return NULL.  */
+extern const Hamt_entry *hamt_lookup (const Hamt *hamt, const Hamt_entry *elt);
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   existing element and return the original hamt.  Otherwise, insert
+   *ELT_PTR into a copy of the hamt and return the copy.  */
+extern Hamt *hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+existing element, remove the element from a copy of the hamt and
+return the copy.  Otherwise, return the original hamt.  */
+extern Hamt *hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* Insert *ELT_PTR into a copy of HAMT and return the copy.  If an
+   existing element was replaced, set *ELT_PTR to this element, and to
+   NULL otherwise.  */
+extern Hamt *hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/*************/
+/* Iteration */
+/*************/
+
+/* Call PROC for every entry of the hamt until it returns false.  The
+   first argument to the processor is the entry, the second argument
+   is the value of DATA as received.  Return the number of calls that
+   returned true.  */
+extern size_t hamt_do_while (const Hamt *hamt, Hamt_processor *proc,
+                             void *data);
+
+/***********************/
+/* Destructive Updates */
+/***********************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return false.  Otherwise, insert *ELT_PTR
+   destructively into the hamt and return true.  */
+extern bool hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr);
+
+/* Insert ELT destructively into HAMT.  If an existing element was
+   replaced, return true.  Otherwise, return false.  */
+extern bool hamt_replace_x (Hamt *hamt, Hamt_entry *elt);
+
+/* If ELT matches an element already in HAMT, remove the element
+   destructively from the hamt and return true.  Otherwise, return
+   false.  */
+extern bool hamt_delete_x (Hamt *hamt, Hamt_entry *elt);
+
+#endif /* _GL_HAMT_H */
diff --git a/modules/hamt b/modules/hamt
new file mode 100644
index 000000000..d73f09c2d
--- /dev/null
+++ b/modules/hamt
@@ -0,0 +1,29 @@
+Description:
+Persistent hash array mapped tries.
+
+Files:
+lib/hamt.h
+lib/hamt.c
+
+Depends-on:
+count-one-bits
+flexmember
+inttypes-incomplete
+stdbool
+stdint
+verify
+xalloc
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += hamt.c
+
+Include:
+"hamt.h"
+
+License:
+GPL
+
+Maintainer:
+Marc Nieper-Wisskirchen
diff --git a/modules/hamt-tests b/modules/hamt-tests
new file mode 100644
index 000000000..f4f0ea4e0
--- /dev/null
+++ b/modules/hamt-tests
@@ -0,0 +1,11 @@
+Files:
+tests/test-hamt.c
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-hamt
+check_PROGRAMS += test-hamt
diff --git a/tests/test-hamt.c b/tests/test-hamt.c
new file mode 100644
index 000000000..9c068c404
--- /dev/null
+++ b/tests/test-hamt.c
@@ -0,0 +1,352 @@
+/* Test of persistent hash array mapped trie implementation.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.  */
+
+#include <config.h>
+
+#include "hamt.h"
+#include "macros.h"
+#include "xalloc.h"
+
+typedef struct
+{
+  Hamt_entry entry;
+  int val;
+} Element;
+
+static int
+entry_value (const Hamt_entry *elt)
+{
+  return ((Element *) elt)->val;
+}
+
+static size_t
+hash_element (const Hamt_entry *elt)
+{
+  return entry_value (elt) & ~3; /* We drop the last bits so that we
+                                    can test hash collisions. */
+}
+
+static bool
+compare_element (const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return entry_value (elt1) == entry_value (elt2);
+}
+
+static void
+free_element (Hamt_entry *elt)
+{
+  free (elt);
+}
+
+static Hamt_entry *
+make_element (int n)
+{
+  Element *elt = XMALLOC (Element);
+  elt->val = n;
+  return &elt->entry;
+}
+
+static Hamt *
+test_hamt_create (void)
+{
+  return hamt_create (hash_element, compare_element, free_element);
+}
+
+
+static int sum = 0;
+static int flag;
+
+static bool
+proc (const Hamt_entry *elt, void *data)
+{
+  if (data == &flag)
+    {
+      sum += entry_value (elt);
+      return true;
+    }
+  if (sum > 0)
+    {
+      sum = 0;
+      return true;
+    }
+  return false;
+}
+
+static void
+test_general (void)
+{
+  Hamt *hamt = test_hamt_create ();
+
+  Hamt_entry *x5 = make_element (5);
+  Hamt_entry *p = x5;
+  Hamt *hamt1 = hamt_insert (hamt, &p);
+  ASSERT (hamt1 != hamt);
+  ASSERT (hamt_lookup (hamt, x5) == NULL);
+  ASSERT (hamt_lookup (hamt1, x5) == x5);
+  hamt_free (hamt);
+
+  Hamt_entry *y5 = make_element (5);
+  p = y5;
+  Hamt *hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 == hamt1);
+  ASSERT (p == x5);
+  ASSERT (hamt_lookup (hamt1, y5) == x5);
+
+  p = y5;
+  hamt = hamt_replace (hamt1, &p);
+  ASSERT (p == x5);
+  ASSERT (hamt_lookup (hamt, y5) == y5);
+  hamt_free (hamt);
+  y5 = make_element (5);
+
+  Hamt_entry *z37 = make_element (37);
+  p = z37;
+  hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 != hamt1);
+  ASSERT (p == z37);
+  ASSERT (hamt_lookup (hamt1, z37) == NULL);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+  hamt_free (hamt1);
+
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 2);
+  ASSERT (sum == 42);
+  ASSERT (hamt_do_while (hamt2, proc, NULL) == 1);
+  ASSERT (sum == 0);
+
+  p = y5;
+  hamt1 = hamt_delete (hamt2, &p);
+  ASSERT (hamt1 != hamt2);
+  ASSERT (p == x5);
+
+  ASSERT (hamt_lookup (hamt1, x5) == NULL);
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+
+  hamt_free (hamt1);
+  Hamt_entry *x4 = make_element (4);
+  hamt1 = hamt_insert (hamt2, &x4);
+  hamt_free (hamt2);
+  Hamt_entry *x6 = make_element (6);
+  hamt2 = hamt_insert (hamt1, &x6);
+  hamt_free (hamt1);
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum == 52);
+
+  hamt1 = hamt_delete (hamt2, &x4);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum = 52);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt1, proc, &flag) == 3);
+  ASSERT (sum  = 48);
+
+  hamt_free (hamt1);
+  hamt_free (hamt2);
+  free_element (y5);
+}
+
+static bool
+true_processor (_GL_ATTRIBUTE_MAYBE_UNUSED const Hamt_entry *elt,
+                _GL_ATTRIBUTE_MAYBE_UNUSED void *data)
+{
+  return true;
+}
+
+static size_t
+element_count (Hamt *hamt)
+{
+  return hamt_do_while (hamt, true_processor, NULL);
+}
+
+struct find_values_context
+{
+  size_t n;
+  int *elts;
+  bool *found;
+};
+
+static bool
+find_values_processor (const Hamt_entry *entry, void *data)
+{
+  struct find_values_context *ctx = data;
+  int val = entry_value (entry);
+  for (size_t i = 0; i < ctx->n; ++i)
+    if (ctx->elts [i] == val && !ctx->found [i])
+      {
+        ctx->found [i] = true;
+        return true;
+      }
+  return false;
+}
+
+static bool
+find_values (Hamt *hamt, size_t n, int *elts)
+{
+  bool *found = XCALLOC (n, bool);
+  struct find_values_context ctx = {n, elts, found};
+  bool res = hamt_do_while (hamt, find_values_processor, &ctx) == n;
+  free (found);
+  return res;
+}
+
+static size_t
+insert_values (Hamt **hamt, size_t n, int *elts, bool destructive)
+{
+  size_t cnt = 0;
+  for (size_t i = 0; i < n; ++i)
+    {
+      Hamt_entry *p = make_element (elts [i]);
+      Hamt_entry *q = p;
+      if (destructive)
+        {
+          if (hamt_insert_x (*hamt, &p))
+            ++cnt;
+          else
+            free_element (q);
+        }
+      else
+        {
+          Hamt *new_hamt = hamt_insert (*hamt, &p);
+          if (new_hamt != *hamt)
+            {
+              hamt_free (*hamt);
+              *hamt = new_hamt;
+              ++cnt;
+            }
+          else
+            {
+              free_element (q);
+            }
+        }
+    }
+  return cnt;
+}
+
+static size_t
+replace_values (Hamt **hamt, size_t n, int *elts, bool destructive)
+{
+  size_t cnt = 0;
+  for (size_t i = 0; i < n; ++i)
+    {
+      Hamt_entry *p = make_element (elts [i]);
+      if (destructive)
+        {
+          if (hamt_replace_x (*hamt, p))
+            ++cnt;
+        }
+      else
+        {
+          Hamt *new_hamt = hamt_replace (*hamt, &p);
+          hamt_free (*hamt);
+          *hamt = new_hamt;
+          if (p != NULL)
+            ++cnt;
+        }
+    }
+  return cnt;
+}
+
+static size_t
+delete_values (Hamt **hamt, size_t n, int *elts, bool destructive)
+{
+  size_t cnt = 0;
+  for (size_t i = 0; i < n; ++i)
+    {
+      Hamt_entry *p = make_element (elts [i]);
+      Hamt_entry *q = p;
+      if (destructive)
+        {
+          if (hamt_delete_x (*hamt, p))
+            ++cnt;
+        }
+      else
+        {
+          Hamt *new_hamt = hamt_delete (*hamt, &p);
+          if (new_hamt != *hamt)
+            {
+              hamt_free (*hamt);
+              *hamt = new_hamt;
+              ++cnt;
+            }
+        }
+      free (q);
+    }
+  return cnt;
+}
+
+static int val_array1 [10] = {1, 2, 3, 4, 33, 34, 35, 36, 1024, 1025};
+static int val_array2 [10] = {1, 2, 34, 36, 1025, 32768, 32769, 32770, 32771,
+                              32772};
+
+static void
+test_functional_update (void)
+{
+  Hamt *hamt = test_hamt_create ();
+
+  ASSERT (insert_values (&hamt, 10, val_array1, false) == 10);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (insert_values (&hamt, 10, val_array2, false) == 5);
+  ASSERT (element_count (hamt) == 15);
+  ASSERT (delete_values (&hamt, 10, val_array1, false) == 10);
+  ASSERT (element_count (hamt) == 5);
+  ASSERT (delete_values (&hamt, 10, val_array2, false) == 5);
+  ASSERT (element_count (hamt) == 0);
+
+  ASSERT (replace_values (&hamt, 10, val_array1, false) == 0);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (replace_values (&hamt, 10, val_array2, false) == 5);
+  ASSERT (element_count (hamt) == 15);
+
+  hamt_free (hamt);
+}
+
+static void
+test_destructive_update (void)
+{
+  Hamt *hamt = test_hamt_create ();
+
+  ASSERT (insert_values (&hamt, 10, val_array1, true) == 10);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (insert_values (&hamt, 10, val_array2, true) == 5);
+  ASSERT (element_count (hamt) == 15);
+  ASSERT (delete_values (&hamt, 10, val_array1, true) == 10);
+  ASSERT (element_count (hamt) == 5);
+  ASSERT (delete_values (&hamt, 10, val_array2, true) == 5);
+  ASSERT (element_count (hamt) == 0);
+
+  ASSERT (replace_values (&hamt, 10, val_array1, true) == 0);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (replace_values (&hamt, 10, val_array2, true) == 5);
+  ASSERT (element_count (hamt) == 15);
+
+  hamt_free (hamt);
+}
+
+int
+main (void)
+{
+  test_general ();
+  test_functional_update ();
+  test_destructive_update ();
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 45+ messages in thread

* Re: _Atomic
  2020-10-10 18:19       ` Paul Eggert
  2020-10-10 21:24         ` Marc Nieper-Wißkirchen
@ 2020-10-10 22:39         ` Bruno Haible
  2020-10-11 20:15           ` _Atomic Paul Eggert
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-10 22:39 UTC (permalink / raw)
  To: bug-gnulib; +Cc: Marc Nieper-Wißkirchen, Paul Eggert

Paul Eggert wrote:
> On 10/10/20 8:04 AM, Marc Nieper-Wißkirchen wrote:
> > #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L &&
> > !defined (__STD_NO_ATOMICS__)
> > 
> > I am asking because there may be non-C11 compilers that nevertheless
> > understand _Atomic.
> 
> I suggest not worrying about this problem until we run into it.

GCC 4.9.x is such a compiler that is non-C11 but supports _Atomic.

With this test program

==================================================================
#include <stdio.h>

#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
  && !defined (__STD_NO_ATOMICS__)
# define GL_HAMT_THREAD_SAFE 1
#else
# define GL_HAMT_THREAD_SAFE 0
#endif

_Atomic int x;

void increment_x (void)
{
  x++;
}

int main (void)
{
  printf ("GL_HAMT_THREAD_SAFE=%d\n", GL_HAMT_THREAD_SAFE);
  return 0;
}
==================================================================

the results on the various platforms are as follows:

                   GL_HAMT_THREAD_SAFE      _Atomic compiles   _Atomic works
  GCC 4.8                 0                       0                  0
  GCC 4.9                 0                       1                  1
  GCC 5                   1                       1                  1
  GCC 6                   1                       1                  1
  GCC 7                   1                       1                  1
  GCC 8                   1                       1                  1
  GCC 9                   1                       1                  1
  GCC 10                  1                       1                  1
  macOS 10.13             1                       1                  1
  FreeBSD 12              1                       1                  1
  NetBSD 9                1                       1                  1
  OpenBSD 6.7 cc          1                       1                  1
  OpenBSD 6.7 gcc         0                       0                  0
  AIX 7.1 xlc             0                       0                  0
  Solaris 10 cc           0                       0                  0
  Solaris 11.3 cc 12.4    0                       0                  0
  Solaris 11.3 cc 12.5    1                       1                  1
  Solaris 11.3 cc 12.6    1                       1                  1
  MSVC 14                 0                       0                  0

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-10 21:46           ` Marc Nieper-Wißkirchen
@ 2020-10-11  1:28             ` Bruno Haible
  2020-10-11  8:20               ` Marc Nieper-Wißkirchen
  2020-10-11 14:14             ` terminology Bruno Haible
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11  1:28 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen, bug-gnulib

Hi Marc,

Some comments:

* GL_HAMT_THREAD_SAFE can be defined to 1 not only if
    __STDC_VERSION__ >= 201112 && ! defined __STD_NO_ATOMICS__
  but also if
    __GNUC__ + (__GNUC_MINOR >= 9) > 4
  (see the other mail).

* Still '#ifdef GL_HAMT_THREAD_SAFE'.

* Typo s/comparision/comparison/

* Hamt_processor: The comment does not say what the return value means. Maybe it
  would be clearer if this type was moved down to the "Iteration" section.

* hamt_lookup: If the caller is allowed to modify the payload stored in the
  returned entry, this function should return a 'Hamt_entry *', not a
  'const Hamt_entry *'. Just like 'strchr' and 'strstr' return a 'char *',
  not a 'const char *'.

* In trienode_count, you don't need count_one_bits_l, only count_one_bits.
  It's OK to assume that 'uint32_t' and 'unsigned int' are the same type.

* If subtrie_lookup was inlined in entry_lookup, you could get rid of the
  forward declaration of entry_lookup, and compilers would be more likely
  to turn the recursion of entry_lookup into a loop (tail-recursion
  elimination).

* If subtrie_do_while was inlined in entry_do_while, you could get rid of the
  forward declaration of entry_do_while.

* It would be useful to be able to construct modules 'hamt-set' and 'hamt-map',
  analogous to 'hash-set' and 'hash-map', but the hamt API currently has two
  issues that block it:

  1) The iterator API is such that you cannot write the iteration using a loop;
     it involves a function call that is active during the entire iteration.
     This is also problematic for two other reasons:

       - Creating a closure in C (unlike Scheme or Lisp) is tedious hand-work.
         (The GNU C extension that allows local functions inside functions [1]
         is not a solution, because
           - it is unportable,
           - it forces an executable stack on many platforms, which is
             considered bad for security.)

       - In Common Lisp, iteration through a hash table is available not only
         through a function-call interface (MAPHASH [2]) but also through an
         iteration-loop interface (WITH-HASH-TABLE-ITERATOR [3], LOOP [4]).

         For example, in GNU clisp, LOOP expands to

         [1]> (macroexpand-1 '(loop for x being each hash-key of h do (foo x)))
         (MACROLET ((LOOP-FINISH NIL (SYSTEM::LOOP-FINISH-ERROR)))
          (BLOCK NIL
           (LET ((#:WHTI-3214 (SYSTEM::HASH-TABLE-ITERATOR H)))
            (MULTIPLE-VALUE-BIND (#:MORE?3215 #:HASH-KEY-3216 #:HASH-VALUE-3217)
             (SYSTEM::HASH-TABLE-ITERATE #:WHTI-3214)
             (DECLARE (IGNORE #:HASH-VALUE-3217))
             (LET ((X NIL))
              (LET NIL
               (MACROLET ((LOOP-FINISH NIL '(GO SYSTEM::END-LOOP)))
                (TAGBODY SYSTEM::BEGIN-LOOP (UNLESS #:MORE?3215 (LOOP-FINISH))
                 (SETQ X #:HASH-KEY-3216) (PROGN (PROGN (FOO X)))
                 (MULTIPLE-VALUE-SETQ
                  (#:MORE?3215 #:HASH-KEY-3216 #:HASH-VALUE-3217)
                  (SYSTEM::HASH-TABLE-ITERATE #:WHTI-3214))
                 (GO SYSTEM::BEGIN-LOOP) SYSTEM::END-LOOP
                 (MACROLET
                  ((LOOP-FINISH NIL (SYSTEM::LOOP-FINISH-WARN)
                    '(GO SYSTEM::END-LOOP)))))))))))) ;

         You can see that there is an initial function call HASH-TABLE-ITERATOR
         upfront, and then a function call to HASH-TABLE-ITERATE in each round
         of the loop.

         This principle makes it possible to iterate e.g. through a hash table
         and a list in parallel.

  2) We have a dilemma regarding use of malloc vs. xmalloc. While modules
     meant for use in programs can readily use xmalloc, modules meant for use
     (also) in libraries cannot use xmalloc, since a library is not supposed
     to terminate the program, even when memory is tight.

     The solution we found for this dilemma is that the *-set and *-map modules
     use just malloc, and we have thin wrapper modules (xset, xmap) that do
     call xalloc_die() in case of out-of-memory.

     Unfortunately, the API of many of the functions need to be adjusted to
     cope with the possibility of an ENOMEM failure. That is tedious work, and
     the code does not look so pretty afterwards... But I see no other way to
     make it fit for use in libraries.

Bruno

[1] https://gcc.gnu.org/onlinedocs/gcc-10.2.0/gcc/Nested-Functions.html
[2] http://www.ai.mit.edu/projects/iiip/doc/CommonLISP/HyperSpec/Body/fun_maphash.html
[3] http://www.ai.mit.edu/projects/iiip/doc/CommonLISP/HyperSpec/Body/mac_with-hash_ble-iterator.html
[4] http://www.ai.mit.edu/projects/iiip/doc/CommonLISP/HyperSpec/Body/sec_6-1-2-1-6.html



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-11  1:28             ` Bruno Haible
@ 2020-10-11  8:20               ` Marc Nieper-Wißkirchen
  2020-10-11  9:43                 ` Marc Nieper-Wißkirchen
                                   ` (3 more replies)
  0 siblings, 4 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11  8:20 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 03:28 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Hi Marc,
>
> Some comments:
>
> * GL_HAMT_THREAD_SAFE can be defined to 1 not only if
>     __STDC_VERSION__ >= 201112 && ! defined __STD_NO_ATOMICS__
>   but also if
>     __GNUC__ + (__GNUC_MINOR >= 9) > 4

Fixed.

>   (see the other mail).
>
> * Still '#ifdef GL_HAMT_THREAD_SAFE'.

Fixed.

> * Typo s/comparision/comparison/

Fixed.

> * Hamt_processor: The comment does not say what the return value means. Maybe it
>   would be clearer if this type was moved down to the "Iteration" section.

Moved.

> * hamt_lookup: If the caller is allowed to modify the payload stored in the
>   returned entry, this function should return a 'Hamt_entry *', not a
>   'const Hamt_entry *'. Just like 'strchr' and 'strstr' return a 'char *',
>   not a 'const char *'.

Unless the caller knows what it does, modifying the payload is not a
good idea because the entry is shared between different hamts. If the
caller really wants to modify the payload, it has to do an explicit
type cast (which is safe).

> * In trienode_count, you don't need count_one_bits_l, only count_one_bits.
>   It's OK to assume that 'uint32_t' and 'unsigned int' are the same type.

Okay.

> * If subtrie_lookup was inlined in entry_lookup, you could get rid of the
>   forward declaration of entry_lookup, and compilers would be more likely
>   to turn the recursion of entry_lookup into a loop (tail-recursion
>   elimination).

I would prefer to have smaller functions. Are there any relevant
compilers who do not inline the function at higher recursion levels?

> * If subtrie_do_while was inlined in entry_do_while, you could get rid of the
>   forward declaration of entry_do_while.

> * It would be useful to be able to construct modules 'hamt-set' and 'hamt-map',
>   analogous to 'hash-set' and 'hash-map', but the hamt API currently has two
>   issues that block it:
>
>   1) The iterator API is such that you cannot write the iteration using a loop;
>      it involves a function call that is active during the entire iteration.
>      This is also problematic for two other reasons:
>
>        - Creating a closure in C (unlike Scheme or Lisp) is tedious hand-work.
>          (The GNU C extension that allows local functions inside functions [1]
>          is not a solution, because
>            - it is unportable,
>            - it forces an executable stack on many platforms, which is
>              considered bad for security.)
>
>        - In Common Lisp, iteration through a hash table is available not only
>          through a function-call interface (MAPHASH [2]) but also through an
>          iteration-loop interface (WITH-HASH-TABLE-ITERATOR [3], LOOP [4]).
>
>          For example, in GNU clisp, LOOP expands to
>
>          [1]> (macroexpand-1 '(loop for x being each hash-key of h do (foo x)))
>          (MACROLET ((LOOP-FINISH NIL (SYSTEM::LOOP-FINISH-ERROR)))
>           (BLOCK NIL
>            (LET ((#:WHTI-3214 (SYSTEM::HASH-TABLE-ITERATOR H)))
>             (MULTIPLE-VALUE-BIND (#:MORE?3215 #:HASH-KEY-3216 #:HASH-VALUE-3217)
>              (SYSTEM::HASH-TABLE-ITERATE #:WHTI-3214)
>              (DECLARE (IGNORE #:HASH-VALUE-3217))
>              (LET ((X NIL))
>               (LET NIL
>                (MACROLET ((LOOP-FINISH NIL '(GO SYSTEM::END-LOOP)))
>                 (TAGBODY SYSTEM::BEGIN-LOOP (UNLESS #:MORE?3215 (LOOP-FINISH))
>                  (SETQ X #:HASH-KEY-3216) (PROGN (PROGN (FOO X)))
>                  (MULTIPLE-VALUE-SETQ
>                   (#:MORE?3215 #:HASH-KEY-3216 #:HASH-VALUE-3217)
>                   (SYSTEM::HASH-TABLE-ITERATE #:WHTI-3214))
>                  (GO SYSTEM::BEGIN-LOOP) SYSTEM::END-LOOP
>                  (MACROLET
>                   ((LOOP-FINISH NIL (SYSTEM::LOOP-FINISH-WARN)
>                     '(GO SYSTEM::END-LOOP)))))))))))) ;
>
>          You can see that there is an initial function call HASH-TABLE-ITERATOR
>          upfront, and then a function call to HASH-TABLE-ITERATE in each round
>          of the loop.
>
>          This principle makes it possible to iterate e.g. through a hash table
>          and a list in parallel.

In Scheme, it is even more relevant because you have call/cc so any
iteration can be suspended and restored any number of times.

I thought this could be solved by storing a "next" pointer in the
payload but this doesn't work with hamts sharing entries... So I will
add some iterator interface as well. Preferably one that can also be
used to implement Hamts in Scheme.

>   2) We have a dilemma regarding use of malloc vs. xmalloc. While modules
>      meant for use in programs can readily use xmalloc, modules meant for use
>      (also) in libraries cannot use xmalloc, since a library is not supposed
>      to terminate the program, even when memory is tight.

GMP more or less has this behavior.

>      The solution we found for this dilemma is that the *-set and *-map modules
>      use just malloc, and we have thin wrapper modules (xset, xmap) that do
>      call xalloc_die() in case of out-of-memory.
>
>      Unfortunately, the API of many of the functions need to be adjusted to
>      cope with the possibility of an ENOMEM failure. That is tedious work, and
>      the code does not look so pretty afterwards... But I see no other way to
>      make it fit for use in libraries.

The bigger problem is that it mustn't make the code slower for the
99,99999% of the use cases where there is either enough memory or
calling xalloc_die is the only reasonable action.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-11  8:20               ` Marc Nieper-Wißkirchen
@ 2020-10-11  9:43                 ` Marc Nieper-Wißkirchen
  2020-10-11 11:02                   ` HAMT iterator Bruno Haible
  2020-10-11 10:29                 ` HAMT iterators Bruno Haible
                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11  9:43 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib, Bruno Haible

I am going to implement the following iterator API as well:

/* An alternative interface to iterating through the entry of a hamt
   that does not make use of a higher-order function like
   hamt_do_while uses the opaque Hamt_iterator type.  */
typedef struct hamt_iterator Hamt_iterator;

/* Return a newly created iterator for HAMT.  */
extern Hamt_iterator *hamt_iterator_create (const Hamt *hamt);

/* Free the iterator ITER created through hamt_iterator_create or
   hamt_iterator_copy.  */
extern void hamt_iterator_free (Hamt_iterator *iter);

/* Return an independent copy of ITER that is initially in the same
   state.  */
extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);

/* Return true if ITER is not at the end, place the current element in
   *ELT_PTR and move the iterator forward.  Otherwise, return
   *false.  */
extern bool hamt_iterator_next (Hamt_iterator *iter,
                                Hamt_entry *const *elt_ptr);

Am So., 11. Okt. 2020 um 10:20 Uhr schrieb Marc Nieper-Wißkirchen
<marc.nieper+gnu@gmail.com>:

> Am So., 11. Okt. 2020 um 03:28 Uhr schrieb Bruno Haible <bruno@clisp.org>:

> >        - Creating a closure in C (unlike Scheme or Lisp) is tedious hand-work.
> >          (The GNU C extension that allows local functions inside functions [1]
> >          is not a solution, because
> >            - it is unportable,
> >            - it forces an executable stack on many platforms, which is
> >              considered bad for security.)

PS Conceptually, hamt_do_while takes a closure, which consists of the
function pointer PROC and the closure environment DATA.

If this interface is sufficient for an application, it is more
lightweight than the alternative interface above because all
intermediate state is stored on the stack.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterators
  2020-10-11  8:20               ` Marc Nieper-Wißkirchen
  2020-10-11  9:43                 ` Marc Nieper-Wißkirchen
@ 2020-10-11 10:29                 ` Bruno Haible
  2020-10-11 12:44                   ` Marc Nieper-Wißkirchen
  2020-10-11 10:53                 ` out-of-memory handling Bruno Haible
  2020-10-11 17:32                 ` [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
  3 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 10:29 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> >          You can see that there is an initial function call HASH-TABLE-ITERATOR
> >          upfront, and then a function call to HASH-TABLE-ITERATE in each round
> >          of the loop.
> >
> >          This principle makes it possible to iterate e.g. through a hash table
> >          and a list in parallel.
> 
> In Scheme, it is even more relevant because you have call/cc so any
> iteration can be suspended and restored any number of times.
> 
> I thought this could be solved by storing a "next" pointer in the
> payload but this doesn't work with hamts sharing entries... So I will
> add some iterator interface as well. Preferably one that can also be
> used to implement Hamts in Scheme.

The recursion across hamt_do_while / entry_do_while / subtrie_do_while /
bucket_do_while builds up a stack whose essential contents is a set of
indices, each at most 5 bits large, with a total of SIZE_WIDTH bits.
In other words, the iterator object can be a

struct
  {
    const Hamt *hamt;
    size_t position;
    int depth;
    const Hamt_entry *entry;
  };

where during the iteration
  - hamt stays unmodified,
  - position is incremented by some amount at each round,
  - depth is the recursion depth.
  - entry is a cache of hamt->nodes[(position >> 59) & 31][(position >> 54) & 31]...
    with depth-1 indexing operations, and is changed at each round,

HASH-TABLE-ITERATOR sets position, depth, entry to point to the first entry.
HASH-TABLE-ITERATE works as follows:
  - in a bucket, it increments position and fetches the next entry of the bucket,
  - if the bucket is done, it decreases depth and goes to
    hamt->nodes[(position >> 59) & 31][(position >> 54) & 31]... (with depth-1
    indexing operations) until it finds a subtrie that is not yet done,
    then it increases depth, entry to point to the first entry in that subtrie.

If done this way, the iterator will fit into the gl_set_iterator_t and
gl_map_iterator_t that we already have in gl_set.h and gl_map.h.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: out-of-memory handling
  2020-10-11  8:20               ` Marc Nieper-Wißkirchen
  2020-10-11  9:43                 ` Marc Nieper-Wißkirchen
  2020-10-11 10:29                 ` HAMT iterators Bruno Haible
@ 2020-10-11 10:53                 ` Bruno Haible
  2020-10-11 11:07                   ` Marc Nieper-Wißkirchen
  2020-10-11 17:32                 ` [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
  3 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 10:53 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> >   2) We have a dilemma regarding use of malloc vs. xmalloc. While modules
> >      meant for use in programs can readily use xmalloc, modules meant for use
> >      (also) in libraries cannot use xmalloc, since a library is not supposed
> >      to terminate the program, even when memory is tight.
> 
> GMP more or less has this behavior.

Indeed, GMP's out-of-memory options [1] are poor in this respect:
  "There’s currently no defined way for the allocation functions to recover
   from an error such as out of memory, they must terminate program execution.
   A longjmp or throwing a C++ exception will have undefined results."
Likewise for libffcall, alas.

> >      The solution we found for this dilemma is that the *-set and *-map modules
> >      use just malloc, and we have thin wrapper modules (xset, xmap) that do
> >      call xalloc_die() in case of out-of-memory.
> >
> >      Unfortunately, the API of many of the functions need to be adjusted to
> >      cope with the possibility of an ENOMEM failure. That is tedious work, and
> >      the code does not look so pretty afterwards... But I see no other way to
> >      make it fit for use in libraries.
> 
> The bigger problem is that it mustn't make the code slower for the
> 99,99999% of the use cases where there is either enough memory or
> calling xalloc_die is the only reasonable action.

OK. When we add hamt as a possible implementation for gl_set and gl_map, we
could document that out-of-memory handling must be done
  - either through xalloc_die(),
  - or by defining xmalloc and xrealloc [this is what Emacs does],
unlike for the other implementations.

Bruno

[1] https://gmplib.org/manual/Custom-Allocation



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11  9:43                 ` Marc Nieper-Wißkirchen
@ 2020-10-11 11:02                   ` Bruno Haible
  2020-10-11 11:08                     ` Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 11:02 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> I am going to implement the following iterator API as well:
> 
> /* An alternative interface to iterating through the entry of a hamt
>    that does not make use of a higher-order function like
>    hamt_do_while uses the opaque Hamt_iterator type.  */
> typedef struct hamt_iterator Hamt_iterator;
> 
> /* Return a newly created iterator for HAMT.  */
> extern Hamt_iterator *hamt_iterator_create (const Hamt *hamt);

The pointer return here is overkill. A prototype

  extern Hamt_iterator hamt_iterator_create (const Hamt *hamt);

is sufficient, because the way such an iterator is used is:

  Hamt_iterator iter = hamt_iterator_create (hamt);

  for (...)
    {
      ...
      Hamt_entry *elt;
      if (hamt_iterator_next (&iter, &elt))
        {
          ...
        }
      ...
    }

  hamt_iterator_free (&iter);

> /* Return an independent copy of ITER that is initially in the same
>    state.  */
> extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);

Then a copy function is not needed, because the user's program can do

  Hamt_iterator iter_clone = iter;

> /* Return true if ITER is not at the end, place the current element in
>    *ELT_PTR and move the iterator forward.  Otherwise, return
>    *false.  */
> extern bool hamt_iterator_next (Hamt_iterator *iter,
>                                 Hamt_entry *const *elt_ptr);

The 'const' here forbids assigning to *ELT_PTR.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: out-of-memory handling
  2020-10-11 10:53                 ` out-of-memory handling Bruno Haible
@ 2020-10-11 11:07                   ` Marc Nieper-Wißkirchen
  2020-10-11 11:56                     ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 11:07 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 12:54 Uhr schrieb Bruno Haible <bruno@clisp.org>:

> > The bigger problem is that it mustn't make the code slower for the
> > 99,99999% of the use cases where there is either enough memory or
> > calling xalloc_die is the only reasonable action.
>
> OK. When we add hamt as a possible implementation for gl_set and gl_map, we
> could document that out-of-memory handling must be done
>   - either through xalloc_die(),
>   - or by defining xmalloc and xrealloc [this is what Emacs does],
> unlike for the other implementations.

I am not so convinced of whether it makes sense to create a
gl_set/gl_map frontend for this hamt implementation. It is optimized
for the persistence (otherwise, use ordinary hash tables), while the
gl_set/gl_map interface is for destructive updates.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11 11:02                   ` HAMT iterator Bruno Haible
@ 2020-10-11 11:08                     ` Marc Nieper-Wißkirchen
  2020-10-11 12:04                       ` Bruno Haible
  2020-10-11 12:14                       ` Bruno Haible
  0 siblings, 2 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 11:08 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 13:02 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Hi Marc,
>
> > I am going to implement the following iterator API as well:
> >
> > /* An alternative interface to iterating through the entry of a hamt
> >    that does not make use of a higher-order function like
> >    hamt_do_while uses the opaque Hamt_iterator type.  */
> > typedef struct hamt_iterator Hamt_iterator;
> >
> > /* Return a newly created iterator for HAMT.  */
> > extern Hamt_iterator *hamt_iterator_create (const Hamt *hamt);
>
> The pointer return here is overkill. A prototype
>
>   extern Hamt_iterator hamt_iterator_create (const Hamt *hamt);
>
> is sufficient, because the way such an iterator is used is:
>
>   Hamt_iterator iter = hamt_iterator_create (hamt);
>
>   for (...)
>     {
>       ...
>       Hamt_entry *elt;
>       if (hamt_iterator_next (&iter, &elt))
>         {
>           ...
>         }
>       ...
>     }
>
>   hamt_iterator_free (&iter);
>
> > /* Return an independent copy of ITER that is initially in the same
> >    state.  */
> > extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);
>
> Then a copy function is not needed, because the user's program can do
>
>   Hamt_iterator iter_clone = iter;

The hamt itself has to be copied (to increase the reference counter).

> > /* Return true if ITER is not at the end, place the current element in
> >    *ELT_PTR and move the iterator forward.  Otherwise, return
> >    *false.  */
> > extern bool hamt_iterator_next (Hamt_iterator *iter,
> >                                 Hamt_entry *const *elt_ptr);
>
> The 'const' here forbids assigning to *ELT_PTR.

Yes. Wrong position of const.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: out-of-memory handling
  2020-10-11 11:07                   ` Marc Nieper-Wißkirchen
@ 2020-10-11 11:56                     ` Bruno Haible
  2020-10-11 12:20                       ` Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 11:56 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> I am not so convinced of whether it makes sense to create a
> gl_set/gl_map frontend for this hamt implementation.

Wikipedia [1] lists some advantages of HAMTs even without the persistence.

> It is optimized
> for the persistence (otherwise, use ordinary hash tables), while the
> gl_set/gl_map interface is for destructive updates.

How would a HAMT implementation look like that does not support persistence,
but is instead optimized for destructive updates? Probably the reference
counters would go away. Anything else?

Bruno

[1] https://en.wikipedia.org/wiki/Hash_array_mapped_trie#Advantages_of_HAMTs



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11 11:08                     ` Marc Nieper-Wißkirchen
@ 2020-10-11 12:04                       ` Bruno Haible
  2020-10-11 12:25                         ` Marc Nieper-Wißkirchen
  2020-10-11 12:14                       ` Bruno Haible
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 12:04 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> > > /* Return an independent copy of ITER that is initially in the same
> > >    state.  */
> > > extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);
> >
> > Then a copy function is not needed, because the user's program can do
> >
> >   Hamt_iterator iter_clone = iter;
> 
> The hamt itself has to be copied (to increase the reference counter).

Then the comment should clarify what "independent" means. I thought it
means that both iterators (the original one and the copy) can be used
simultaneously, as long as the HAMT does not change. Do you mean
something broader?
  - If someone creates a derivative of the HAMT, the iterator won't
    be affected, right? ("persistence")
  - If someone makes destructive modifications to the HAMT (through the
    *_x functions), the iterator will be affected if it has not yet
    passed the point of modification, right?
So, what is the scenario where increasing the reference count will make
a difference?

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11 11:08                     ` Marc Nieper-Wißkirchen
  2020-10-11 12:04                       ` Bruno Haible
@ 2020-10-11 12:14                       ` Bruno Haible
  2020-10-11 12:22                         ` Marc Nieper-Wißkirchen
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 12:14 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> > > I am going to implement the following iterator API as well:
> > >
> > > /* An alternative interface to iterating through the entry of a hamt
> > >    that does not make use of a higher-order function like
> > >    hamt_do_while uses the opaque Hamt_iterator type.  */
> > > typedef struct hamt_iterator Hamt_iterator;
> > >
> > > /* Return a newly created iterator for HAMT.  */
> > > extern Hamt_iterator *hamt_iterator_create (const Hamt *hamt);
> >
> > The pointer return here is overkill. A prototype
> >
> >   extern Hamt_iterator hamt_iterator_create (const Hamt *hamt);
> >
> > is sufficient, because the way such an iterator is used is:
> >
> >   Hamt_iterator iter = hamt_iterator_create (hamt);
> >
> >   for (...)
> >     {
> >       ...
> >       Hamt_entry *elt;
> >       if (hamt_iterator_next (&iter, &elt))
> >         {
> >           ...
> >         }
> >       ...
> >     }
> >
> >   hamt_iterator_free (&iter);
> >
> > > /* Return an independent copy of ITER that is initially in the same
> > >    state.  */
> > > extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);
> >
> > Then a copy function is not needed, because the user's program can do
> >
> >   Hamt_iterator iter_clone = iter;
> 
> The hamt itself has to be copied (to increase the reference counter).

Whether copying an iterator can be done by assignment or requires a function
call, is of second importance.

The more important point I wanted to make: Does allocating an iterator
require a (heap) memory allocation?

If you iterate with do_while, no memory allocation is needed, because all
information is stored on the stack, between the various function calls.

If you iterate with hamt_iterator_create, and the Hamt_iterator type
has bounded size (I guess it will not require more than 15 pointers and
13 indices), it can be stored on the caller's stack.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: out-of-memory handling
  2020-10-11 11:56                     ` Bruno Haible
@ 2020-10-11 12:20                       ` Marc Nieper-Wißkirchen
  2020-10-11 14:01                         ` HAMT for gl_set and gl_map Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 12:20 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 13:56 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Marc Nieper-Wißkirchen wrote:
> > I am not so convinced of whether it makes sense to create a
> > gl_set/gl_map frontend for this hamt implementation.
>
> Wikipedia [1] lists some advantages of HAMTs even without the persistence.
>
> > It is optimized
> > for the persistence (otherwise, use ordinary hash tables), while the
> > gl_set/gl_map interface is for destructive updates.
>
> How would a HAMT implementation look like that does not support persistence,
> but is instead optimized for destructive updates? Probably the reference
> counters would go away. Anything else?

The reference counter would go away as would all tests for "shared" in the code.

Without a reference counter, an element can be any "void *" (instead
of a pointer to a struct starting with Hamt_entry), which is a
substantial change.

We can add a number of #if's to the code so that it either compiles to
an implementation of persistent or of transient hamts, but I wouldn't
rather do it right now because it would make the code harder to read.
But when the hamt code has become stable (and has been put to use and,
therefore, to real-world testing), this sounds like a feasible option.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11 12:14                       ` Bruno Haible
@ 2020-10-11 12:22                         ` Marc Nieper-Wißkirchen
  0 siblings, 0 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 12:22 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 14:14 Uhr schrieb Bruno Haible <bruno@clisp.org>:

[...]

> > > > /* Return an independent copy of ITER that is initially in the same
> > > >    state.  */
> > > > extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);
> > >
> > > Then a copy function is not needed, because the user's program can do
> > >
> > >   Hamt_iterator iter_clone = iter;
> >
> > The hamt itself has to be copied (to increase the reference counter).
>
> Whether copying an iterator can be done by assignment or requires a function
> call, is of second importance.
>
> The more important point I wanted to make: Does allocating an iterator
> require a (heap) memory allocation?
>
> If you iterate with do_while, no memory allocation is needed, because all
> information is stored on the stack, between the various function calls.
>
> If you iterate with hamt_iterator_create, and the Hamt_iterator type
> has bounded size (I guess it will not require more than 15 pointers and
> 13 indices), it can be stored on the caller's stack.

That's a very good point you make. The hamt iterator has, of course,
(low) bounded size, so it can (and should) be allocated on the stack.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11 12:04                       ` Bruno Haible
@ 2020-10-11 12:25                         ` Marc Nieper-Wißkirchen
  2020-10-11 13:52                           ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 12:25 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 14:04 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Marc Nieper-Wißkirchen wrote:
> > > > /* Return an independent copy of ITER that is initially in the same
> > > >    state.  */
> > > > extern Hamt_iterator *hamt_iterator_copy (Hamt_iterator *iter);
> > >
> > > Then a copy function is not needed, because the user's program can do
> > >
> > >   Hamt_iterator iter_clone = iter;
> >
> > The hamt itself has to be copied (to increase the reference counter).
>
> Then the comment should clarify what "independent" means. I thought it
> means that both iterators (the original one and the copy) can be used
> simultaneously, as long as the HAMT does not change. Do you mean
> something broader?
>   - If someone creates a derivative of the HAMT, the iterator won't
>     be affected, right? ("persistence")

Yes.

>   - If someone makes destructive modifications to the HAMT (through the
>     *_x functions), the iterator will be affected if it has not yet
>     passed the point of modification, right?

No, the iterator shouldn't be affected. (The point of modification is
not well-defined without exposing implementation details.)

> So, what is the scenario where increasing the reference count will make
> a difference?

If you have a language with GC like Lisp or Scheme, say, the hamt may
be GC'd while the iterator is still live.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterators
  2020-10-11 10:29                 ` HAMT iterators Bruno Haible
@ 2020-10-11 12:44                   ` Marc Nieper-Wißkirchen
  2020-10-11 13:47                     ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 12:44 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 11. Okt. 2020 um 12:29 Uhr schrieb Bruno Haible <bruno@clisp.org>:

> The recursion across hamt_do_while / entry_do_while / subtrie_do_while /
> bucket_do_while builds up a stack whose essential contents is a set of
> indices, each at most 5 bits large, with a total of SIZE_WIDTH bits.
> In other words, the iterator object can be a

Yes, this is roughly how the implementation I worked on this morning
works. However, I haven't yet implemented the 5-bit compression.

> struct
>   {
>     const Hamt *hamt;
>     size_t position;
>     int depth;
>     const Hamt_entry *entry;
>   };
>
> where during the iteration
>   - hamt stays unmodified,
>   - position is incremented by some amount at each round,
>   - depth is the recursion depth.
>   - entry is a cache of hamt->nodes[(position >> 59) & 31][(position >> 54) & 31]...
>     with depth-1 indexing operations, and is changed at each round,
>
> HASH-TABLE-ITERATOR sets position, depth, entry to point to the first entry.
> HASH-TABLE-ITERATE works as follows:
>   - in a bucket, it increments position and fetches the next entry of the bucket,

For a bucket, in the worst case, we need the full size_t range for
position, so we have to store the path in the tree that leads to the
bucket in another size_t variable.

>   - if the bucket is done, it decreases depth and goes to
>     hamt->nodes[(position >> 59) & 31][(position >> 54) & 31]... (with depth-1
>     indexing operations) until it finds a subtrie that is not yet done,

This means a lot of pointer operations if large subtries are processed
that are deep in the trie. The hamt_do_while operation stores that
chain of entries from the root implicitly on the stack. The same
should be done for the iterator, meaning that an array of MAX_DEPTH
entries has to be cached. On a 32 bit system, this means 28 bytes,
and, on a 64 bit system, 130 bytes.

>     then it increases depth, entry to point to the first entry in that subtrie.
>
> If done this way, the iterator will fit into the gl_set_iterator_t and
> gl_map_iterator_t that we already have in gl_set.h and gl_map.h.
>
> Bruno


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterators
  2020-10-11 12:44                   ` Marc Nieper-Wißkirchen
@ 2020-10-11 13:47                     ` Bruno Haible
  0 siblings, 0 replies; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 13:47 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> For a bucket, in the worst case, we need the full size_t range for
> position

Right. I missed that. When the hash function is very bad (maps everything
to a single hash code), the bucket is very large.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT iterator
  2020-10-11 12:25                         ` Marc Nieper-Wißkirchen
@ 2020-10-11 13:52                           ` Bruno Haible
  0 siblings, 0 replies; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 13:52 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> >   - If someone creates a derivative of the HAMT, the iterator won't
> >     be affected, right? ("persistence")
> 
> Yes.

OK, that's one thing to document.

> > So, what is the scenario where increasing the reference count will make
> > a difference?
> 
> If you have a language with GC like Lisp or Scheme, say, the hamt may
> be GC'd while the iterator is still live.

And in C, when someone calls hamt_free on the HAMT, the iterator won't be
affected. IMO, that's another thing to document (because it's unexpected).

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: HAMT for gl_set and gl_map
  2020-10-11 12:20                       ` Marc Nieper-Wißkirchen
@ 2020-10-11 14:01                         ` Bruno Haible
  0 siblings, 0 replies; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 14:01 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> > How would a HAMT implementation look like that does not support persistence,
> > but is instead optimized for destructive updates? Probably the reference
> > counters would go away. Anything else?
> 
> The reference counter would go away as would all tests for "shared" in the code.
> 
> Without a reference counter, an element can be any "void *" (instead
> of a pointer to a struct starting with Hamt_entry), which is a
> substantial change.

Thanks for explaining.

> We can add a number of #if's to the code so that it either compiles to
> an implementation of persistent or of transient hamts, but I wouldn't
> rather do it right now because it would make the code harder to read.
> But when the hamt code has become stable (and has been put to use and,
> therefore, to real-world testing), this sounds like a feasible option.

I agree that stabilizing the current code should come first.

Then, whether the adaptations will be doable with a small set of #ifs
or whether it will be best to copy the code, will remain to be seen.
I mean, there are different red-black tree implementations in Gnulib
and in the Linux kernel, and it does not cause maintenance problems
because algorithmic code only very rarely needs updates.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: terminology
  2020-10-10 21:46           ` Marc Nieper-Wißkirchen
  2020-10-11  1:28             ` Bruno Haible
@ 2020-10-11 14:14             ` Bruno Haible
  2020-10-11 14:20               ` terminology Marc Nieper-Wißkirchen
  1 sibling, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 14:14 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: Paul Eggert, bug-gnulib

Hi Marc,

> I have attached an improved version of the HAMT module to this email.

How about terminology: "delete" vs. "remove"?

In C++ the verb "delete" is more or less the same as "free": It means
"deallocate" and "free memory".

Likewise in some C APIs, e.g. pthread_key_delete.

In this sense, 'hamt_delete' is triggering the wrong associations.
How about renaming 'hamt_remove'?

Deleting an entry from a hash table or HAMT does not always mean to
delete the object that the entry references.

The Java collections [1], C# collections [2], Python collections [3]
all use the verb "remove".

Yes, we still have hash_delete (in module 'hash') and 'argz_delete' (in
module 'argz'); these are very old modules.

Bruno

[1] https://docs.oracle.com/javase/7/docs/api/java/util/Set.html
[2] https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.hashset-1
[3] https://docs.python.org/3/library/stdtypes.html#set





^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: terminology
  2020-10-11 14:14             ` terminology Bruno Haible
@ 2020-10-11 14:20               ` Marc Nieper-Wißkirchen
  0 siblings, 0 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 14:20 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, Paul Eggert, bug-gnulib

Am So., 11. Okt. 2020 um 16:14 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Hi Marc,
>
> > I have attached an improved version of the HAMT module to this email.
>
> How about terminology: "delete" vs. "remove"?

> In this sense, 'hamt_delete' is triggering the wrong associations.
> How about renaming 'hamt_remove'?
>
> Deleting an entry from a hash table or HAMT does not always mean to
> delete the object that the entry references.
>
> The Java collections [1], C# collections [2], Python collections [3]
> all use the verb "remove".

I'm fine with this change and agree with your reasoning. I used the
hash module (see your comment below) as a guide for the interface,
that's why I called the procedure hamt_delete in the first place.

> Yes, we still have hash_delete (in module 'hash') and 'argz_delete' (in
> module 'argz'); these are very old modules.

Marc


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [New module] Persistent Hash Array Mapped Tries (HAMTs)
  2020-10-11  8:20               ` Marc Nieper-Wißkirchen
                                   ` (2 preceding siblings ...)
  2020-10-11 10:53                 ` out-of-memory handling Bruno Haible
@ 2020-10-11 17:32                 ` Marc Nieper-Wißkirchen
  2020-10-11 18:22                   ` Draft #3 (with iterators) Marc Nieper-Wißkirchen
  3 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 17:32 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib, Bruno Haible

Am So., 11. Okt. 2020 um 10:20 Uhr schrieb Marc Nieper-Wißkirchen
<marc.nieper+gnu@gmail.com>:
>
> Am So., 11. Okt. 2020 um 03:28 Uhr schrieb Bruno Haible <bruno@clisp.org>:

[...]

> > * hamt_lookup: If the caller is allowed to modify the payload stored in the
> >   returned entry, this function should return a 'Hamt_entry *', not a
> >   'const Hamt_entry *'. Just like 'strchr' and 'strstr' return a 'char *',
> >   not a 'const char *'.
>
> Unless the caller knows what it does, modifying the payload is not a
> good idea because the entry is shared between different hamts. If the
> caller really wants to modify the payload, it has to do an explicit
> type cast (which is safe).

I have noticed a problem with the current design: While an element can
be in more than one hamt (because copies of hamts are created through
various operations), an element cannot be actively inserted in more
than one hamt. The reason is that the reference counter of the element
is initialized whenever the element is inserted.

The way out is to expose the initialization function to the user, who
becomes responsible for initializing each element exactly once.

As soon as it is possible to insert an element more than once, another
observation will be made: The insert procedure does not accept a
pointer to a const element because it must be able to change the
reference counter internally. Thus it is more convenient if procedures
like hamt_lookup do not return const versions.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Draft #3 (with iterators)
  2020-10-11 17:32                 ` [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
@ 2020-10-11 18:22                   ` Marc Nieper-Wißkirchen
  2020-10-11 19:09                     ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-11 18:22 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib, Bruno Haible

[-- Attachment #1: Type: text/plain, Size: 2506 bytes --]

I have implemented everything we have discussed today. The patch
versus the current master is attached so it can be reviewed.

The changes versus the previous draft can be summarized as follows:

* Bug fixes.
* Use _Atomic on GCC 4.9+.
* Implement a lightweight iterator type akin to the iterators of gl_list.
* Expose element initialization to the user so than an element can be
inserted in more than one hamt.
* Rename delete to remove.
* Improve documentation.

Future options for when the code has matured:

* Inline a number of subtrie procedures to get rid of forward
declarations to help compilers.
* Implement purely non-pure hamts to replace ordinary hash tables.
* Add _nx versions of the procedures that won't call xalloc_die.

Many thanks to Bruno for his support, guidance and his great suggestions.

Marc

Am So., 11. Okt. 2020 um 19:32 Uhr schrieb Marc Nieper-Wißkirchen
<marc.nieper+gnu@gmail.com>:
>
> Am So., 11. Okt. 2020 um 10:20 Uhr schrieb Marc Nieper-Wißkirchen
> <marc.nieper+gnu@gmail.com>:
> >
> > Am So., 11. Okt. 2020 um 03:28 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> [...]
>
> > > * hamt_lookup: If the caller is allowed to modify the payload stored in the
> > >   returned entry, this function should return a 'Hamt_entry *', not a
> > >   'const Hamt_entry *'. Just like 'strchr' and 'strstr' return a 'char *',
> > >   not a 'const char *'.
> >
> > Unless the caller knows what it does, modifying the payload is not a
> > good idea because the entry is shared between different hamts. If the
> > caller really wants to modify the payload, it has to do an explicit
> > type cast (which is safe).
>
> I have noticed a problem with the current design: While an element can
> be in more than one hamt (because copies of hamts are created through
> various operations), an element cannot be actively inserted in more
> than one hamt. The reason is that the reference counter of the element
> is initialized whenever the element is inserted.
>
> The way out is to expose the initialization function to the user, who
> becomes responsible for initializing each element exactly once.
>
> As soon as it is possible to insert an element more than once, another
> observation will be made: The insert procedure does not accept a
> pointer to a const element because it must be able to change the
> reference counter internally. Thus it is more convenient if procedures
> like hamt_lookup do not return const versions.

[-- Attachment #2: 0001-hamt-New-module.patch --]
[-- Type: text/x-patch, Size: 53811 bytes --]

From ece7b9e3cd090e9c084efd72677669130e80dd9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Nieper-Wi=C3=9Fkirchen?= <marc@nieper-wisskirchen.de>
Date: Sat, 10 Oct 2020 23:38:21 +0200
Subject: [PATCH] hamt: New module.

This module provides (persistent) hash array mapped tries.
* MODULES.html.sh: Add hamt.
* lib/hamt.c: New file.
* lib/hamt.h: New file.
* modules/hamt: New file.
* modules/hamt-tests: New file.
* tests/test-hamt.c: New file.
---
 ChangeLog          |   11 +
 MODULES.html.sh    |    1 +
 lib/hamt.c         | 1083 ++++++++++++++++++++++++++++++++++++++++++++
 lib/hamt.h         |  253 +++++++++++
 modules/hamt       |   29 ++
 modules/hamt-tests |   11 +
 tests/test-hamt.c  |  378 ++++++++++++++++
 7 files changed, 1766 insertions(+)
 create mode 100644 lib/hamt.c
 create mode 100644 lib/hamt.h
 create mode 100644 modules/hamt
 create mode 100644 modules/hamt-tests
 create mode 100644 tests/test-hamt.c

diff --git a/ChangeLog b/ChangeLog
index 22f79fb09..7263e628f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2020-10-11  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+	hamt: New module.
+	This module provides (persistent) hash array mapped tries.
+	* MODULES.html.sh: Add hamt.
+	* lib/hamt.c: New file.
+	* lib/hamt.h: New file.
+	* modules/hamt: New file.
+	* modules/hamt-tests: New file.
+	* tests/test-hamt.c: New file.
+
 2020-10-11  Bruno Haible  <bruno@clisp.org>
 
 	stdioext: Update comments regarding UnixWare.
diff --git a/MODULES.html.sh b/MODULES.html.sh
index 7e7cdae3e..2907eb741 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2028,6 +2028,7 @@ func_all_modules ()
   func_module hash-pjw
   func_module hash-pjw-bare
   func_module hash
+  func_module hamt
   func_module readline
   func_module readtokens
   func_module readtokens0
diff --git a/lib/hamt.c b/lib/hamt.c
new file mode 100644
index 000000000..f92d9c4e8
--- /dev/null
+++ b/lib/hamt.c
@@ -0,0 +1,1083 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.  */
+
+#include <config.h>
+#define _GL_HAMT_INLINE _GL_EXTERN_INLINE
+#include "hamt.h"
+
+#include <flexmember.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include "count-one-bits.h"
+#include "verify.h"
+#include "xalloc.h"
+
+/* Reference counters can be shared between different threads if the
+   entry they belong to is shared between different threads.
+   Operations on them therefore have to be atomic so that no invalid
+   state is observable.
+
+   A thread must not modify an entry or its children (!) if its
+   reference count implies that the entry is shared by at least two
+   hamts.  */
+typedef
+#if GL_HAMT_THREAD_SAFE
+_Atomic
+#endif
+size_t ref_counter;
+
+/***************/
+/* Entry Types */
+/***************/
+
+/* Leaf nodes are of type element.  Non-leaf nodes are either subtries
+   or, if at maximal depth, buckets.  The entry type is stored in the
+   lower two bits of the reference counter, whereas reference counters
+   for entries are incremented and decremented in multiples of 4.  */
+enum entry_type
+{
+  element_entry = 0,
+  subtrie_entry = 1,
+  bucket_entry = 2
+};
+
+/* Return the type an entry.  */
+static enum entry_type
+entry_type (const Hamt_entry *entry)
+{
+  return entry->ref_count & 3;
+}
+
+/********************/
+/* Reference Counts */
+/********************/
+
+/* Initialize the reference counter, storing its type.  */
+static void
+init_ref_counter (ref_counter *counter, enum entry_type type)
+{
+  *counter = 4 + type;
+}
+
+/* Increase the reference counter of an entry.  */
+static void
+inc_ref_counter (ref_counter *counter)
+{
+  *counter += 4;
+}
+
+/* Decrease the entry reference counter.  Return false if the entry
+   can be deleted.  */
+static bool
+dec_ref_counter (ref_counter *counter)
+{
+  *counter -= 4;
+  return *counter >= 4;
+}
+
+/**************/
+/* Structures */
+/**************/
+
+/* Different generations of a hamt share a function table.  */
+struct function_table
+{
+  Hamt_hasher *hasher;
+  Hamt_comparator *comparator;
+  Hamt_freer *freer;
+  ref_counter ref_count;
+};
+
+/* Different generations of a hamt share subtries.  A singleton
+   subtrie is modelled as a single element.  */
+struct subtrie
+{
+  ref_counter ref_count;
+  /* Nodes carry labels from 0 to 31.  The i-th bit in MAP is set if
+     the node labelled i is present.  */
+  uint32_t map;
+  /* The length of the NODES array is the population count of MAP.
+     The order of the nodes corresponds to the order of the 1-bits in
+     MAP.  */
+  Hamt_entry *nodes [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* Buckets are used when different elements have the same hash values.  */
+struct bucket
+{
+  ref_counter ref_counter;
+  size_t elt_count;
+  Hamt_entry *elts [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* A hamt consists of its function table and the root entry.  */
+struct hamt
+{
+  struct function_table *functions;
+  /* The root entry is NULL for an empty HAMT.  */
+  Hamt_entry *root;
+};
+
+/*******************/
+/* Function Tables */
+/*******************/
+
+/* Allocate and initialize a function table.  */
+static struct function_table *
+create_function_table (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                       Hamt_freer *freer)
+{
+  struct function_table *functions = XMALLOC (struct function_table);
+  functions->hasher = hasher;
+  functions->comparator = comparator;
+  functions->freer = freer;
+  functions->ref_count = 1;
+  return functions;
+}
+
+/* Increment the reference count and return the function table. */
+static struct function_table *
+copy_function_table (struct function_table *function_table)
+{
+  ++function_table->ref_count;
+  return function_table;
+}
+
+/* Decrease the reference count and free the function table if the
+   reference count drops to zero.  */
+static void
+free_function_table (struct function_table *function_table)
+{
+  if (--function_table->ref_count)
+    return;
+  free (function_table);
+}
+
+/************/
+/* Elements */
+/************/
+
+/* Return an element's hash.  */
+static size_t
+hash_element (const struct function_table *functions, const Hamt_entry *elt)
+{
+  return functions->hasher (elt);
+}
+
+/* Compare two elements.  */
+static bool
+compare_elements (const struct function_table *functions,
+                  const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return functions->comparator (elt1, elt2);
+}
+
+/* Free an element.  */
+static void
+free_element (const struct function_table *functions, Hamt_entry *elt)
+{
+  if (dec_ref_counter (&elt->ref_count))
+    return;
+  functions->freer (elt);
+}
+
+/* Return the initialized element.  */
+static Hamt_entry *
+init_element (Hamt_entry *elt)
+{
+  init_ref_counter (&elt->ref_count, element_entry);
+  return elt;
+}
+
+/***********/
+/* Buckets */
+/***********/
+
+/* Allocate a partially initialized bucket with a given number of elements.  */
+static struct bucket *
+alloc_bucket (size_t elt_count)
+{
+  struct bucket *bucket
+    = xmalloc (FLEXSIZEOF (struct bucket, elts,
+                           sizeof (Hamt_entry) * elt_count));
+  init_ref_counter (&bucket->ref_counter, bucket_entry);
+  bucket->elt_count = elt_count;
+  return bucket;
+}
+
+/***********/
+/* Entries */
+/***********/
+
+/* Return true if the entry is shared between two or more hamts.
+   Otherwise, return false.
+
+   This procedure is used for destructive updates.  If an entry and
+   all its parents are not shared, it can be updated destructively
+   without effecting other hamts.  */
+static bool
+is_shared (const Hamt_entry *entry)
+{
+  return entry->ref_count >= 8;
+}
+
+/* Calculate and return the number of nodes in a subtrie.  */
+static int
+trienode_count (const struct subtrie *subtrie)
+{
+  return count_one_bits (subtrie->map); /* In Gnulib, we assume that
+                                           an integer has at least 32
+                                           bits. */
+}
+
+/* Allocate a partially initialized subtrie with a given number of nodes.  */
+static struct subtrie *
+alloc_subtrie (int node_count)
+{
+  struct subtrie *subtrie
+    = xmalloc (FLEXSIZEOF (struct subtrie, nodes,
+                           sizeof (Hamt_entry) * node_count));
+  init_ref_counter (&subtrie->ref_count, subtrie_entry);
+  return subtrie;
+}
+
+/* Return a conceptually copy of an entry.  */
+static Hamt_entry *
+copy_entry (Hamt_entry *entry)
+{
+  inc_ref_counter (&entry->ref_count);
+  return entry;
+}
+
+/* Return a new bucket that has the j-th element replaced.  */
+static struct bucket *
+replace_bucket_element (struct bucket *bucket, int j, Hamt_entry *elt)
+{
+  int n = bucket->elt_count;
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    if (k == j)
+      new_bucket->elts [k] = elt;
+    else
+      new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+  return new_bucket;
+}
+
+/* Return a new subtrie that has the j-th node replaced.  */
+static struct subtrie *
+replace_entry (struct subtrie *subtrie, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie);
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map;
+  for (int k = 0; k < n; ++k)
+    if (k == j)
+      new_subtrie->nodes [k] = entry;
+    else
+      new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+  return new_subtrie;
+}
+
+/* Return a new subtrie that has an entry labelled i inserted at
+   the j-th position.  */
+static struct subtrie *
+insert_entry (struct subtrie *subtrie, int i, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie) + 1;
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map | (1 << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k > j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k - 1]);
+      else
+        new_subtrie->nodes [k] = entry;
+    }
+  return new_subtrie;
+}
+
+/* Return a new entry that has the entry labelled i removed from
+   position j.  */
+static Hamt_entry *
+remove_subtrie_entry (struct subtrie *subtrie, int i, int j)
+{
+  int n = trienode_count (subtrie) - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (subtrie->nodes [1]);
+      return copy_entry (subtrie->nodes [0]);
+    }
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map & ~(1 << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k >= j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k + 1]);
+    }
+  return (Hamt_entry *) new_subtrie;
+}
+
+/* Return a new entry that has the entry at position j removed.  */
+static Hamt_entry *
+remove_bucket_entry (struct bucket *bucket, int j)
+{
+  int n = bucket->elt_count - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (bucket->elts [1]);
+      return copy_entry (bucket->elts [0]);
+    }
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+      else if (k >= j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k + 1]);
+    }
+  return (Hamt_entry *) new_bucket;
+}
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create a new, empty hash array mapped trie.  */
+Hamt *
+hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+             Hamt_freer *freer)
+{
+  struct function_table *functions
+    = create_function_table (hasher, comparator, freer);
+  Hamt *hamt = XMALLOC (Hamt);
+  hamt->functions = functions;
+  hamt->root = NULL;
+  return hamt;
+}
+
+/* Return a copy of the hamt.  */
+Hamt *
+hamt_copy (Hamt *hamt)
+{
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = hamt->root == NULL ? NULL : copy_entry (hamt->root);
+  return new_hamt;
+}
+
+/* Free a bucket.  */
+static void
+free_bucket (struct function_table const *functions, struct bucket *bucket)
+{
+  if (dec_ref_counter (&bucket->ref_counter))
+    return;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    free_element (functions, elts [i]);
+  free (bucket);
+}
+
+/* Forward declaration.  */
+static void free_subtrie (struct function_table const *functions,
+                          struct subtrie *subtrie);
+
+/* Free an entry.  */
+static void
+free_entry (struct function_table const *functions, Hamt_entry *entry)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      free_element (functions, entry);
+      break;
+    case subtrie_entry:
+      free_subtrie (functions, (struct subtrie *) entry);
+      break;
+    case bucket_entry:
+      free_bucket (functions, (struct bucket *) entry);
+      break;
+    default:
+      assume (0);
+    }
+}
+
+/* Free a trie recursively.  */
+static void
+free_subtrie (struct function_table const *functions, struct subtrie *subtrie)
+{
+  if (dec_ref_counter (&subtrie->ref_count))
+    return;
+  int n = trienode_count (subtrie);
+  Hamt_entry **node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    free_entry (functions, *node_ptr++);
+  free (subtrie);
+}
+
+/* Free a hamt.  */
+void
+hamt_free (Hamt *hamt)
+{
+  if (hamt->root != NULL)
+    free_entry (hamt->functions, hamt->root);
+  free_function_table (hamt->functions);
+  free (hamt);
+}
+
+/**********/
+/* Lookup */
+/**********/
+
+/* Lookup an element in a bucket.  */
+static Hamt_entry *
+bucket_lookup (const struct function_table *functions,
+               const struct bucket *bucket, const void *elt)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, elt, elts [i]))
+        return *elts;
+    }
+  return NULL;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_lookup (const struct function_table *functions,
+                                 Hamt_entry *entry,
+                                 const void *elt, size_t hash);
+
+/* Lookup an element in a bucket.  */
+static Hamt_entry *
+subtrie_lookup (const struct function_table *functions,
+                const struct subtrie *subtrie, const void *elt,
+                size_t hash)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+
+  if (! (map & (1 << i)))
+    return NULL;
+
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  return entry_lookup (functions, subtrie->nodes [j], elt, hash >> 5);
+}
+
+/* Lookup an element in an entry.  */
+static Hamt_entry *
+entry_lookup (const struct function_table *functions, Hamt_entry *entry,
+              const void *elt, size_t hash)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, elt, entry))
+        return entry;
+      return NULL;
+    case subtrie_entry:
+      return subtrie_lookup (functions, (struct subtrie *) entry, elt, hash);
+    case bucket_entry:
+      return bucket_lookup (functions, (struct bucket *) entry, elt);
+    default:
+      assume (0);
+    }
+}
+
+/* If ELT matches an entry in HAMT, return this entry.  Otherwise,
+   return NULL.  */
+Hamt_entry *
+hamt_lookup (const Hamt *hamt, const void *elt)
+{
+  if (hamt->root == NULL)
+    return NULL;
+
+  return entry_lookup (hamt->functions, hamt->root, elt,
+                       hash_element (hamt->functions, elt));
+}
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+/* Create a bucket populated with two elements.  */
+static struct bucket *
+create_populated_bucket (Hamt_entry *elt1, Hamt_entry *elt2)
+{
+  struct bucket *bucket = alloc_bucket (2);
+  bucket->elts [0] = elt1;
+  bucket->elts [1] = elt2;
+  return bucket;
+}
+
+/* Create a chain of subtrie nodes so that the resulting trie is
+   populated with exactly two elements.  */
+static Hamt_entry *
+create_populated_subtrie (Hamt_entry *elt1, Hamt_entry *elt2, size_t hash1,
+                          size_t hash2, int depth)
+{
+  if (depth >= _GL_HAMT_MAX_DEPTH)
+    return (Hamt_entry *) create_populated_bucket (elt1, elt2);
+
+  struct subtrie *subtrie;
+  int i1 = hash1 & 31;
+  int i2 = hash2 & 31;
+  if (i1 != i2)
+    {
+      subtrie = alloc_subtrie (2);
+      subtrie->map = (1 << i1) | (1 << i2);
+      if (i1 < i2)
+        {
+          subtrie->nodes [0] = elt1;
+          subtrie->nodes [1] = elt2;
+        }
+      else
+        {
+          subtrie->nodes [0] = elt2;
+          subtrie->nodes [1] = elt1;
+        }
+    }
+  else
+    {
+      subtrie = alloc_subtrie (1);
+      subtrie->map = 1 << i1;
+      subtrie->nodes [0]
+        = create_populated_subtrie (elt1, elt2, hash1 >> 5, hash2 >> 5,
+                                    depth + 1);
+    }
+  return (Hamt_entry *) subtrie;
+}
+
+/* Insert or replace an element in a bucket.  */
+static struct bucket *
+bucket_insert (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr, bool replace, bool shared)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry **elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, elts [i]))
+        {
+          if (replace)
+            {
+              if (shared)
+                {
+                  struct bucket *new_bucket
+                    = replace_bucket_element (bucket, i,
+                                              copy_entry (*elt_ptr));
+                  *elt_ptr = elts [i];
+                  return new_bucket;
+                }
+              free_element (functions, elts [i]);
+              elts [i] = copy_entry (*elt_ptr);
+              return bucket;
+            }
+          *elt_ptr = *elt_ptr == elts [i] ? NULL : elts [i];
+          return bucket;
+        }
+    }
+  struct bucket *new_bucket = alloc_bucket (elt_count + 1);
+  new_bucket->elts [0] = copy_entry (*elt_ptr);
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      new_bucket->elts [i + 1] = copy_entry (bucket->elts [i]);
+    }
+  if (replace)
+    *elt_ptr = NULL;
+  return new_bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_insert (const struct function_table *functions,
+                                 Hamt_entry *subtrie, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth, bool replace,
+                                 bool shared);
+
+/* Insert or replace an element in a subtrie.  */
+static struct subtrie *
+subtrie_insert (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth, bool replace,
+                bool shared)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1 << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_insert (functions, entry, elt_ptr, hash >> 5, depth + 1,
+                        replace, shared);
+      if (new_entry != entry)
+        {
+          if (shared)
+            return replace_entry (subtrie, j, new_entry);
+          free_entry (functions, entry);
+          subtrie->nodes [j] = new_entry;
+        }
+      return subtrie;
+    }
+  Hamt_entry *entry = copy_entry (*elt_ptr);
+  if (replace)
+    *elt_ptr = NULL;
+  return insert_entry (subtrie, i, j, entry);
+}
+
+/* Insert or replace an element in an entry.
+
+   REPLACE is true if we want replace instead of insert semantics.
+   SHARED is false if a destructive update has been requested and none
+   of the parent nodes are shared.  If an entry cannot be inserted
+   because the same entry with respect to pointer equality is already
+   present, *ELT_PTR is set to NULL to mark this special case.  */
+static Hamt_entry *
+entry_insert (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth, bool replace,
+              bool shared)
+{
+  shared |= is_shared (entry);
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          if (replace)
+            {
+              if (shared)
+                {
+                  Hamt_entry *new_entry = copy_entry (*elt_ptr);
+                  *elt_ptr = entry;
+                  return new_entry;
+                }
+              return copy_entry (*elt_ptr);
+            }
+          *elt_ptr = *elt_ptr == entry ? NULL : entry;
+          return entry;
+        }
+      Hamt_entry *new_entry = copy_entry (*elt_ptr);
+      if (replace)
+        *elt_ptr = NULL;
+      return create_populated_subtrie (new_entry, copy_entry (entry), hash,
+                                       (hash_element (functions, entry)
+                                        >> (5 * depth)), depth);
+    case subtrie_entry:
+      return (Hamt_entry *)
+        subtrie_insert (functions, (struct subtrie *) entry, elt_ptr, hash,
+                        depth, replace, shared);
+    case bucket_entry:
+      return (Hamt_entry *)
+        bucket_insert (functions, (struct bucket *) entry, elt_ptr, replace,
+                       shared);
+    default:
+      assume (0);
+    }
+}
+
+/* Insert or replace an element in the root.  */
+static Hamt_entry *
+root_insert (const struct function_table *functions, Hamt_entry *root,
+             Hamt_entry **elt_ptr, bool replace, bool shared)
+{
+  if (root == NULL)
+    return copy_entry (*elt_ptr);
+
+ return entry_insert (functions, root, elt_ptr,
+                      hash_element (functions, *elt_ptr), 0, replace, shared);
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return HAMT.  Otherwise, insert *ELT_PTR
+   into a copy of the HAMT and return the copy.  */
+Hamt *
+hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *elt = *elt_ptr;
+  Hamt_entry *new_entry = root_insert (hamt->functions, hamt->root,
+                                       elt_ptr, false, true);
+  if (*elt_ptr == NULL)
+    *elt_ptr = elt;
+
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/* Insert *ELT_PTR into a copy of HAMT and return the copy.  If an
+   existing element was replaced, set *ELT_PTR to this element, and to
+   NULL otherwise. */
+Hamt *
+hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = root_insert (hamt->functions, hamt->root, elt_ptr, true,
+                                true);
+  return new_hamt;
+}
+
+/* Remove an element in a bucket if found.  */
+static Hamt_entry *
+bucket_remove (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, elts [i]))
+        {
+          *elt_ptr = elts [i];
+          return remove_bucket_entry (bucket, i);
+        }
+    }
+  *elt_ptr = NULL;
+  return (Hamt_entry *) bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_remove (const struct function_table *functions,
+                                 Hamt_entry *entry, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth, bool shared);
+
+/* Remove an element in a subtrie if found.  */
+static Hamt_entry *
+subtrie_remove (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth, bool shared)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1 << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_remove (functions, entry, elt_ptr, hash >> 5, depth + 1,
+                        shared);
+      if (new_entry == NULL)
+        return remove_subtrie_entry (subtrie, i, j);
+      if (new_entry != entry)
+        {
+          if (shared)
+            return (Hamt_entry *) replace_entry (subtrie, j, new_entry);
+          free_entry (functions, entry);
+          subtrie->nodes [j] = new_entry;
+        }
+      return (Hamt_entry *) subtrie;
+    }
+  *elt_ptr = NULL;
+  return (Hamt_entry *) subtrie;
+}
+
+/* Remove an element in an entry if found.
+
+   SHARED is false if a destructive update has been requested and none
+   of the parent nodes are shared.  If an entry cannot be
+   removed, *ELT_PTR is set to NULL.  */
+static Hamt_entry *
+entry_remove (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth, bool shared)
+{
+  shared |= is_shared (entry);
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          *elt_ptr = entry;
+          return NULL;
+        }
+      *elt_ptr = NULL;
+      return entry;
+    case subtrie_entry:
+      return subtrie_remove (functions, (struct subtrie *) entry, elt_ptr, hash,
+                             depth, shared);
+    case bucket_entry:
+      return bucket_remove (functions, (struct bucket *) entry, elt_ptr);
+    default:
+      assume (0);
+    }
+}
+
+/* Remove an element in the root.  */
+static Hamt_entry *
+root_remove (const struct function_table *functions, Hamt_entry *root,
+             Hamt_entry **elt_ptr, bool shared)
+{
+  if (root == NULL)
+    return NULL;
+
+  return entry_remove (functions, root, elt_ptr,
+                       hash_element (functions, *elt_ptr), 0, shared);
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+element from the table, remove the element from a copy of the hamt and
+return the copy.  Otherwise, return HAMT.  */
+Hamt *
+hamt_remove (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *elt = *elt_ptr;
+  Hamt_entry *new_entry = root_remove (hamt->functions, hamt->root, elt_ptr,
+                                       true);
+  if (*elt_ptr == NULL)
+    *elt_ptr = elt;
+
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/*************/
+/* Iteration */
+/*************/
+
+/* Walk a bucket.  */
+static size_t
+bucket_do_while (const struct bucket *bucket, Hamt_processor *proc, void *data,
+                 bool *success)
+{
+  size_t cnt = 0;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      *success = proc (elts [i], data);
+      if (!success)
+        return cnt;
+      ++cnt;
+    }
+  return cnt;
+}
+
+/* Forward declaration.  */
+static size_t entry_do_while (Hamt_entry *entry, Hamt_processor *proc,
+                              void *data, bool *success);
+
+/* Walk a subtrie.  */
+static size_t subtrie_do_while (const struct subtrie *subtrie,
+                                Hamt_processor *proc, void *data, bool *success)
+{
+  size_t cnt = 0;
+  int n = trienode_count (subtrie);
+  Hamt_entry *const *node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    {
+      cnt += entry_do_while (*node_ptr++, proc, data, success);
+      if (!success)
+        return cnt;
+    }
+  return cnt;
+}
+
+/* Walk an entry.  */
+static size_t
+entry_do_while (Hamt_entry *entry, Hamt_processor *proc, void *data,
+                bool *success)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      *success = proc (entry, data);
+      return *success ? 1 : 0;
+    case subtrie_entry:
+      return subtrie_do_while ((struct subtrie *) entry, proc, data, success);
+    case bucket_entry:
+      return bucket_do_while ((struct bucket *) entry, proc, data, success);
+    default:
+      assume (0);
+    }
+}
+
+/* Call PROC for every entry of the hamt until it returns false.  The
+   first argument of PROC is the entry, the second argument is the value
+   of DATA as received.  Return the number of calls that returned
+   true.  */
+size_t
+hamt_do_while (const Hamt *hamt, Hamt_processor *proc, void *data)
+{
+  if (hamt->root == NULL)
+    return 0;
+
+  bool success = true;
+  return entry_do_while (hamt->root, proc, data, &success);
+}
+
+/* Create an iterator with a copy of the hamt.
+
+   For a valid iterator state the following is true: If DEPTH is
+   negative, the iterator is exhausted.  Otherwise, ENTRY [DEPTH] is
+   either the element that is produced next, or a bucket such that the
+   element at POSITION of the bucket is produced next.
+*/
+Hamt_iterator
+hamt_iterator (Hamt *hamt)
+{
+  Hamt_iterator iter;
+  iter.hamt = hamt_copy (hamt);
+  Hamt_entry *entry = hamt->root;
+  if (entry == NULL)
+    {
+      iter.depth = -1;
+      return iter;
+    }
+  iter.depth = 0;
+  iter.path = 0;
+  iter.position = 0;
+  while (iter.entry [iter.depth] = entry, entry_type (entry) == subtrie_entry)
+    {
+      const struct subtrie *subtrie = (const struct subtrie *) entry;
+      ++iter.depth;
+      entry = subtrie->nodes [0];
+    }
+  return iter;
+}
+
+/* Free the iterator and the associated hamt copy.  */
+void
+hamt_iterator_free (Hamt_iterator *iter)
+{
+  hamt_free (iter->hamt);
+}
+
+/* Create a copy of the complete iterator state, including the
+   hamt.  */
+Hamt_iterator
+hamt_iterator_copy (Hamt_iterator *iter)
+{
+  Hamt_iterator new_iter = *iter;
+  new_iter.hamt = hamt_copy (iter->hamt);
+  return new_iter;
+}
+
+/* Return the number of significant bits at DEPTH.  */
+static int
+bit_width (int depth)
+{
+  if (depth < _GL_HAMT_MAX_DEPTH - 1)
+    return 5;
+  return SIZE_WIDTH - 5 * (_GL_HAMT_MAX_DEPTH - 1);
+}
+
+/* The actual iteration.  */
+bool
+hamt_iterator_next (Hamt_iterator *iter, Hamt_entry **elt_ptr)
+{
+  int depth = iter->depth;
+  if (depth < 0)
+    return false;
+
+  Hamt_entry *entry = iter->entry [depth];
+  if (entry_type (entry) == bucket_entry)
+    {
+      struct bucket *bucket = (struct bucket *) entry;
+      *elt_ptr = bucket->elts [iter->position];
+      if (++iter->position < bucket->elt_count)
+        return true;
+    }
+  else
+    *elt_ptr = entry;
+
+  struct subtrie *subtrie;
+  while (iter->depth-- > 0)
+    {
+      int width = bit_width (iter->depth);
+      int j = (iter->path & ((1 << width) - 1)) + 1;
+      subtrie = (struct subtrie *) iter->entry [iter->depth];
+      if (j < trienode_count (subtrie))
+        {
+          ++iter->path;
+          while (iter->entry [++iter->depth] = subtrie->nodes [j],
+                 entry_type (iter->entry [iter->depth]) == subtrie_entry)
+            {
+              width = bit_width (iter->depth);
+              iter->path <<= width;
+              j = 0;
+              subtrie = (struct subtrie *) iter->entry [iter->depth];
+            }
+          iter->position = 0;
+          break;
+        }
+      iter->path >>= width;
+    }
+
+  return true;
+}
+
+/***********************/
+/* Destructive Updates */
+/***********************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return false.  Otherwise, insert *ELT_PTR
+   destructively into the hamt and return true.  */
+bool
+hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *elt = *elt_ptr;
+  Hamt_entry *old_root = hamt->root;
+  hamt->root = root_insert (hamt->functions, old_root, elt_ptr, false, false);
+  if (old_root != hamt->root && old_root != NULL)
+    free_entry (hamt->functions, old_root);
+  if (*elt_ptr == NULL)
+    {
+      *elt_ptr = elt;
+      return false;
+    }
+  return *elt_ptr == elt;
+}
+
+/* Insert ELT destructively into HAMT.  If an existing element was
+   replaced, return true.  Otherwise, return false.  */
+bool
+hamt_replace_x (Hamt *hamt, Hamt_entry *elt)
+{
+  Hamt_entry *old_root = hamt->root;
+  hamt->root = root_insert (hamt->functions, old_root, &elt, true, false);
+  if (old_root != hamt->root && old_root != NULL)
+    free_entry (hamt->functions, old_root);
+  return elt != NULL;
+}
+
+/* If ELT matches an element already in HAMT, remove the element
+   destructively from the hamt and return true.  Otherwise, return
+   false.  */
+bool
+hamt_remove_x (Hamt *hamt, Hamt_entry *elt)
+{
+  Hamt_entry *old_root = hamt->root;
+  hamt->root = root_remove (hamt->functions, old_root, &elt, false);
+  if (old_root != hamt->root)
+    free_entry (hamt->functions, old_root);
+  return elt != NULL;
+}
diff --git a/lib/hamt.h b/lib/hamt.h
new file mode 100644
index 000000000..e5a24c7d7
--- /dev/null
+++ b/lib/hamt.h
@@ -0,0 +1,253 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020. */
+
+/* This module provides a persistent version of hash array mapped
+   tries (hamts) that can be used in place of hash tables when pure
+   (functional) operations are needed.
+
+   A hash function and an equivalence predicate has to be provided for
+   the elements that can be inserted, replaced and removed in a hamt.
+   A hamt cannot contain duplicates that compare equal.
+
+   Each non-destructive updating operation returns a new hamt, which
+   shares structure with the original one.  Destructive updates only
+   effect the hamt, on which the destructive operation is applied.
+   For example, given a hamt HAMT1, any non-destructive update
+   operation (e.g. hamt_insert) will result in a new hamt HAMT2.
+   Whatever further operations (destructive or not, including freeing
+   a hamt) are applied to HAMT1 won't change HAMT2 and vice versa.  To
+   free all the memory, hash_free has therefore to be applied to both
+   HAMT1 and HAMT2.
+
+   If persistence is not needed, transient hash tables are probably
+   faster.
+
+   See also: Phil Bagwell (2000). Ideal Hash Trees (Report). Infoscience
+   Department, École Polytechnique Fédérale de Lausanne.
+
+   http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf  */
+
+#ifndef _GL_HAMT_H
+#define _GL_HAMT_H
+
+#ifndef _GL_INLINE_HEADER_BEGIN
+# error "Please include config.h first."
+#endif
+_GL_INLINE_HEADER_BEGIN
+#ifndef _GL_HAMT_INLINE
+# define _GL_HAMT_INLINE _GL_INLINE
+#endif
+
+/* The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
+   is thread-safe as long as two threads do not simultaneously access
+   the same hamt.  This is non-trivial as different hamts may share
+   some structure.  */
+
+#if (__STDC_VERSION__ < 201112 || defined __STD_NO_ATOMICS__) \
+  && __GNUC__ + (__GNUC_MINOR >= 9) <= 4
+# define GL_HAMT_THREAD_SAFE 0
+#else
+# define GL_HAMT_THREAD_SAFE 1
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Hash values are of type size_t.  For each level of the trie, we use
+   5 bits (corresponding to lg2 of the width of a 32-bit word.  */
+#define _GL_HAMT_MAX_DEPTH ((SIZE_WIDTH + 4) / 5)
+
+/************/
+/* Elements */
+/************/
+
+/* A hamt stores pointers to elements.  Each element has to be a
+   struct whose initial member is of the type Hamt_entry.  An element
+   is conceptually owned by a hamt as soon as it is inserted.  It will
+   be automatically freed as soon as the last hamt containing it is
+   freed.  */
+typedef struct
+{
+#if GL_HAMT_THREAD_SAFE
+  _Atomic
+#endif
+  size_t ref_count;
+} Hamt_entry;
+
+/* Initialize *ELT, which has to point to a structure as described
+   above and return ELT type-cast.
+
+   Before an element is inserted into any hamt, whether once or
+   multiple times, it has to be initialized exactly once.  */
+_GL_HAMT_INLINE Hamt_entry *
+hamt_element (void *elt)
+{
+  Hamt_entry *entry = elt;
+  entry->ref_count = 0;         /* This assumes that element_entry ==
+                                   0.  */
+  return entry;
+}
+
+/*************************/
+/* Opaque Hamt Structure */
+/*************************/
+
+/* In user-code, hamts are accessed through pointers to the opaque
+   Hamt type.  Two hamts are said to be the same if and only if their
+   pointers are equal. */
+typedef struct hamt Hamt;
+
+/*************/
+/* Interface */
+/*************/
+
+/* A hash function has to be pure, and two elements that compare equal
+   have to have the same hash value.  For a hash function to be a good
+   one, it is important that it uses all SIZE_WIDTH bits in
+   size_t.  */
+typedef size_t (Hamt_hasher) (const void *elt);
+
+/* A comparison function has to be pure, and two elements that have
+   equal pointers have to compare equal.  */
+typedef bool (Hamt_comparator) (const void *elt1, const void *elt2);
+
+/* A user-defined function that is called when the last hamt
+   containing a particular element is freed.  */
+typedef void (Hamt_freer) (Hamt_entry *elt);
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create and return a new and empty hash array mapped trie.  */
+extern Hamt *hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                          Hamt_freer *freer)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* Return a copy of HAMT, which is not the same in the sense above.
+   This procedure can be used, for example, so that two threads can
+   access the same data independently.  */
+extern Hamt *hamt_copy (Hamt *hamt) _GL_ATTRIBUTE_NODISCARD;
+
+/* Free the resources solely allocated by HAMT and all elements solely
+   contained in it.  */
+extern void hamt_free (Hamt *hamt);
+
+/**********/
+/* Lookup */
+/**********/
+
+/* If ELT matches an entry in HAMT, return this entry.  Otherwise,
+   return NULL.  */
+extern Hamt_entry *hamt_lookup (const Hamt *hamt, const void *elt);
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   existing element and return the original hamt.  Otherwise, insert
+   *ELT_PTR into a copy of the hamt and return the copy.  */
+extern Hamt *hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+existing element, remove the element from a copy of the hamt and
+return the copy.  Otherwise, return the original hamt.  */
+extern Hamt *hamt_remove (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/* Insert *ELT_PTR into a copy of HAMT and return the copy.  If an
+   existing element was replaced, set *ELT_PTR to this element, and to
+   NULL otherwise.  */
+extern Hamt *hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/*************/
+/* Iteration */
+/*************/
+
+/* A processor function is called during walking of a hamt, which
+   returns true to continue the walking.  */
+typedef bool (Hamt_processor) (Hamt_entry *elt, void *data);
+
+/* Call PROC for every entry of the hamt until it returns false.  The
+   first argument to the processor is the entry, the second argument
+   is the value of DATA as received.  Return the number of calls that
+   returned true.  During processing, the hamt mustn't be
+   modified.  */
+extern size_t hamt_do_while (const Hamt *hamt, Hamt_processor *proc,
+                             void *data);
+
+/* An alternative interface to iterating through the entry of a hamt
+   that does not make use of a higher-order function like
+   hamt_do_while uses the Hamt_iterator type, which can be allocated
+   through automatic variables on the stack.  As a hamt iterator
+   operates on a copy of a hamt, the original hamt can modified
+   (including freeing it) without affecting the iterator.  */
+struct hamt_iterator
+{
+  Hamt* hamt;
+  int depth;
+  size_t path;
+  size_t position;
+  Hamt_entry *entry[_GL_HAMT_MAX_DEPTH + 1];
+};
+typedef struct hamt_iterator Hamt_iterator;
+
+/* Create of copy of HAMT and return an initialized ITER on the
+   copy.  */
+extern Hamt_iterator hamt_iterator (Hamt *hamt);
+
+/* Free the resources allocated for ITER, including the hamt copy
+   associated with it.  */
+extern void hamt_iterator_free (Hamt_iterator *iter);
+
+/* Return an independent copy of ITER that is initially in the same
+   state.  Any operation on the original iterator (including freeing
+   it) doesn't affect the iterator copy and vice versa.  */
+extern Hamt_iterator hamt_iterator_copy (Hamt_iterator *iter);
+
+/* Return true if ITER is not at the end, place the current element in
+   *ELT_PTR and move the iterator forward.  Otherwise, return
+   false.  */
+extern bool hamt_iterator_next (Hamt_iterator *iter,
+                                Hamt_entry **elt_ptr);
+
+/***********************/
+/* Destructive Updates */
+/***********************/
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return false.  Otherwise, insert *ELT_PTR
+   destructively into the hamt and return true.  */
+extern bool hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr);
+
+/* Insert ELT destructively into HAMT.  If an existing element was
+   replaced, return true.  Otherwise, return false.  */
+extern bool hamt_replace_x (Hamt *hamt, Hamt_entry *elt);
+
+/* If ELT matches an element already in HAMT, remove the element
+   destructively from the hamt and return true.  Otherwise, return
+   false.  */
+extern bool hamt_remove_x (Hamt *hamt, Hamt_entry *elt);
+
+_GL_INLINE_HEADER_END
+
+#endif /* _GL_HAMT_H */
diff --git a/modules/hamt b/modules/hamt
new file mode 100644
index 000000000..d73f09c2d
--- /dev/null
+++ b/modules/hamt
@@ -0,0 +1,29 @@
+Description:
+Persistent hash array mapped tries.
+
+Files:
+lib/hamt.h
+lib/hamt.c
+
+Depends-on:
+count-one-bits
+flexmember
+inttypes-incomplete
+stdbool
+stdint
+verify
+xalloc
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += hamt.c
+
+Include:
+"hamt.h"
+
+License:
+GPL
+
+Maintainer:
+Marc Nieper-Wisskirchen
diff --git a/modules/hamt-tests b/modules/hamt-tests
new file mode 100644
index 000000000..f4f0ea4e0
--- /dev/null
+++ b/modules/hamt-tests
@@ -0,0 +1,11 @@
+Files:
+tests/test-hamt.c
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-hamt
+check_PROGRAMS += test-hamt
diff --git a/tests/test-hamt.c b/tests/test-hamt.c
new file mode 100644
index 000000000..d9bac6479
--- /dev/null
+++ b/tests/test-hamt.c
@@ -0,0 +1,378 @@
+/* Test of persistent hash array mapped trie implementation.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.  */
+
+#include <config.h>
+
+#include "hamt.h"
+#include "macros.h"
+#include "xalloc.h"
+
+typedef struct
+{
+  Hamt_entry entry;
+  int val;
+} Element;
+
+static int
+entry_value (const void *elt)
+{
+  return ((Element *) elt)->val;
+}
+
+static size_t
+hash_element (const void *elt)
+{
+  return entry_value (elt) & ~3; /* We drop the last bits so that we
+                                    can test hash collisions. */
+}
+
+static bool
+compare_element (const void *elt1, const void *elt2)
+{
+  return entry_value (elt1) == entry_value (elt2);
+}
+
+static void
+free_element (Hamt_entry *elt)
+{
+  free (elt);
+}
+
+static Hamt_entry *
+make_element (int n)
+{
+  Element *elt = XMALLOC (Element);
+  elt->val = n;
+  return hamt_element (&elt->entry);
+}
+
+static Hamt *
+test_hamt_create (void)
+{
+  return hamt_create (hash_element, compare_element, free_element);
+}
+
+
+static int sum = 0;
+static int flag;
+
+static bool
+proc (Hamt_entry *elt, void *data)
+{
+  if (data == &flag)
+    {
+      sum += entry_value (elt);
+      return true;
+    }
+  if (sum > 0)
+    {
+      sum = 0;
+      return true;
+    }
+  return false;
+}
+
+static void
+test_general (void)
+{
+  Hamt *hamt = test_hamt_create ();
+
+  Hamt_entry *x5 = make_element (5);
+  Hamt_entry *p = x5;
+  Hamt *hamt1 = hamt_insert (hamt, &p);
+  ASSERT (hamt1 != hamt);
+  ASSERT (hamt_lookup (hamt, x5) == NULL);
+  ASSERT (hamt_lookup (hamt1, x5) == x5);
+  hamt_free (hamt);
+
+  Hamt_entry *y5 = make_element (5);
+  p = y5;
+  Hamt *hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 == hamt1);
+  ASSERT (p == x5);
+  ASSERT (hamt_lookup (hamt1, y5) == x5);
+
+  p = y5;
+  hamt = hamt_replace (hamt1, &p);
+  ASSERT (p == x5);
+  ASSERT (hamt_lookup (hamt, y5) == y5);
+  hamt_free (hamt);
+  y5 = make_element (5);
+
+  Hamt_entry *z37 = make_element (37);
+  p = z37;
+  hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 != hamt1);
+  ASSERT (p == z37);
+  ASSERT (hamt_lookup (hamt1, z37) == NULL);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+  hamt_free (hamt1);
+
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 2);
+  ASSERT (sum == 42);
+  ASSERT (hamt_do_while (hamt2, proc, NULL) == 1);
+  ASSERT (sum == 0);
+
+  p = y5;
+  hamt1 = hamt_remove (hamt2, &p);
+  ASSERT (hamt1 != hamt2);
+  ASSERT (p == x5);
+
+  ASSERT (hamt_lookup (hamt1, x5) == NULL);
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+
+  hamt_free (hamt1);
+  Hamt_entry *x4 = make_element (4);
+  hamt1 = hamt_insert (hamt2, &x4);
+  hamt_free (hamt2);
+  Hamt_entry *x6 = make_element (6);
+  hamt2 = hamt_insert (hamt1, &x6);
+  hamt_free (hamt1);
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum == 52);
+
+  hamt1 = hamt_remove (hamt2, &x4);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum = 52);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt1, proc, &flag) == 3);
+  ASSERT (sum  = 48);
+
+  hamt_free (hamt1);
+  hamt_free (hamt2);
+  free_element (y5);
+}
+
+static bool
+true_processor (_GL_ATTRIBUTE_MAYBE_UNUSED Hamt_entry *elt,
+                _GL_ATTRIBUTE_MAYBE_UNUSED void *data)
+{
+  return true;
+}
+
+static size_t
+element_count (Hamt *hamt)
+{
+  return hamt_do_while (hamt, true_processor, NULL);
+}
+
+struct find_values_context
+{
+  size_t n;
+  int *elts;
+  bool *found;
+};
+
+static bool
+find_values_processor (Hamt_entry *entry, void *data)
+{
+  struct find_values_context *ctx = data;
+  int val = entry_value (entry);
+  for (size_t i = 0; i < ctx->n; ++i)
+    if (ctx->elts [i] == val && !ctx->found [i])
+      {
+        ctx->found [i] = true;
+        return true;
+      }
+  return false;
+}
+
+static bool
+find_values (Hamt *hamt, size_t n, int *elts)
+{
+  bool *found = XCALLOC (n, bool);
+  struct find_values_context ctx = {n, elts, found};
+  bool res = hamt_do_while (hamt, find_values_processor, &ctx) == n;
+  free (found);
+  return res;
+}
+
+static size_t
+insert_values (Hamt **hamt, size_t n, int *elts, bool destructive)
+{
+  size_t cnt = 0;
+  for (size_t i = 0; i < n; ++i)
+    {
+      Hamt_entry *p = make_element (elts [i]);
+      Hamt_entry *q = p;
+      if (destructive)
+        {
+          if (hamt_insert_x (*hamt, &p))
+            ++cnt;
+          else
+            free_element (q);
+        }
+      else
+        {
+          Hamt *new_hamt = hamt_insert (*hamt, &p);
+          if (new_hamt != *hamt)
+            {
+              hamt_free (*hamt);
+              *hamt = new_hamt;
+              ++cnt;
+            }
+          else
+            {
+              free_element (q);
+            }
+        }
+    }
+  return cnt;
+}
+
+static size_t
+replace_values (Hamt **hamt, size_t n, int *elts, bool destructive)
+{
+  size_t cnt = 0;
+  for (size_t i = 0; i < n; ++i)
+    {
+      Hamt_entry *p = make_element (elts [i]);
+      if (destructive)
+        {
+          if (hamt_replace_x (*hamt, p))
+            ++cnt;
+        }
+      else
+        {
+          Hamt *new_hamt = hamt_replace (*hamt, &p);
+          hamt_free (*hamt);
+          *hamt = new_hamt;
+          if (p != NULL)
+            ++cnt;
+        }
+    }
+  return cnt;
+}
+
+static size_t
+remove_values (Hamt **hamt, size_t n, int *elts, bool destructive)
+{
+  size_t cnt = 0;
+  for (size_t i = 0; i < n; ++i)
+    {
+      Hamt_entry *p = make_element (elts [i]);
+      Hamt_entry *q = p;
+      if (destructive)
+        {
+          if (hamt_remove_x (*hamt, p))
+            ++cnt;
+        }
+      else
+        {
+          Hamt *new_hamt = hamt_remove (*hamt, &p);
+          if (new_hamt != *hamt)
+            {
+              hamt_free (*hamt);
+              *hamt = new_hamt;
+              ++cnt;
+            }
+        }
+      free (q);
+    }
+  return cnt;
+}
+
+static int val_array1 [10] = {1, 2, 3, 4, 33, 34, 35, 36, 1024, 1025};
+static int val_array2 [10] = {1, 2, 34, 36, 1025, 32768, 32769, 32770, 32771,
+                              32772};
+
+static void
+test_functional_update (void)
+{
+  Hamt *hamt = test_hamt_create ();
+
+  ASSERT (insert_values (&hamt, 10, val_array1, false) == 10);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (insert_values (&hamt, 10, val_array2, false) == 5);
+  ASSERT (element_count (hamt) == 15);
+  ASSERT (remove_values (&hamt, 10, val_array1, false) == 10);
+  ASSERT (element_count (hamt) == 5);
+  ASSERT (remove_values (&hamt, 10, val_array2, false) == 5);
+  ASSERT (element_count (hamt) == 0);
+
+  ASSERT (replace_values (&hamt, 10, val_array1, false) == 0);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (replace_values (&hamt, 10, val_array2, false) == 5);
+  ASSERT (element_count (hamt) == 15);
+
+  hamt_free (hamt);
+}
+
+static void
+test_destructive_update (void)
+{
+  Hamt *hamt = test_hamt_create ();
+
+  ASSERT (insert_values (&hamt, 10, val_array1, true) == 10);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (insert_values (&hamt, 10, val_array2, true) == 5);
+  ASSERT (element_count (hamt) == 15);
+  ASSERT (remove_values (&hamt, 10, val_array1, true) == 10);
+  ASSERT (element_count (hamt) == 5);
+  ASSERT (remove_values (&hamt, 10, val_array2, true) == 5);
+  ASSERT (element_count (hamt) == 0);
+
+  ASSERT (replace_values (&hamt, 10, val_array1, true) == 0);
+  ASSERT (element_count (hamt) == 10);
+  ASSERT (find_values (hamt, 10, val_array1));
+  ASSERT (replace_values (&hamt, 10, val_array2, true) == 5);
+  ASSERT (element_count (hamt) == 15);
+
+  hamt_free (hamt);
+}
+
+static void
+test_iterator (void)
+{
+  Hamt *hamt = test_hamt_create ();
+  ASSERT (insert_values (&hamt, 10, val_array1, true) == 10);
+  Hamt_iterator iter [1] = {hamt_iterator (hamt)};
+  size_t cnt = 0;
+  bool found [10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  Hamt_entry *p;
+  while (hamt_iterator_next (iter, &p))
+    {
+      for (size_t i = 0; i < 10; ++i)
+        if (val_array1 [i] == entry_value (p))
+          {
+            ASSERT (!found [i]);
+            found [i] = true;
+            ++cnt;
+            break;
+          }
+    }
+  ASSERT (cnt == 10);
+  hamt_iterator_free (iter);
+  hamt_free (hamt);
+}
+
+int
+main (void)
+{
+  test_general ();
+  test_functional_update ();
+  test_destructive_update ();
+  test_iterator ();
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 45+ messages in thread

* Re: Draft #3 (with iterators)
  2020-10-11 18:22                   ` Draft #3 (with iterators) Marc Nieper-Wißkirchen
@ 2020-10-11 19:09                     ` Bruno Haible
  2020-10-12  6:06                       ` Non-opaque hamt type? Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 19:09 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> I have implemented everything we have discussed today. The patch
> versus the current master is attached so it can be reviewed.
> 
> The changes versus the previous draft can be summarized as follows:
> 
> * Bug fixes.
> * Use _Atomic on GCC 4.9+.
> * Implement a lightweight iterator type akin to the iterators of gl_list.
> * Expose element initialization to the user so than an element can be
> inserted in more than one hamt.
> * Rename delete to remove.
> * Improve documentation.

OK for me. Thanks for having listened to the many remarks.

Awesome work!!

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: _Atomic
  2020-10-10 22:39         ` _Atomic Bruno Haible
@ 2020-10-11 20:15           ` Paul Eggert
  2020-10-11 21:47             ` _Atomic Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Paul Eggert @ 2020-10-11 20:15 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

On 10/10/20 3:39 PM, Bruno Haible wrote:
> GCC 4.9.x is such a compiler that is non-C11 but supports _Atomic.

Oh, I was thinking the other way: a compiler that says it's C11 but doesn't 
support _Atomic. I wrote my thoughts backwards, though.

If Gnulib code won't use _Atomic when compiled with GCC 4.9 (even though _Atomic 
would work), that doesn't sound like a big deal. For example, Debian 8 Jessie 
uses GCC 4.9 but long term support for Jessie ended in June and if Debian 
doesn't support it we don't need to worry about it.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: _Atomic
  2020-10-11 20:15           ` _Atomic Paul Eggert
@ 2020-10-11 21:47             ` Bruno Haible
  0 siblings, 0 replies; 45+ messages in thread
From: Bruno Haible @ 2020-10-11 21:47 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Paul Eggert wrote:
> Oh, I was thinking the other way: a compiler that says it's C11 but doesn't 
> support _Atomic.

I haven't found such a compiler in my testing [1]. If one appears, we can add
an ad-hoc test or an Autoconf test.

Bruno

[1] https://lists.gnu.org/archive/html/bug-gnulib/2020-10/msg00068.html



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2020-10-11 19:09                     ` Bruno Haible
@ 2020-10-12  6:06                       ` Marc Nieper-Wißkirchen
  2020-10-18 14:39                         ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-12  6:06 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

One final issue (I hope):

At the moment, the header file exposes an opaque struct Hamt and
communication with the code happens through (stack-allocated) pointers
to a Hamt. In the implementation, however, each Hamt just consists of
two pointers (a pointer to a function table and a pointer to the
root), so stack-allocating Hamts instead and passing them around by
values makes as much sense.

This would have the advantage of reducing heap allocation. The
disadvantage would be that adding further fields to a Hamt in future
extensions might be more problematic and that identity of Hamts cannot
be decided through pointer identity anymore (so the protocol how to
decide whether an element has been inserted or not has to be changed).

What do you think?

Am So., 11. Okt. 2020 um 21:09 Uhr schrieb Bruno Haible <bruno@clisp.org>:

> OK for me. Thanks for having listened to the many remarks.

Your remarks only helped to make the module better!

Marc


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2020-10-12  6:06                       ` Non-opaque hamt type? Marc Nieper-Wißkirchen
@ 2020-10-18 14:39                         ` Bruno Haible
  2020-10-18 15:29                           ` Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-18 14:39 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> At the moment, the header file exposes an opaque struct Hamt and
> communication with the code happens through (stack-allocated) pointers
> to a Hamt. In the implementation, however, each Hamt just consists of
> two pointers (a pointer to a function table and a pointer to the
> root), so stack-allocating Hamts instead and passing them around by
> values makes as much sense.
> 
> This would have the advantage of reducing heap allocation.

By how much does it reduce the heap allocation? Say, someone allocates
a HAMT and adds 100 entries. There will be heap allocations of ca. 5-10
buckets and internal nodes. Adding one more heap allocation for the
root object is not a tragedy.

So, in the end the question is whether to optimize the case of small HAMTs.

> The disadvantage would be that adding further fields to a Hamt in future
> extensions might be more problematic

True. But when this happens we can mitigate this through a warning in the
NEWS file.

> and that identity of Hamts cannot
> be decided through pointer identity anymore (so the protocol how to
> decide whether an element has been inserted or not has to be changed).

Does this protocol change introduce a significant slowdown?

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2020-10-18 14:39                         ` Bruno Haible
@ 2020-10-18 15:29                           ` Marc Nieper-Wißkirchen
  2020-10-18 17:58                             ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-18 15:29 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Am So., 18. Okt. 2020 um 16:39 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Hi Marc,
>
> > At the moment, the header file exposes an opaque struct Hamt and
> > communication with the code happens through (stack-allocated) pointers
> > to a Hamt. In the implementation, however, each Hamt just consists of
> > two pointers (a pointer to a function table and a pointer to the
> > root), so stack-allocating Hamts instead and passing them around by
> > values makes as much sense.
> >
> > This would have the advantage of reducing heap allocation.
>
> By how much does it reduce the heap allocation? Say, someone allocates
> a HAMT and adds 100 entries. There will be heap allocations of ca. 5-10
> buckets and internal nodes. Adding one more heap allocation for the
> root object is not a tragedy.

A struct with just two pointers is allocated on the heap, meaning it
is probably below the minimum malloc size on most implementations. In
architectures where structs of two pointers are passed by value in and
out of functions, a heap-allocated structure will mean one more
pointer indirection per hamt access.

> So, in the end the question is whether to optimize the case of small HAMTs.
>
> > The disadvantage would be that adding further fields to a Hamt in future
> > extensions might be more problematic
>
> True. But when this happens we can mitigate this through a warning in the
> NEWS file.
>
> > and that identity of Hamts cannot
> > be decided through pointer identity anymore (so the protocol how to
> > decide whether an element has been inserted or not has to be changed).
>
> Does this protocol change introduce a significant slowdown?

The existing protocol is as follows:

Hamt_entry *e = hamt_entry (...);
Hamt_entry *p = e;
Hamt *new_hamt = hamt_insert (old_hamt, &p);
if (old_hamt == new_hamt)
  {
    /* The element hasn't been insert as an equivalent element has
already been in the hamt. p now holds a reference to the entry that
already existed in the hamt.
    element_free (e);
    ...
    hamt_free (old_hamt); /* We don't have to free new_hamt because no
new hamt was created. */
  }
else
  {
    /* The element has been inserted. p hasn't changed. */
    ...
    hamt_free (old_hamt);  /* This frees all hamts */
    hamt_free (new_hamt); /* and all elements inserted, including e. */
  }

A protocol where no pointer values need to be compared could use p to
carry the information:

Hamt_entry *e = hamt_entry ();
Hamt_entry *p = e;
Hamt new_hamt = hamt_insert (old_hamt, &p);
if (p == e)
  {
    /* The element has been inserted. */
    ... /* See above. */
  }
else if (p == NULL)
  {
    /* The element e already existed in the hamt. */
   ... /* See above. */
  }
else /* p != e && p != NULL */
  {
    /* An element equivalent to e already existed in the hamt. p now
holds this element. */
    ... /* See above. */
  }

Marc


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2020-10-18 15:29                           ` Marc Nieper-Wißkirchen
@ 2020-10-18 17:58                             ` Bruno Haible
  2020-10-18 18:11                               ` Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Bruno Haible @ 2020-10-18 17:58 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Marc Nieper-Wißkirchen wrote:
> The existing protocol is as follows:
> 
> Hamt_entry *e = hamt_entry (...);
> Hamt_entry *p = e;
> Hamt *new_hamt = hamt_insert (old_hamt, &p);
> if (old_hamt == new_hamt)
>   {
>     /* The element hasn't been insert as an equivalent element has already been in the hamt. p now holds a reference to the entry that already existed in the hamt.
>     element_free (e);
>     ...
>     hamt_free (old_hamt); /* We don't have to free new_hamt because no new hamt was created. */
>   }
> else
>   {
>     /* The element has been inserted. p hasn't changed. */
>     ...
>     hamt_free (old_hamt);  /* This frees all hamts */
>     hamt_free (new_hamt); /* and all elements inserted, including e. */
>   }
> 
> A protocol where no pointer values need to be compared could use p to
> carry the information:
> 
> Hamt_entry *e = hamt_entry ();
> Hamt_entry *p = e;
> Hamt new_hamt = hamt_insert (old_hamt, &p);
> if (p == e)
>   {
>     /* The element has been inserted. */
>     ... /* See above. */
>   }
> else if (p == NULL)
>   {
>     /* The element e already existed in the hamt. */
>    ... /* See above. */
>   }
> else /* p != e && p != NULL */
>   {
>     /* An element equivalent to e already existed in the hamt. p now holds this element. */
>     ... /* See above. */
>   }

I find the latter protocol more robust: it does not depend on details of
the implementation of hamt_insert.

Can you decide on your original question (allow stack-allocated HAMTs)?
I don't feel I can help you decide this.

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2020-10-18 17:58                             ` Bruno Haible
@ 2020-10-18 18:11                               ` Marc Nieper-Wißkirchen
  2021-04-03  9:08                                 ` Marc Nieper-Wißkirchen
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-18 18:11 UTC (permalink / raw)
  To: Bruno Haible; +Cc: Marc Nieper-Wißkirchen, bug-gnulib

Okay, if you find the latter protocol better anyway, I will switch to
this protocol, and hamts will be stack-allocated (just two words) and
passed by value.

Thanks,

Marc

Am So., 18. Okt. 2020 um 19:58 Uhr schrieb Bruno Haible <bruno@clisp.org>:
>
> Marc Nieper-Wißkirchen wrote:
> > The existing protocol is as follows:
> >
> > Hamt_entry *e = hamt_entry (...);
> > Hamt_entry *p = e;
> > Hamt *new_hamt = hamt_insert (old_hamt, &p);
> > if (old_hamt == new_hamt)
> >   {
> >     /* The element hasn't been insert as an equivalent element has already been in the hamt. p now holds a reference to the entry that already existed in the hamt.
> >     element_free (e);
> >     ...
> >     hamt_free (old_hamt); /* We don't have to free new_hamt because no new hamt was created. */
> >   }
> > else
> >   {
> >     /* The element has been inserted. p hasn't changed. */
> >     ...
> >     hamt_free (old_hamt);  /* This frees all hamts */
> >     hamt_free (new_hamt); /* and all elements inserted, including e. */
> >   }
> >
> > A protocol where no pointer values need to be compared could use p to
> > carry the information:
> >
> > Hamt_entry *e = hamt_entry ();
> > Hamt_entry *p = e;
> > Hamt new_hamt = hamt_insert (old_hamt, &p);
> > if (p == e)
> >   {
> >     /* The element has been inserted. */
> >     ... /* See above. */
> >   }
> > else if (p == NULL)
> >   {
> >     /* The element e already existed in the hamt. */
> >    ... /* See above. */
> >   }
> > else /* p != e && p != NULL */
> >   {
> >     /* An element equivalent to e already existed in the hamt. p now holds this element. */
> >     ... /* See above. */
> >   }
>
> I find the latter protocol more robust: it does not depend on details of
> the implementation of hamt_insert.
>
> Can you decide on your original question (allow stack-allocated HAMTs)?
> I don't feel I can help you decide this.
>
> Bruno
>


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2020-10-18 18:11                               ` Marc Nieper-Wißkirchen
@ 2021-04-03  9:08                                 ` Marc Nieper-Wißkirchen
  2021-04-03 10:26                                   ` Bruno Haible
  0 siblings, 1 reply; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2021-04-03  9:08 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib, Bruno Haible

[-- Attachment #1: Type: text/plain, Size: 3308 bytes --]

Please excuse the delay in finalizing the new module. I was distracted due
to the start of the semester in October last year and then forgot to finish
my work.

To summarize, I have finally come to the conclusion not to change the API
as theorized in this thread.

First of all, the benefits of making the hamt type non-opaque are too small
compared with the possible drawbacks (the non-opaqueness, the inability to
return NULL in future API extensions, etc.).

Secondly, after having given it some more thought, the alternative protocol
(which we have called more robust) seems to be harder to understand because
"p != e" could then mean two different things. So I will leave the original
protocol in place, which is easy to comprehend: If "old_hamt == new_hamt",
no insertion has taken place and one has manually free the element one has
attempted to insert. If "old_hamt != new_hamt" the element has been
inserted and has now to eventually free "new_hamt" besides "old_hamt".

After I have rebased my code to HEAD, I will commit the new module to
Gnulib.

Thank you for your patience.

Marc

Am So., 18. Okt. 2020 um 20:11 Uhr schrieb Marc Nieper-Wißkirchen <
marc.nieper+gnu@gmail.com>:

> Okay, if you find the latter protocol better anyway, I will switch to
> this protocol, and hamts will be stack-allocated (just two words) and
> passed by value.
>
> Thanks,
>
> Marc
>
> Am So., 18. Okt. 2020 um 19:58 Uhr schrieb Bruno Haible <bruno@clisp.org>:
> >
> > Marc Nieper-Wißkirchen wrote:
> > > The existing protocol is as follows:
> > >
> > > Hamt_entry *e = hamt_entry (...);
> > > Hamt_entry *p = e;
> > > Hamt *new_hamt = hamt_insert (old_hamt, &p);
> > > if (old_hamt == new_hamt)
> > >   {
> > >     /* The element hasn't been insert as an equivalent element has
> already been in the hamt. p now holds a reference to the entry that already
> existed in the hamt.
> > >     element_free (e);
> > >     ...
> > >     hamt_free (old_hamt); /* We don't have to free new_hamt because no
> new hamt was created. */
> > >   }
> > > else
> > >   {
> > >     /* The element has been inserted. p hasn't changed. */
> > >     ...
> > >     hamt_free (old_hamt);  /* This frees all hamts */
> > >     hamt_free (new_hamt); /* and all elements inserted, including e. */
> > >   }
> > >
> > > A protocol where no pointer values need to be compared could use p to
> > > carry the information:
> > >
> > > Hamt_entry *e = hamt_entry ();
> > > Hamt_entry *p = e;
> > > Hamt new_hamt = hamt_insert (old_hamt, &p);
> > > if (p == e)
> > >   {
> > >     /* The element has been inserted. */
> > >     ... /* See above. */
> > >   }
> > > else if (p == NULL)
> > >   {
> > >     /* The element e already existed in the hamt. */
> > >    ... /* See above. */
> > >   }
> > > else /* p != e && p != NULL */
> > >   {
> > >     /* An element equivalent to e already existed in the hamt. p now
> holds this element. */
> > >     ... /* See above. */
> > >   }
> >
> > I find the latter protocol more robust: it does not depend on details of
> > the implementation of hamt_insert.
> >
> > Can you decide on your original question (allow stack-allocated HAMTs)?
> > I don't feel I can help you decide this.
> >
> > Bruno
> >
>

[-- Attachment #2: Type: text/html, Size: 4998 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Non-opaque hamt type?
  2021-04-03  9:08                                 ` Marc Nieper-Wißkirchen
@ 2021-04-03 10:26                                   ` Bruno Haible
  0 siblings, 0 replies; 45+ messages in thread
From: Bruno Haible @ 2021-04-03 10:26 UTC (permalink / raw)
  To: Marc Nieper-Wißkirchen; +Cc: bug-gnulib

Hi Marc,

> Secondly, after having given it some more thought, the alternative protocol
> (which we have called more robust) seems to be harder to understand because
> "p != e" could then mean two different things. So I will leave the original
> protocol in place, which is easy to comprehend: If "old_hamt == new_hamt",
> no insertion has taken place and one has manually free the element one has
> attempted to insert. If "old_hamt != new_hamt" the element has been
> inserted and has now to eventually free "new_hamt" besides "old_hamt".

Yes, an API with 3 possible outcomes is easier to use incorrectly than an
API with 2 possible outcomes.

> After I have rebased my code to HEAD, I will commit the new module to
> Gnulib.

Yes please. Thank you for your great contribution!!

Bruno



^ permalink raw reply	[flat|nested] 45+ messages in thread

end of thread, other threads:[~2021-04-03 10:27 UTC | newest]

Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-09 21:07 [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
2020-10-10 14:35 ` Bruno Haible
2020-10-10 14:46   ` Marc Nieper-Wißkirchen
2020-10-10 17:34     ` Bruno Haible
2020-10-10 14:54 ` Bruno Haible
2020-10-10 15:01   ` Marc Nieper-Wißkirchen
2020-10-10 15:04     ` Marc Nieper-Wißkirchen
2020-10-10 17:41       ` Bruno Haible
2020-10-10 17:49         ` Marc Nieper-Wißkirchen
2020-10-10 18:19       ` Paul Eggert
2020-10-10 21:24         ` Marc Nieper-Wißkirchen
2020-10-10 21:46           ` Marc Nieper-Wißkirchen
2020-10-11  1:28             ` Bruno Haible
2020-10-11  8:20               ` Marc Nieper-Wißkirchen
2020-10-11  9:43                 ` Marc Nieper-Wißkirchen
2020-10-11 11:02                   ` HAMT iterator Bruno Haible
2020-10-11 11:08                     ` Marc Nieper-Wißkirchen
2020-10-11 12:04                       ` Bruno Haible
2020-10-11 12:25                         ` Marc Nieper-Wißkirchen
2020-10-11 13:52                           ` Bruno Haible
2020-10-11 12:14                       ` Bruno Haible
2020-10-11 12:22                         ` Marc Nieper-Wißkirchen
2020-10-11 10:29                 ` HAMT iterators Bruno Haible
2020-10-11 12:44                   ` Marc Nieper-Wißkirchen
2020-10-11 13:47                     ` Bruno Haible
2020-10-11 10:53                 ` out-of-memory handling Bruno Haible
2020-10-11 11:07                   ` Marc Nieper-Wißkirchen
2020-10-11 11:56                     ` Bruno Haible
2020-10-11 12:20                       ` Marc Nieper-Wißkirchen
2020-10-11 14:01                         ` HAMT for gl_set and gl_map Bruno Haible
2020-10-11 17:32                 ` [New module] Persistent Hash Array Mapped Tries (HAMTs) Marc Nieper-Wißkirchen
2020-10-11 18:22                   ` Draft #3 (with iterators) Marc Nieper-Wißkirchen
2020-10-11 19:09                     ` Bruno Haible
2020-10-12  6:06                       ` Non-opaque hamt type? Marc Nieper-Wißkirchen
2020-10-18 14:39                         ` Bruno Haible
2020-10-18 15:29                           ` Marc Nieper-Wißkirchen
2020-10-18 17:58                             ` Bruno Haible
2020-10-18 18:11                               ` Marc Nieper-Wißkirchen
2021-04-03  9:08                                 ` Marc Nieper-Wißkirchen
2021-04-03 10:26                                   ` Bruno Haible
2020-10-11 14:14             ` terminology Bruno Haible
2020-10-11 14:20               ` terminology Marc Nieper-Wißkirchen
2020-10-10 22:39         ` _Atomic Bruno Haible
2020-10-11 20:15           ` _Atomic Paul Eggert
2020-10-11 21:47             ` _Atomic Bruno Haible

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).