* [New module] Persistent Hash Array Mapped Tries (HAMTs)
@ 2020-10-09 21:07 Marc Nieper-Wißkirchen
  2020-10-10 14:35 ` Bruno Haible
  2020-10-10 14:54 ` Bruno Haible
  0 siblings, 2 replies; 45+ messages in thread
From: Marc Nieper-Wißkirchen @ 2020-10-09 21:07 UTC (permalink / raw)
  To: bug-gnulib

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

Hi,

after contributing two comparatively trivial modules to Gnulib, I
would like to contribute a less trivial one this time.  It implements
a persistent version of Phil Bagwell's HAMTs, which have been
popularized by Clojure.  HAMTs can be used when a persistent
(functional/pure) data structure akin to a hash table is needed.  For
example, the dynamic environment of a (possibly multi-threaded) Lisp
or Scheme can be modeled with persistent HAMTs.
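
To give an idea of the interface, here is a minimal usage sketch based
on the declarations in hamt.h from the attached patch; the Item type
and the helper functions below are made up for illustration, and error
checking is omitted:

#include "hamt.h"
#include <stdio.h>
#include <stdlib.h>

/* A user-defined element type; its first member must be a Hamt_entry.  */
typedef struct
{
  Hamt_entry entry;
  int val;
} Item;

static size_t
hash_item (const Hamt_entry *elt)
{
  return ((const Item *) elt)->val;
}

static bool
compare_items (const Hamt_entry *elt1, const Hamt_entry *elt2)
{
  return ((const Item *) elt1)->val == ((const Item *) elt2)->val;
}

static void
free_item (Hamt_entry *elt)
{
  free (elt);
}

int
main (void)
{
  Hamt *hamt = hamt_create (hash_item, compare_items, free_item);

  Item *item = malloc (sizeof *item);
  item->val = 42;
  Hamt_entry *p = &item->entry;

  /* Updates are persistent: hamt_insert returns a new hamt sharing
     structure with the old one, which remains valid and unchanged.  */
  Hamt *hamt1 = hamt_insert (hamt, &p);

  printf ("%d\n", ((const Item *) hamt_lookup (hamt1, &item->entry))->val);

  hamt_free (hamt);   /* Frees the (still empty) original hamt.  */
  hamt_free (hamt1);  /* Frees the copy and the item via free_item.  */
  return 0;
}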

Please take a look at the attached patch.

Thank you,

Marc

[-- Attachment #2: 0001-hamt-New-module.patch --]
[-- Type: text/x-patch, Size: 32489 bytes --]

From 39a1ac1f78c8d00b1dfad4f260318a6fb5cf5584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Nieper-Wi=C3=9Fkirchen?= <marc@nieper-wisskirchen.de>
Date: Wed, 7 Oct 2020 09:53:48 +0200
Subject: [PATCH] hamt: New module.

This module provides (persistent) hash array mapped tries.

* MODULES.html.sh: Add hamt.
* lib/hamt.c, lib/hamt.h, modules/hamt, modules/hamt-tests,
tests/test-hamt.c: New files.
---
 MODULES.html.sh    |   1 +
 lib/hamt.c         | 812 +++++++++++++++++++++++++++++++++++++++++++++
 lib/hamt.h         |  97 ++++++
 modules/hamt       |  29 ++
 modules/hamt-tests |  11 +
 tests/test-hamt.c  | 148 +++++++++
 6 files changed, 1098 insertions(+)
 create mode 100644 lib/hamt.c
 create mode 100644 lib/hamt.h
 create mode 100644 modules/hamt
 create mode 100644 modules/hamt-tests
 create mode 100644 tests/test-hamt.c

diff --git a/MODULES.html.sh b/MODULES.html.sh
index a8a629e29..b48ca2bc4 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2028,6 +2028,7 @@ func_all_modules ()
   func_module hash-pjw
   func_module hash-pjw-bare
   func_module hash
+  func_module hamt
   func_module readline
   func_module readtokens
   func_module readtokens0
diff --git a/lib/hamt.c b/lib/hamt.c
new file mode 100644
index 000000000..4876d5809
--- /dev/null
+++ b/lib/hamt.c
@@ -0,0 +1,812 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include "hamt.h"
+
+#include <flexmember.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "count-one-bits.h"
+#include "verify.h"
+#include "xalloc.h"
+
+/* See: Phil Bagwell (2000). Ideal Hash Trees (Report). Infoscience
+   Department, École Polytechnique Fédérale de Lausanne.
+
+   http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf
+
+   We implement a persistent version of hash array mapped tries.  Each
+   updating operation returns a new hamt, which shares structure with
+   the original one.  If persistence is not needed, transient hash
+   tables are probably faster. */
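+
+/* For example, inserting an element into an existing hamt allocates
+   only the nodes along the path from the root down to the new
+   element; all other nodes are shared with the original hamt by
+   incrementing their reference counts.  */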
+
+typedef
+#if GL_HAMT_THREAD_SAFE
+_Atomic
+#endif
+size_t ref_counter;
+
+/* Hash values are of type size_t.  For each level of the trie, we use
+   5 bits (the base-2 logarithm of the width of a 32-bit word).  */
+#define MAX_DEPTH ((SIZE_WIDTH + 4) / 5)
+
+/***************/
+/* Entry Types */
+/***************/
+
+/* Leaf nodes are of type element.  Non-leaf nodes are either
+   subtries or, if at maximal depth, buckets.  */
+enum entry_type
+{
+  element_entry = 0,
+  subtrie_entry = 1,
+  bucket_entry = 2
+};
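+
+/* The type of an entry is stored in the two least significant bits of
+   its reference counter, which is why the counter below is
+   incremented and decremented in steps of 4.  */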
+
+/* Return the type of an entry.  */
+static enum entry_type
+entry_type (const Hamt_entry *entry)
+{
+  return entry->ref_count & 3;
+}
+
+/********************/
+/* Reference Counts */
+/********************/
+
+/* Initialize the reference counter, storing its type.  */
+static void
+init_ref_counter (ref_counter *counter, enum entry_type type)
+{
+  *counter = 4 + type;
+}
+
+/* Increase the reference counter of an entry.  */
+static void
+inc_ref_counter (ref_counter *counter)
+{
+  *counter += 4;
+}
+
+/* Decrease the entry reference counter.  Return false if the entry
+   can be deleted.  */
+static bool
+dec_ref_counter (ref_counter *counter)
+{
+  *counter -= 4;
+  return *counter >= 4;
+}
+
+/**************/
+/* Structures */
+/**************/
+
+/* Different generations of a hamt share a function table.  */
+struct function_table
+{
+  Hamt_hasher *hasher;
+  Hamt_comparator *comparator;
+  Hamt_freer *freer;
+  ref_counter ref_count;
+};
+
+/* Different generations of a hamt share subtries.  A singleton
+   subtrie is modelled as a single element.  */
+struct subtrie
+{
+  ref_counter ref_count;
+  /* Nodes carry labels from 0 to 31.  The i-th bit in map is set if
+     the node labelled i is present.  */
+  uint32_t map;
+  Hamt_entry *nodes [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* Buckets are used when different elements have the same hash values.  */
+struct bucket
+{
+  ref_counter ref_counter;
+  size_t elt_count;
+  Hamt_entry *elts [FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* A hamt consists of its function table and the root entry.  */
+struct hamt
+{
+  struct function_table *functions;
+  /* The root entry is NULL for an empty HAMT.  */
+  Hamt_entry *root;
+};
+
+/*******************/
+/* Function Tables */
+/*******************/
+
+/* Allocate and initialize a function table.  */
+static struct function_table *
+create_function_table (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                       Hamt_freer *freer)
+{
+  struct function_table *functions = XMALLOC (struct function_table);
+  functions->hasher = hasher;
+  functions->comparator = comparator;
+  functions->freer = freer;
+  functions->ref_count = 1;
+  return functions;
+}
+
+/* Increment the reference count and return the function table. */
+static struct function_table *
+copy_function_table (struct function_table *function_table)
+{
+  ++function_table->ref_count;
+  return function_table;
+}
+
+/* Decrease the reference count and free the function table if the
+   reference count drops to zero.  */
+static void
+free_function_table (struct function_table *function_table)
+{
+  if (--function_table->ref_count)
+    return;
+  free (function_table);
+}
+
+/************/
+/* Elements */
+/************/
+
+/* Return an element's hash.  */
+static size_t
+hash_element (const struct function_table *functions, const Hamt_entry *elt)
+{
+  return functions->hasher (elt);
+}
+
+/* Compare two elements.  */
+static bool
+compare_elements (const struct function_table *functions,
+                  const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return functions->comparator (elt1, elt2);
+}
+
+/* Free an element.  */
+static void
+free_element (const struct function_table *functions, Hamt_entry *elt)
+{
+  if (dec_ref_counter (&elt->ref_count))
+    return;
+  functions->freer (elt);
+}
+
+/* Return the initialized element.  */
+static Hamt_entry *
+init_element (Hamt_entry *elt)
+{
+  init_ref_counter (&elt->ref_count, element_entry);
+  return elt;
+}
+
+/***********/
+/* Buckets */
+/***********/
+
+/* Allocate a partially initialized bucket with a given number of elements.  */
+static struct bucket *
+alloc_bucket (size_t elt_count)
+{
+  struct bucket *bucket
+    = xmalloc (FLEXSIZEOF (struct bucket, elts,
+                           sizeof (Hamt_entry *) * elt_count));
+  init_ref_counter (&bucket->ref_counter, bucket_entry);
+  bucket->elt_count = elt_count;
+  return bucket;
+}
+
+/***********/
+/* Entries */
+/***********/
+
+/* Calculate and return the number of nodes in a subtrie.  */
+static int
+trienode_count (const struct subtrie *subtrie)
+{
+  return count_one_bits_l (subtrie->map);
+}
+
+/* Allocate a partially initialized subtrie with a given number of nodes.  */
+static struct subtrie *
+alloc_subtrie (int node_count)
+{
+  struct subtrie *subtrie
+    = xmalloc (FLEXSIZEOF (struct subtrie, nodes,
+                           sizeof (Hamt_entry *) * node_count));
+  init_ref_counter (&subtrie->ref_count, subtrie_entry);
+  return subtrie;
+}
+
+/* Return a conceptual copy of an entry.  */
+static Hamt_entry *
+copy_entry (Hamt_entry *entry)
+{
+  inc_ref_counter (&entry->ref_count);
+  return entry;
+}
+
+/* Return a new subtrie that has the j-th node replaced.  */
+static struct subtrie *
+replace_entry (struct subtrie *subtrie, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie);
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map;
+  for (int k = 0; k < n; ++k)
+    {
+      if (k == j)
+        new_subtrie->nodes [k] = entry;
+      else
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+    }
+  return new_subtrie;
+}
+
+/* Return a new subtrie that has an entry labelled i inserted at
+   the j-th position.  */
+static struct subtrie *
+insert_entry (struct subtrie *subtrie, int i, int j, Hamt_entry *entry)
+{
+  int n = trienode_count (subtrie) + 1;
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map | (1u << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k > j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k - 1]);
+      else
+        new_subtrie->nodes [k] = entry;
+    }
+  return new_subtrie;
+}
+
+/* Return a new entry that has the entry labelled i removed from
+   position j.  */
+static Hamt_entry *
+remove_subtrie_entry (struct subtrie *subtrie, int i, int j)
+{
+  int n = trienode_count (subtrie) - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (subtrie->nodes [1]);
+      return copy_entry (subtrie->nodes [0]);
+    }
+  struct subtrie *new_subtrie = alloc_subtrie (n);
+  new_subtrie->map = subtrie->map & ~(1u << i);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k]);
+      else if (k >= j)
+        new_subtrie->nodes [k] = copy_entry (subtrie->nodes [k + 1]);
+    }
+  return (Hamt_entry *) new_subtrie;
+}
+
+/* Return a new entry that has the entry at position j removed.  */
+static Hamt_entry *
+remove_bucket_entry (struct bucket *bucket, int j)
+{
+  int n = bucket->elt_count - 1;
+  if (n == 1)
+    {
+      if (j == 0)
+        return copy_entry (bucket->elts [1]);
+      return copy_entry (bucket->elts [0]);
+    }
+  struct bucket *new_bucket = alloc_bucket (n);
+  for (int k = 0; k < n; ++k)
+    {
+      if (k < j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k]);
+      else if (k >= j)
+        new_bucket->elts [k] = copy_entry (bucket->elts [k + 1]);
+    }
+  return (Hamt_entry *) new_bucket;
+}
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+/* Create a new, empty hash array mapped trie.  */
+Hamt *
+hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+             Hamt_freer *freer)
+{
+  struct function_table *functions
+    = create_function_table (hasher, comparator, freer);
+  Hamt *hamt = XMALLOC (Hamt);
+  hamt->functions = functions;
+  hamt->root = NULL;
+  return hamt;
+}
+
+/* Return a copy of the hamt.  */
+Hamt *
+hamt_copy (Hamt *hamt)
+{
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = hamt->root == NULL ? NULL : copy_entry (hamt->root);
+  return new_hamt;
+}
+
+/* Free a bucket.  */
+static void
+free_bucket (struct function_table const *functions, struct bucket *bucket)
+{
+  if (dec_ref_counter (&bucket->ref_counter))
+    return;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    free_element (functions, elts [i]);
+  free (bucket);
+}
+
+/* Forward declaration.  */
+static void free_subtrie (struct function_table const *functions,
+                          struct subtrie *subtrie);
+
+/* Free an entry.  */
+static void
+free_entry (struct function_table const *functions, Hamt_entry *entry)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      free_element (functions, entry);
+      break;
+    case subtrie_entry:
+      free_subtrie (functions, (struct subtrie *) entry);
+      break;
+    case bucket_entry:
+      free_bucket (functions, (struct bucket *) entry);
+      break;
+    default:
+      assume (0);
+    }
+}
+
+/* Free a trie recursively.  */
+static void
+free_subtrie (struct function_table const *functions, struct subtrie *subtrie)
+{
+  if (dec_ref_counter (&subtrie->ref_count))
+    return;
+  int n = trienode_count (subtrie);
+  Hamt_entry **node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    free_entry (functions, *node_ptr++);
+  free (subtrie);
+}
+
+/* Free a hamt.  */
+void
+hamt_free (Hamt *hamt)
+{
+  if (hamt->root != NULL)
+    free_entry (hamt->functions, hamt->root);
+  free_function_table (hamt->functions);
+  free (hamt);
+}
+
+/**********/
+/* Lookup */
+/**********/
+
+/* Lookup an element in a bucket.  */
+static const Hamt_entry *
+bucket_lookup (const struct function_table *functions,
+               const struct bucket *bucket, const Hamt_entry *elt)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, elt, *elts))
+        return *elts;
+      ++elts;
+    }
+  return NULL;
+}
+
+/* Forward declaration.  */
+static const Hamt_entry *entry_lookup (const struct function_table *functions,
+                                       const Hamt_entry *entry,
+                                       const Hamt_entry *elt, size_t hash);
+
+/* Lookup an element in a subtrie.  */
+static const Hamt_entry *
+subtrie_lookup (const struct function_table *functions,
+                const struct subtrie *subtrie, const Hamt_entry *elt,
+                size_t hash)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+
+  if (! (map & (1u << i)))
+    return NULL;
+
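+  /* The index of the node labelled I among the present nodes equals
+     the number of set bits in MAP below bit I.  */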
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  return entry_lookup (functions, subtrie->nodes [j], elt, hash >> 5);
+}
+
+/* Lookup an element in an entry.  */
+static const Hamt_entry *
+entry_lookup (const struct function_table *functions, const Hamt_entry *entry,
+              const Hamt_entry *elt, size_t hash)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, elt, entry))
+        return entry;
+      return NULL;
+    case subtrie_entry:
+      return subtrie_lookup (functions, (struct subtrie *) entry, elt, hash);
+    case bucket_entry:
+      return bucket_lookup (functions, (struct bucket *) entry, elt);
+    default:
+      assume (0);
+    }
+}
+
+/* If ELT matches an element in HAMT, return this element.  Otherwise,
+   return NULL.  */
+const Hamt_entry *
+hamt_lookup (const Hamt *hamt, const Hamt_entry *elt)
+{
+  if (hamt->root == NULL)
+    return NULL;
+
+  return entry_lookup (hamt->functions, hamt->root, elt,
+                       hash_element (hamt->functions, elt));
+}
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+/* Create a bucket populated with two elements.  */
+static struct bucket *
+create_populated_bucket (Hamt_entry *elt1, Hamt_entry *elt2)
+{
+  struct bucket *bucket = alloc_bucket (2);
+  bucket->elts [0] = elt1;
+  bucket->elts [1] = elt2;
+  return bucket;
+}
+
+/* Create a chain of subtrie nodes so that the resulting trie is
+   populated with exactly two elements.  */
+static Hamt_entry *
+create_populated_subtrie (Hamt_entry *elt1, Hamt_entry *elt2, size_t hash1,
+                          size_t hash2, int depth)
+{
+  if (depth >= MAX_DEPTH)
+    return (Hamt_entry *) create_populated_bucket (elt1, elt2);
+
+  struct subtrie *subtrie;
+  int i1 = hash1 & 31;
+  int i2 = hash2 & 31;
+  if (i1 != i2)
+    {
+      subtrie = alloc_subtrie (2);
+      subtrie->map = (1u << i1) | (1u << i2);
+      if (i1 < i2)
+        {
+          subtrie->nodes [0] = elt1;
+          subtrie->nodes [1] = elt2;
+        }
+      else
+        {
+          subtrie->nodes [0] = elt2;
+          subtrie->nodes [1] = elt1;
+        }
+    }
+  else
+    {
+      subtrie = alloc_subtrie (1);
+      subtrie->map = 1u << i1;
+      subtrie->nodes [0]
+        = create_populated_subtrie (elt1, elt2, hash1 >> 5, hash2 >> 5,
+                                    depth + 1);
+    }
+  return (Hamt_entry *) subtrie;
+}
+
+/* Insert an element in a bucket if not already present.  */
+static struct bucket *
+bucket_insert (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, *elts))
+        {
+          *elt_ptr = *elts;
+          return bucket;
+        }
+      ++elts;
+    }
+  struct bucket *new_bucket = alloc_bucket (elt_count + 1);
+  new_bucket->elts [0] = init_element (*elt_ptr);
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      new_bucket->elts [i + 1] = copy_entry (bucket->elts [i]);
+    }
+  return new_bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_insert (const struct function_table *functions,
+                                 Hamt_entry *subtrie, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth);
+
+/* Insert an element in a subtrie if not already present.  */
+static struct subtrie *
+subtrie_insert (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1u << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_insert (functions, entry, elt_ptr, hash >> 5, depth + 1);
+      if (new_entry != entry)
+        return replace_entry (subtrie, j, new_entry);
+      return subtrie;
+    }
+  return insert_entry (subtrie, i, j, init_element (*elt_ptr));
+}
+
+/* Insert an element in an entry if not already present.  */
+static Hamt_entry *
+entry_insert (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          *elt_ptr = entry;
+          return entry;
+        }
+      return create_populated_subtrie
+        (init_element (*elt_ptr), copy_entry (entry), hash,
+         hash_element (functions, entry) >> (5 * depth), depth);
+    case subtrie_entry:
+      return (Hamt_entry *)
+        subtrie_insert (functions, (struct subtrie *) entry, elt_ptr, hash,
+                        depth);
+    case bucket_entry:
+      return (Hamt_entry *)
+        bucket_insert (functions, (struct bucket *) entry, elt_ptr);
+    default:
+      assume (0);
+    }
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table and return HAMT.  Otherwise, insert *ELT_PTR
+   into a copy of the HAMT and return the copy.  */
+Hamt *
+hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  Hamt_entry *new_entry;
+
+  if (hamt->root == NULL)
+    new_entry = init_element (*elt_ptr);
+  else
+    new_entry = entry_insert (hamt->functions, hamt->root, elt_ptr,
+                               hash_element (hamt->functions, *elt_ptr), 0);
+
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/* Delete an element in a bucket if found.  */
+static Hamt_entry *
+bucket_delete (const struct function_table *functions, struct bucket *bucket,
+               Hamt_entry **elt_ptr)
+{
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      if (compare_elements (functions, *elt_ptr, elts [i]))
+        {
+          *elt_ptr = elts [i];
+          return remove_bucket_entry (bucket, i);
+        }
+    }
+  return (Hamt_entry *) bucket;
+}
+
+/* Forward declaration.  */
+static Hamt_entry *entry_delete (const struct function_table *functions,
+                                 Hamt_entry *entry, Hamt_entry **elt_ptr,
+                                 size_t hash, int depth);
+
+/* Delete an element in a subtrie if found.  */
+static Hamt_entry *
+subtrie_delete (const struct function_table *functions, struct subtrie *subtrie,
+                Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  uint32_t map = subtrie->map;
+  int i = hash & 31;
+  int j = i == 0 ? 0 : count_one_bits (map << (32 - i));
+  if (map & (1u << i))
+    {
+      Hamt_entry *entry = subtrie->nodes [j];
+      Hamt_entry *new_entry
+        = entry_delete (functions, entry, elt_ptr, hash >> 5, depth + 1);
+      if (new_entry == NULL)
+        return remove_subtrie_entry (subtrie, i, j);
+      if (new_entry != entry)
+        return (Hamt_entry *) replace_entry (subtrie, j, new_entry);
+      return (Hamt_entry *) subtrie;
+    }
+  return (Hamt_entry *) subtrie;
+}
+
+/* Delete an element in an entry if found.  */
+static Hamt_entry *
+entry_delete (const struct function_table *functions, Hamt_entry *entry,
+              Hamt_entry **elt_ptr, size_t hash, int depth)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      if (compare_elements (functions, *elt_ptr, entry))
+        {
+          *elt_ptr = entry;
+          return NULL;
+        }
+      return entry;
+    case subtrie_entry:
+      return subtrie_delete (functions, (struct subtrie *) entry, elt_ptr, hash,
+                             depth);
+    case bucket_entry:
+      return bucket_delete (functions, (struct bucket *) entry, elt_ptr);
+    default:
+      assume (0);
+    }
+}
+
+/* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
+   element from the table, remove the element from a copy of the hamt,
+   and return the copy.  Otherwise, return HAMT.  */
+Hamt *
+hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr)
+{
+  if (hamt->root == NULL)
+    return hamt;
+
+  Hamt_entry *new_entry
+    = entry_delete (hamt->functions, hamt->root, elt_ptr,
+                    hash_element (hamt->functions, *elt_ptr), 0);
+  if (new_entry == hamt->root)
+    return hamt;
+
+  Hamt *new_hamt = XMALLOC (Hamt);
+  new_hamt->functions = copy_function_table (hamt->functions);
+  new_hamt->root = new_entry;
+  return new_hamt;
+}
+
+/*************/
+/* Iteration */
+/*************/
+
+/* Walk a bucket.  */
+static size_t
+bucket_do_while (const struct bucket *bucket, Hamt_processor *proc, void *data,
+                 bool *success)
+{
+  size_t cnt = 0;
+  size_t elt_count = bucket->elt_count;
+  Hamt_entry *const *elts = bucket->elts;
+  for (size_t i = 0; i < elt_count; ++i)
+    {
+      *success = proc (elts [i], data);
+      if (!*success)
+        return cnt;
+      ++cnt;
+    }
+  return cnt;
+}
+
+/* Forward declaration.  */
+static size_t entry_do_while (const Hamt_entry *entry, Hamt_processor *proc,
+                              void *data, bool *success);
+
+/* Walk a subtrie.  */
+static size_t
+subtrie_do_while (const struct subtrie *subtrie, Hamt_processor *proc,
+                  void *data, bool *success)
+{
+  size_t cnt = 0;
+  int n = trienode_count (subtrie);
+  Hamt_entry *const *node_ptr = subtrie->nodes;
+  for (int j = 0; j < n; ++j)
+    {
+      cnt += entry_do_while (*node_ptr++, proc, data, success);
+      if (!*success)
+        return cnt;
+    }
+  return cnt;
+}
+
+/* Walk an entry.  */
+static size_t
+entry_do_while (const Hamt_entry *entry, Hamt_processor *proc, void *data,
+                bool *success)
+{
+  switch (entry_type (entry))
+    {
+    case element_entry:
+      *success = proc (entry, data);
+      return *success ? 1 : 0;
+    case subtrie_entry:
+      return subtrie_do_while ((struct subtrie *) entry, proc, data, success);
+    case bucket_entry:
+      return bucket_do_while ((struct bucket *) entry, proc, data, success);
+    default:
+      assume (0);
+    }
+}
+
+/* Call PROC for every element of the hamt until it returns false.
+   The first argument of PROC is the element, the second argument is
+   the value of DATA as received.  Return the number of calls that
+   returned true.  */
+size_t
+hamt_do_while (const Hamt *hamt, Hamt_processor *proc, void *data)
+{
+  if (hamt->root == NULL)
+    return 0;
+
+  bool success = true;
+  return entry_do_while (hamt->root, proc, data, &success);
+}
diff --git a/lib/hamt.h b/lib/hamt.h
new file mode 100644
index 000000000..bd4a2454b
--- /dev/null
+++ b/lib/hamt.h
@@ -0,0 +1,97 @@
+/* (Persistent) hash array mapped tries.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020. */
+
+#ifndef _GL_HAMT_H
+#define _GL_HAMT_H
+
+/* The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
+   is thread-safe as long as two threads do not simultaneously access
+   the same hamt.  */
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
+  && !defined (__STDC_NO_ATOMICS__)
+# define GL_HAMT_THREAD_SAFE 1
+#else
+# define GL_HAMT_THREAD_SAFE 0
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* A hamt stores pointers to elements.  Each element has to be a
+   struct whose initial member is of the type Hamt_entry.  An element
+   is conceptually owned by a hamt as soon as it is inserted.  */
+typedef struct
+{
+#if GL_HAMT_THREAD_SAFE
+  _Atomic
+#endif
+  size_t ref_count;
+} Hamt_entry;
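+
+/* A user-defined element type could, for example, look as follows
+   (cf. the Element type in tests/test-hamt.c):
+
+     typedef struct
+     {
+       Hamt_entry entry;
+       int val;
+     } Element;  */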
+
+/* The public interface is documented in the file hamt.c.
+   Out-of-memory errors are handled by calling xalloc_die ().  */
+
+/*************************/
+/* Opaque Hamt Structure */
+/*************************/
+
+typedef struct hamt Hamt;
+
+/******************/
+/* Function Types */
+/******************/
+
+typedef size_t (Hamt_hasher) (const Hamt_entry *elt);
+typedef bool (Hamt_comparator) (const Hamt_entry *elt1, const Hamt_entry *elt2);
+typedef void (Hamt_freer) (Hamt_entry *elt);
+typedef bool (Hamt_processor) (const Hamt_entry *elt, void *data);
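+
+/* Example implementations of a hasher, a comparator, and a freer can
+   be found in tests/test-hamt.c.  */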
+
+
+/****************************/
+/* Creation and Destruction */
+/****************************/
+
+extern Hamt *hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
+                          Hamt_freer *freer)
+  _GL_ATTRIBUTE_NODISCARD;
+extern Hamt *hamt_copy (Hamt *hamt) _GL_ATTRIBUTE_NODISCARD;
+extern void hamt_free (Hamt *);
+
+/**********/
+/* Lookup */
+/**********/
+
+extern const Hamt_entry *hamt_lookup (const Hamt *hamt, const Hamt_entry *elt);
+
+/**************************/
+/* Insertion and Deletion */
+/**************************/
+
+extern Hamt *hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+extern Hamt *hamt_delete (Hamt *hamt, Hamt_entry **elt_ptr)
+  _GL_ATTRIBUTE_NODISCARD;
+
+/*************/
+/* Iteration */
+/*************/
+
+extern size_t hamt_do_while (const Hamt *hamt, Hamt_processor *proc,
+                             void *data);
+
+#endif /* _GL_HAMT_H */
diff --git a/modules/hamt b/modules/hamt
new file mode 100644
index 000000000..d73f09c2d
--- /dev/null
+++ b/modules/hamt
@@ -0,0 +1,29 @@
+Description:
+Persistent hash array mapped tries.
+
+Files:
+lib/hamt.h
+lib/hamt.c
+
+Depends-on:
+count-one-bits
+flexmember
+inttypes-incomplete
+stdbool
+stdint
+verify
+xalloc
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += hamt.c
+
+Include:
+"hamt.h"
+
+License:
+GPL
+
+Maintainer:
+Marc Nieper-Wisskirchen
diff --git a/modules/hamt-tests b/modules/hamt-tests
new file mode 100644
index 000000000..f4f0ea4e0
--- /dev/null
+++ b/modules/hamt-tests
@@ -0,0 +1,11 @@
+Files:
+tests/test-hamt.c
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-hamt
+check_PROGRAMS += test-hamt
diff --git a/tests/test-hamt.c b/tests/test-hamt.c
new file mode 100644
index 000000000..daac7b2a7
--- /dev/null
+++ b/tests/test-hamt.c
@@ -0,0 +1,148 @@
+/* Test of persistent hash array mapped trie implementation.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2020.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "hamt.h"
+#include "macros.h"
+#include "xalloc.h"
+
+typedef struct
+{
+  Hamt_entry entry;
+  int val;
+} Element;
+
+static int
+entry_value (const Hamt_entry *elt)
+{
+  return ((Element *) elt)->val;
+}
+
+static size_t
+hash_element (const Hamt_entry *elt)
+{
+  return entry_value (elt) & ~3; /* We drop the two lowest bits so
+                                    that we can test hash collisions. */
+}
+
+static bool
+compare_element (const Hamt_entry *elt1, const Hamt_entry *elt2)
+{
+  return entry_value (elt1) == entry_value (elt2);
+}
+
+static void
+free_element (Hamt_entry *elt)
+{
+  free (elt);
+}
+
+static Hamt_entry *
+make_element (int n)
+{
+  Element *elt = XMALLOC (Element);
+  elt->val = n;
+  return &elt->entry;
+}
+
+static int sum = 0;
+static int flag;
+
+static bool
+proc (const Hamt_entry *elt, void *data)
+{
+  if (data == &flag)
+    {
+      sum += entry_value (elt);
+      return true;
+    }
+  if (sum > 0)
+    {
+      sum = 0;
+      return true;
+    }
+  return false;
+}
+
+int
+main (void)
+{
+  Hamt *hamt = hamt_create (hash_element, compare_element, free_element);
+
+  Hamt_entry *x5 = make_element (5);
+  Hamt_entry *p = x5;
+  Hamt *hamt1 = hamt_insert (hamt, &p);
+  ASSERT (hamt1 != hamt);
+  ASSERT (hamt_lookup (hamt, x5) == NULL);
+  ASSERT (hamt_lookup (hamt1, x5) == x5);
+
+  Hamt_entry *y5 = make_element (5);
+  p = y5;
+  Hamt *hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 == hamt1);
+  ASSERT (p == x5);
+  ASSERT (hamt_lookup (hamt1, y5) == x5);
+
+  Hamt_entry *z37 = make_element (37);
+  p = z37;
+  hamt2 = hamt_insert (hamt1, &p);
+  ASSERT (hamt2 != hamt1);
+  ASSERT (p == z37);
+  ASSERT (hamt_lookup (hamt1, z37) == NULL);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+
+  hamt_free (hamt);
+  hamt_free (hamt1);
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+  ASSERT (hamt_lookup (hamt2, z37) == z37);
+
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 2);
+  ASSERT (sum == 42);
+  ASSERT (hamt_do_while (hamt2, proc, NULL) == 1);
+  ASSERT (sum == 0);
+
+  p = y5;
+  hamt1 = hamt_delete (hamt2, &p);
+  ASSERT (hamt1 != hamt2);
+  ASSERT (p == x5);
+
+  ASSERT (hamt_lookup (hamt1, x5) == NULL);
+  ASSERT (hamt_lookup (hamt2, x5) == x5);
+
+  hamt_free (hamt1);
+  Hamt_entry *x4 = make_element (4);
+  hamt1 = hamt_insert (hamt2, &x4);
+  hamt_free (hamt2);
+  Hamt_entry *x6 = make_element (6);
+  hamt2 = hamt_insert (hamt1, &x6);
+  hamt_free (hamt1);
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum == 52);
+
+  hamt1 = hamt_delete (hamt2, &x4);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt2, proc, &flag) == 4);
+  ASSERT (sum == 52);
+  sum = 0;
+  ASSERT (hamt_do_while (hamt1, proc, &flag) == 3);
+  ASSERT (sum == 48);
+
+  hamt_free (hamt1);
+  hamt_free (hamt2);
+  free_element (y5);
+}
-- 
2.25.1

