unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org>
To: libc-alpha@sourceware.org
Subject: Re: [PATCH 25/28] elf: Implement tail merging of strings in ldconfig
Date: Thu, 22 Oct 2020 18:08:45 -0300	[thread overview]
Message-ID: <d1c92c14-0d88-bcc5-8b9d-60c13ea2bb9c@linaro.org> (raw)
In-Reply-To: <c27e67d78494ae19a8a3bd0f3add4567a9900aae.1601569371.git.fweimer@redhat.com>



On 01/10/2020 13:34, Florian Weimer via Libc-alpha wrote:
> This simplifies the string table construction in elf/cache.c
> because there is no more need to keep track of offsets explicitly;
> the string table implementation does this internally.
> 
> This change slightly reduces the size of the cache on disk.  The
> file format does not change as a result.  The strings are
> null-terminated, without explicit length, so tail merging is
> transparent to readers.

LGTM, thanks.

> ---
>  elf/Makefile |  3 +-
>  elf/cache.c  | 84 ++++++++++++++++++++++++++++------------------------
>  2 files changed, 48 insertions(+), 39 deletions(-)
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index ad50a3e16e..5ad8df7da3 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -118,7 +118,8 @@ others-static	+= ldconfig
>  others		+= ldconfig
>  install-rootsbin += ldconfig
>  
> -ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs
> +ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs \
> +  stringtable
>  extra-objs	+= $(ldconfig-modules:=.o)
>  others-extras   = $(ldconfig-modules)
>  endif

Ok.

> diff --git a/elf/cache.c b/elf/cache.c
> index 3a02a4070a..eda3da98a7 100644
> --- a/elf/cache.c
> +++ b/elf/cache.c
> @@ -35,11 +35,15 @@
>  #include <ldconfig.h>
>  #include <dl-cache.h>
>  #include <version.h>
> +#include <stringtable.h>
> +
> +/* Used to store library names, paths, and other strings.  */
> +struct stringtable strings;

Maybe make this static?

>  
>  struct cache_entry
>  {
> -  char *lib;			/* Library name.  */
> -  char *path;			/* Path to find library.  */
> +  struct stringtable_entry *lib; /* Library name.  */
> +  struct stringtable_entry *path; /* Path to find library.  */
>    int flags;			/* Flags to indicate kind of library.  */
>    unsigned int osversion;	/* Required OS version.  */
>    uint64_t hwcap;		/* Important hardware capabilities.  */

Ok.

> @@ -300,7 +304,7 @@ static int
>  compare (const struct cache_entry *e1, const struct cache_entry *e2)
>  {
>    /* We need to swap entries here to get the correct sort order.  */
> -  int res = _dl_cache_libcmp (e2->lib, e1->lib);
> +  int res = _dl_cache_libcmp (e2->lib->string, e1->lib->string);
>    if (res == 0)
>      {
>        if (e1->flags < e2->flags)

Ok.

> @@ -369,26 +373,24 @@ save_cache (const char *cache_name)
>  {
>    /* The cache entries are sorted already, save them in this order. */
>  
> -  /* Count the length of all strings.  */
> -  /* The old format doesn't contain hwcap entries and doesn't contain
> -     libraries in subdirectories with hwcaps entries.  Count therefore
> -     also all entries with hwcap == 0.  */
> -  size_t total_strlen = 0;
>    struct cache_entry *entry;
>    /* Number of cache entries.  */
>    int cache_entry_count = 0;
> -  /* Number of normal cache entries.  */
> +  /* The old format doesn't contain hwcap entries and doesn't contain
> +     libraries in subdirectories with hwcaps entries.  Count therefore
> +     also all entries with hwcap == 0.  */
>    int cache_entry_old_count = 0;
>  
>    for (entry = entries; entry != NULL; entry = entry->next)
>      {
> -      /* Account the final NULs.  */
> -      total_strlen += strlen (entry->lib) + strlen (entry->path) + 2;
>        ++cache_entry_count;
>        if (entry->hwcap == 0)
>  	++cache_entry_old_count;
>      }
>  
> +  struct stringtable_finalized strings_finalized;
> +  stringtable_finalize (&strings, &strings_finalized);
> +
>    /* Create the on disk cache structure.  */
>    struct cache_file *file_entries = NULL;
>    size_t file_entries_size = 0;

Ok.

> @@ -432,7 +434,7 @@ save_cache (const char *cache_name)
>  	      sizeof CACHE_VERSION - 1);
>  
>        file_entries_new->nlibs = cache_entry_count;
> -      file_entries_new->len_strings = total_strlen;
> +      file_entries_new->len_strings = strings_finalized.size;
>        file_entries_new->flags = cache_file_new_flags_endian;
>      }
>  

Ok.

> @@ -449,20 +451,20 @@ save_cache (const char *cache_name)
>      str_offset = 0;
>  
>    /* An array for all strings.  */
> -  char *strings = xmalloc (total_strlen);
> -  char *str = strings;
>    int idx_old;
>    int idx_new;
>  
>    for (idx_old = 0, idx_new = 0, entry = entries; entry != NULL;
>         entry = entry->next, ++idx_new)
>      {
> -      /* First the library.  */
>        if (opt_format != 2 && entry->hwcap == 0)
>  	{
>  	  file_entries->libs[idx_old].flags = entry->flags;
>  	  /* XXX: Actually we can optimize here and remove duplicates.  */
>  	  file_entries->libs[idx_old].key = str_offset + pad;
> +	  file_entries->libs[idx_new].key = str_offset + entry->lib->offset;
> +	  file_entries->libs[idx_new].value
> +	    = str_offset + entry->path->offset;
>  	}
>        if (opt_format != 0)
>  	{

Ok.

> @@ -473,20 +475,12 @@ save_cache (const char *cache_name)
>  	  file_entries_new->libs[idx_new].flags = entry->flags;
>  	  file_entries_new->libs[idx_new].osversion = entry->osversion;
>  	  file_entries_new->libs[idx_new].hwcap = entry->hwcap;
> -	  file_entries_new->libs[idx_new].key = str_offset;
> +	  file_entries_new->libs[idx_new].key
> +	    = str_offset + entry->lib->offset;
> +	  file_entries_new->libs[idx_new].value
> +	    = str_offset + entry->path->offset;
>  	}
>  
> -      size_t len = strlen (entry->lib) + 1;
> -      str = mempcpy (str, entry->lib, len);
> -      str_offset += len;
> -      /* Then the path.  */
> -      if (opt_format != 2 && entry->hwcap == 0)
> -	file_entries->libs[idx_old].value = str_offset + pad;
> -      if (opt_format != 0)
> -	file_entries_new->libs[idx_new].value = str_offset;
> -      len = strlen (entry->path) + 1;
> -      str = mempcpy (str, entry->path, len);
> -      str_offset += len;
>        /* Ignore entries with hwcap for old format.  */
>        if (entry->hwcap == 0)
>  	++idx_old;

Ok.

> @@ -511,7 +505,7 @@ save_cache (const char *cache_name)
>  	extension_offset += pad;
>        extension_offset += file_entries_new_size;
>      }
> -  extension_offset += total_strlen;
> +  extension_offset += strings_finalized.size;
>    extension_offset = roundup (extension_offset, 4); /* Provide alignment.  */
>    if (opt_format != 0)
>      file_entries_new->extension_offset = extension_offset;

Ok.

> @@ -551,7 +545,8 @@ save_cache (const char *cache_name)
>  	error (EXIT_FAILURE, errno, _("Writing of cache data failed"));
>      }
>  
> -  if (write (fd, strings, total_strlen) != (ssize_t) total_strlen)
> +  if (write (fd, strings_finalized.strings, strings_finalized.size)
> +      != (ssize_t) strings_finalized.size)
>      error (EXIT_FAILURE, errno, _("Writing of cache data failed"));
>  
>    if (opt_format != 0)

Ok.

> @@ -580,7 +575,7 @@ save_cache (const char *cache_name)
>    /* Free all allocated memory.  */
>    free (file_entries_new);
>    free (file_entries);
> -  free (strings);
> +  free (strings_finalized.strings);
>  
>    while (entries)
>      {

Ok.

> @@ -596,14 +591,27 @@ void
>  add_to_cache (const char *path, const char *lib, int flags,
>  	      unsigned int osversion, uint64_t hwcap)
>  {
> -  size_t liblen = strlen (lib) + 1;
> -  size_t len = liblen + strlen (path) + 1;
> -  struct cache_entry *new_entry
> -    = xmalloc (sizeof (struct cache_entry) + liblen + len);
> -
> -  new_entry->lib = memcpy ((char *) (new_entry + 1), lib, liblen);
> -  new_entry->path = new_entry->lib + liblen;
> -  snprintf (new_entry->path, len, "%s/%s", path, lib);
> +  struct cache_entry *new_entry = xmalloc (sizeof (*new_entry));
> +
> +  struct stringtable_entry *path_interned;
> +  {
> +    /* Use a small, on-stack buffer in most cases.  */
> +    char buf[200];
> +    int ret = snprintf (buf, sizeof (buf), "%s/%s", path, lib);
> +    if (ret < 0 || ret >= sizeof (buf) - 1)
> +      {
> +	char *p;
> +	if (asprintf (&p, "%s/%s", path, lib) < 0)
> +	  error (EXIT_FAILURE, errno, _("Could not create library path"));
> +	path_interned = stringtable_intern (&strings, p);
> +	free (p);
> +      }
> +    else
> +      path_interned = stringtable_intern (&strings, buf);
> +  }
> +
> +  new_entry->lib = stringtable_intern (&strings, lib);
> +  new_entry->path = path_interned;
>    new_entry->flags = flags;
>    new_entry->osversion = osversion;
>    new_entry->hwcap = hwcap;
> 

Ok. Is this small-string optimization really worth it, compared to just
using asprintf?

  reply	other threads:[~2020-10-22 21:08 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-01 16:31 [PATCH 00/28] glibc-hwcaps support Florian Weimer via Libc-alpha
2020-10-01 16:31 ` [PATCH 01/28] elf: Do not search HWCAP subdirectories in statically linked binaries Florian Weimer via Libc-alpha
2020-10-01 18:22   ` Adhemerval Zanella via Libc-alpha
2020-10-01 18:24     ` Carlos O'Donell via Libc-alpha
2020-10-01 18:29       ` Adhemerval Zanella via Libc-alpha
2020-10-01 20:24         ` Carlos O'Donell via Libc-alpha
2020-10-01 16:31 ` [PATCH 02/28] elf: Implement __rtld_malloc_is_full Florian Weimer via Libc-alpha
2020-10-01 18:23   ` Adhemerval Zanella via Libc-alpha
2020-10-08  9:44     ` Florian Weimer via Libc-alpha
2020-10-01 16:31 ` [PATCH 03/28] elf: Implement _dl_write Florian Weimer via Libc-alpha
2020-10-05 19:46   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:31 ` [PATCH 04/28] elf: Extract command-line/environment variables state from rtld.c Florian Weimer via Libc-alpha
2020-10-06 20:45   ` Adhemerval Zanella via Libc-alpha
2020-10-08 11:32     ` Florian Weimer via Libc-alpha
2020-10-01 16:32 ` [PATCH 05/28] elf: Move ld.so error/help output to _dl_usage Florian Weimer via Libc-alpha
2020-10-06 21:06   ` Adhemerval Zanella via Libc-alpha
2020-10-08 12:19     ` Florian Weimer via Libc-alpha
2020-10-01 16:32 ` [PATCH 06/28] elf: Record whether paths come from LD_LIBRARY_PATH or --library-path Florian Weimer via Libc-alpha
2020-10-07 16:39   ` Adhemerval Zanella via Libc-alpha
2020-10-07 16:49     ` Florian Weimer
2020-10-01 16:32 ` [PATCH 07/28] elf: Implement ld.so --help Florian Weimer via Libc-alpha
2020-10-07 17:16   ` Adhemerval Zanella via Libc-alpha
2020-10-08 13:13     ` Florian Weimer via Libc-alpha
2020-10-01 16:32 ` [PATCH 08/28] elf: Implement ld.so --version Florian Weimer via Libc-alpha
2020-10-07 18:36   ` Adhemerval Zanella via Libc-alpha
2020-10-07 18:38     ` Adhemerval Zanella via Libc-alpha
2020-10-08 13:37     ` Florian Weimer via Libc-alpha
2020-10-01 16:32 ` [PATCH 09/28] scripts/update-copyrights: Update csu/version.c, elf/dl-usage.c Florian Weimer via Libc-alpha
2020-10-07 18:41   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:32 ` [PATCH 10/28] elf: Use the term "program interpreter" in the ld.so help message Florian Weimer via Libc-alpha
2020-10-07 21:08   ` Adhemerval Zanella via Libc-alpha
2020-10-08 14:08     ` Florian Weimer via Libc-alpha
2020-10-01 16:32 ` [PATCH 11/28] elf: Print the full name of the dynamic loader " Florian Weimer via Libc-alpha
2020-10-08 12:38   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:32 ` [PATCH 12/28] elf: Make __rtld_env_path_list and __rtld_search_dirs global variables Florian Weimer via Libc-alpha
2020-10-08 13:27   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:32 ` [PATCH 13/28] elf: Add library search path information to ld.so --help Florian Weimer via Libc-alpha
2020-10-08 16:22   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:33 ` [PATCH 14/28] elf: Enhance ld.so --help to print HWCAP subdirectories Florian Weimer via Libc-alpha
2020-10-08 16:27   ` Adhemerval Zanella via Libc-alpha
2020-10-09  8:18     ` Florian Weimer via Libc-alpha
2020-10-09 13:49   ` Matheus Castanho via Libc-alpha
2020-10-09 17:08     ` Florian Weimer via Libc-alpha
2020-10-09 17:12       ` Florian Weimer via Libc-alpha
2020-10-09 18:54         ` Matheus Castanho via Libc-alpha
2020-10-12  9:47           ` Florian Weimer via Libc-alpha
2020-10-01 16:33 ` [PATCH 15/28] elf: Do not pass GLRO(dl_platform), GLRO(dl_platformlen) to _dl_important_hwcaps Florian Weimer via Libc-alpha
2020-10-08 18:04   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:33 ` [PATCH 16/28] elf: Add glibc-hwcaps support for LD_LIBRARY_PATH Florian Weimer via Libc-alpha
2020-10-08 10:13   ` Szabolcs Nagy via Libc-alpha
2020-10-09  9:08     ` Florian Weimer via Libc-alpha
2020-10-09 10:50       ` Szabolcs Nagy via Libc-alpha
2020-10-09 10:55         ` Florian Weimer via Libc-alpha
2020-10-09 11:03           ` Szabolcs Nagy via Libc-alpha
2020-10-08 23:16   ` Paul A. Clarke via Libc-alpha
2020-10-09  8:56     ` Florian Weimer via Libc-alpha
2020-10-09 13:19   ` Adhemerval Zanella via Libc-alpha
2020-10-12 11:54     ` Florian Weimer via Libc-alpha
2020-10-01 16:33 ` [PATCH 17/28] x86_64: Add glibc-hwcaps support Florian Weimer via Libc-alpha
2020-10-01 16:33 ` [PATCH 18/28] powerpc64le: " Florian Weimer via Libc-alpha
2020-10-01 18:56   ` Paul A. Clarke via Libc-alpha
2020-10-05  9:47     ` Florian Weimer via Libc-alpha
2020-10-05 19:15       ` Paul A. Clarke via Libc-alpha
2020-10-06 12:20         ` Florian Weimer via Libc-alpha
2020-10-06 17:45           ` Paul A. Clarke via Libc-alpha
2020-10-09  9:06             ` Florian Weimer via Libc-alpha
2020-10-01 16:33 ` [PATCH 19/28] s390x: Add " Florian Weimer via Libc-alpha
2020-10-01 16:33 ` [PATCH 20/28] aarch64: " Florian Weimer via Libc-alpha
2020-10-14 13:46   ` Adhemerval Zanella via Libc-alpha
2020-10-14 14:08     ` Florian Weimer via Libc-alpha
2020-10-14 14:15       ` Adhemerval Zanella via Libc-alpha
2020-10-14 14:37         ` Szabolcs Nagy via Libc-alpha
2020-10-14 14:43           ` Adhemerval Zanella via Libc-alpha
2020-10-14 15:13             ` Florian Weimer via Libc-alpha
2020-10-14 14:44           ` Florian Weimer via Libc-alpha
2020-10-14 15:09             ` Szabolcs Nagy via Libc-alpha
2020-10-01 16:33 ` [PATCH 21/28] elf: Add endianness markup to ld.so.cache Florian Weimer via Libc-alpha
2020-10-14 14:07   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:33 ` [PATCH 22/28] elf: Add extension mechanism " Florian Weimer via Libc-alpha
2020-10-15 17:52   ` Adhemerval Zanella via Libc-alpha
2020-10-30 12:22     ` Florian Weimer via Libc-alpha
2020-11-03 12:45       ` Adhemerval Zanella via Libc-alpha
2020-11-03 15:30         ` Florian Weimer via Libc-alpha
2020-10-01 16:34 ` [PATCH 23/28] elf: Unify old and new format cache handling code in ld.so Florian Weimer via Libc-alpha
2020-10-16 14:37   ` Adhemerval Zanella via Libc-alpha
2020-10-30 13:22     ` Florian Weimer via Libc-alpha
2020-11-03 13:02       ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:34 ` [PATCH 24/28] elf: Implement a string table for ldconfig, with tail merging Florian Weimer via Libc-alpha
2020-10-20 14:25   ` Adhemerval Zanella via Libc-alpha
2020-10-30 17:08     ` Florian Weimer via Libc-alpha
2020-11-03 13:05       ` Adhemerval Zanella via Libc-alpha
2020-11-03 15:29         ` Florian Weimer via Libc-alpha
2020-10-01 16:34 ` [PATCH 25/28] elf: Implement tail merging of strings in ldconfig Florian Weimer via Libc-alpha
2020-10-22 21:08   ` Adhemerval Zanella via Libc-alpha [this message]
2020-10-30 17:36     ` Florian Weimer via Libc-alpha
2020-10-01 16:34 ` [PATCH 26/28] elf: In ldconfig, extract the new_sub_entry function from search_dir Florian Weimer via Libc-alpha
2020-10-27 13:15   ` Adhemerval Zanella via Libc-alpha
2020-10-01 16:34 ` [PATCH 27/28] elf: Process glibc-hwcaps subdirectories in ldconfig Florian Weimer via Libc-alpha
2020-10-27 17:28   ` Adhemerval Zanella via Libc-alpha
2020-11-04 11:57     ` Florian Weimer via Libc-alpha
2020-10-01 16:34 ` [PATCH 28/28] elf: Add glibc-hwcaps subdirectory support to ld.so cache processing Florian Weimer via Libc-alpha
2020-10-01 16:50 ` [PATCH 00/28] glibc-hwcaps support H.J. Lu via Libc-alpha
2020-10-01 16:54   ` Florian Weimer via Libc-alpha

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/libc/involved.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d1c92c14-0d88-bcc5-8b9d-60c13ea2bb9c@linaro.org \
    --to=libc-alpha@sourceware.org \
    --cc=adhemerval.zanella@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).