From: "H.J. Lu" <hjl.tools@gmail.com>
To: alexandre.ferrieux@orange.com
Cc: libc-alpha@sourceware.org, carlos@redhat.com
Subject: Re: [PATCH] Fix #27777 - now use a doubly-linked list for _IO_list_all
Date: Fri, 26 Apr 2024 07:45:10 -0700 [thread overview]
Message-ID: <CAMe9rOqo_HaiziwLiHYpi2kFiP+r6OWxH92+484vKkG1i+LNdQ@mail.gmail.com> (raw)
In-Reply-To: <20240426141847.2458829-1-alexandre.ferrieux@orange.com>
On Fri, Apr 26, 2024 at 7:19 AM <alexandre.ferrieux@orange.com> wrote:
>
> From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
>
> Hi,
>
> This patch fixes #27777 "fclose does a linear search, takes ages when many FILE* are opened".
> Simply put, the master list of opened (FILE*), namely _IO_list_all, is a singly-linked list.
> As a consequence, the removal of a single element is in O(N), which cripples the performance
> of fopen(). The patch switches to a doubly-linked list, yielding O(1) removal.
>
> As an illustration, after opening 1 million (FILE*), the fclose() of 100 of them takes more
> than 6 seconds without the patch, and under a millisecond with it.
>
> Signed-off-by: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
>
>
> ---
> elf/libc_early_init.c | 3 +++
> libio/bits/types/struct_FILE.h | 2 +-
> libio/genops.c | 19 +++++++++----------
> libio/libioP.h | 11 +++++++----
> libio/stdfiles.c | 8 ++++++++
> 5 files changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
> index 575b837f8f..756274f652 100644
> --- a/elf/libc_early_init.c
> +++ b/elf/libc_early_init.c
> @@ -23,6 +23,7 @@
> #include <lowlevellock.h>
> #include <pthread_early_init.h>
> #include <sys/single_threaded.h>
> +#include <libioP.h>
>
> #ifdef SHARED
> _Bool __libc_initial;
> @@ -41,6 +42,8 @@ __libc_early_init (_Bool initial)
> __libc_single_threaded_internal = __libc_initial = initial;
> #endif
>
> + __stdfiles_init ();
> +
> __pthread_early_init ();
>
> #if ENABLE_ELISION_SUPPORT
> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> index 7cdaae86f8..daebd6ce0a 100644
> --- a/libio/bits/types/struct_FILE.h
> +++ b/libio/bits/types/struct_FILE.h
> @@ -67,7 +67,7 @@ struct _IO_FILE
>
> struct _IO_marker *_markers;
>
> - struct _IO_FILE *_chain;
> + struct _IO_FILE *_chain,**_prevchain;
We need to add a new symbol version for stdio functions
when FILE size is changed.
> int _fileno;
> int _flags2;
> diff --git a/libio/genops.c b/libio/genops.c
> index bc45e60a09..a27f65581b 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -53,7 +53,6 @@ _IO_un_link (struct _IO_FILE_plus *fp)
> {
> if (fp->file._flags & _IO_LINKED)
> {
> - FILE **f;
> #ifdef _IO_MTSAFE_IO
> _IO_cleanup_region_start_noarg (flush_cleanup);
> _IO_lock_lock (list_all_lock);
> @@ -62,15 +61,13 @@ _IO_un_link (struct _IO_FILE_plus *fp)
> #endif
> if (_IO_list_all == NULL)
> ;
> - else if (fp == _IO_list_all)
> - _IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
> - else
> - for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
> - if (*f == (FILE *) fp)
> - {
> - *f = fp->file._chain;
> - break;
> - }
> + else {
> + struct _IO_FILE_plus **pr,*nx;
> + pr=(struct _IO_FILE_plus**)fp->file._prevchain;
> + nx=(struct _IO_FILE_plus*)fp->file._chain;
> + *pr=nx;
> + if (nx) nx->file._prevchain=(FILE **)pr;
> + }
> fp->file._flags &= ~_IO_LINKED;
> #ifdef _IO_MTSAFE_IO
> _IO_funlockfile ((FILE *) fp);
> @@ -95,6 +92,8 @@ _IO_link_in (struct _IO_FILE_plus *fp)
> _IO_flockfile ((FILE *) fp);
> #endif
> fp->file._chain = (FILE *) _IO_list_all;
> + fp->file._prevchain = (FILE **) &_IO_list_all;
> + _IO_list_all->file._prevchain=&fp->file._chain;
> _IO_list_all = fp;
> #ifdef _IO_MTSAFE_IO
> _IO_funlockfile ((FILE *) fp);
> diff --git a/libio/libioP.h b/libio/libioP.h
> index 1af287b19f..5862f815bc 100644
> --- a/libio/libioP.h
> +++ b/libio/libioP.h
> @@ -429,6 +429,9 @@ libc_hidden_proto (_IO_list_resetlock)
> extern void _IO_enable_locks (void) __THROW;
> libc_hidden_proto (_IO_enable_locks)
>
> +/* Needed for proper initialization of the doubly-linked list */
> +extern void __stdfiles_init(void);
> +
> /* Default jumptable functions. */
>
> extern int _IO_default_underflow (FILE *) __THROW;
> @@ -904,12 +907,12 @@ extern int _IO_vscanf (const char *, va_list) __THROW;
> # ifdef _IO_USE_OLD_IO_FILE
> # define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
> { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD, \
> 0, _IO_pos_BAD, 0, 0, { 0 }, &_IO_stdfile_##FD##_lock }
> # else
> # define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
> { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD, \
> 0, _IO_pos_BAD, 0, 0, { 0 }, &_IO_stdfile_##FD##_lock, _IO_pos_BAD,\
> NULL, WDP, 0 }
> # endif
> @@ -917,12 +920,12 @@ extern int _IO_vscanf (const char *, va_list) __THROW;
> # ifdef _IO_USE_OLD_IO_FILE
> # define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
> { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD, \
> 0, _IO_pos_BAD }
> # else
> # define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
> { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD, \
> 0, _IO_pos_BAD, 0, 0, { 0 }, 0, _IO_pos_BAD, \
> NULL, WDP, 0 }
> # endif
> diff --git a/libio/stdfiles.c b/libio/stdfiles.c
> index cd8eca8bf3..87c9b249df 100644
> --- a/libio/stdfiles.c
> +++ b/libio/stdfiles.c
> @@ -54,4 +54,12 @@ DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
> DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
>
> struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
> +
> +void __stdfiles_init(void) // finish the double-linking for stdfiles as static initialization cannot
> +{
> + struct _IO_FILE **f;
> +
> + for(f=(struct _IO_FILE **)&_IO_list_all;(*f);f=&(**f)._chain) (**f)._prevchain=f;
> +}
> +
> libc_hidden_data_def (_IO_list_all)
> --
> 2.30.2
>
> ____________________________________________________________________________________________________________
> Ce message et ses pieces jointes peuvent contenir des informations confidentielles ou privilegiees et ne doivent donc
> pas etre diffuses, exploites ou copies sans autorisation. Si vous avez recu ce message par erreur, veuillez le signaler
> a l'expediteur et le detruire ainsi que les pieces jointes. Les messages electroniques etant susceptibles d'alteration,
> Orange decline toute responsabilite si ce message a ete altere, deforme ou falsifie. Merci.
>
> This message and its attachments may contain confidential or privileged information that may be protected by law;
> they should not be distributed, used or copied without authorisation.
> If you have received this email in error, please notify the sender and delete this message and its attachments.
> As emails may be altered, Orange is not liable for messages that have been modified, changed or falsified.
> Thank you.
>
--
H.J.
next prev parent reply other threads:[~2024-04-26 14:46 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-26 14:18 [PATCH] Fix #27777 - now use a doubly-linked list for _IO_list_all alexandre.ferrieux
2024-04-26 14:45 ` H.J. Lu [this message]
[not found] ` <ffa6e29b-3a7b-4be6-a0d2-327510a7094d@orange.com>
2024-04-26 15:05 ` H.J. Lu
2024-04-26 15:12 ` H.J. Lu
[not found] ` <84cbc4a9-2ddf-45f3-94be-132441db5c8a@orange.com>
2024-04-26 15:16 ` H.J. Lu
[not found] ` <7fa02e06-42b1-463b-a7c4-66600d524186@orange.com>
2024-04-26 16:08 ` H.J. Lu
[not found] ` <5fad7b2e-43a4-4e57-bd10-a9ce1ce38006@orange.com>
2024-04-26 16:24 ` H.J. Lu
2024-04-26 17:51 ` Florian Weimer
2024-04-26 18:20 ` alexandre.ferrieux
2024-04-26 18:44 ` alexandre.ferrieux
2024-04-26 19:08 ` Florian Weimer
2024-04-26 19:08 ` Florian Weimer
2024-04-26 18:50 ` H.J. Lu
2024-04-26 19:04 ` alexandre.ferrieux
2024-04-26 19:16 ` Florian Weimer
2024-04-26 20:15 ` alexandre.ferrieux
2024-04-29 13:20 ` Florian Weimer
2024-04-29 19:05 ` alexandre.ferrieux
2024-04-30 2:47 ` H.J. Lu
2024-04-30 17:22 ` H.J. Lu
2024-04-26 19:09 ` Florian Weimer
-- strict thread matches above, loose matches on Subject: below --
2024-04-30 17:20 H.J. Lu
2024-04-30 18:00 ` alexandre.ferrieux
2024-04-30 18:11 ` H.J. Lu
2024-04-30 19:37 ` H.J. Lu
2024-04-30 19:52 ` alexandre.ferrieux
2024-04-30 20:02 ` H.J. Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/libc/involved.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAMe9rOqo_HaiziwLiHYpi2kFiP+r6OWxH92+484vKkG1i+LNdQ@mail.gmail.com \
--to=hjl.tools@gmail.com \
--cc=alexandre.ferrieux@orange.com \
--cc=carlos@redhat.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).