bug-gnulib@gnu.org mirror (unofficial)
 help / color / mirror / Atom feed
* [PATCH] tmpdir.c (path_search_alloc): New function.
@ 2020-09-13  9:33 John Darrington
  2020-09-13 11:12 ` Bruno Haible
  0 siblings, 1 reply; 4+ messages in thread
From: John Darrington @ 2020-09-13  9:33 UTC (permalink / raw)
  To: bug-gnulib

* lib/tmpdir.c (path_search_alloc): Define new function similar to
path_search,  but which allocates the buffer for the result instead
of relying on the caller to preallocate it.
* lib/tmpdir.h (path_search_alloc): Declare it.
---
 lib/tmpdir.c | 127 +++++++++++++++++++++++++++++++++++----------------
 lib/tmpdir.h |   6 +++
 2 files changed, 93 insertions(+), 40 deletions(-)

diff --git a/lib/tmpdir.c b/lib/tmpdir.c
index 28ff99f58..70ab7007f 100644
--- a/lib/tmpdir.c
+++ b/lib/tmpdir.c
@@ -83,43 +83,23 @@ direxists (const char *dir)
   return __xstat64 (_STAT_VER, dir, &buf) == 0 && S_ISDIR (buf.st_mode);
 }
 
-/* Path search algorithm, for tmpnam, tmpfile, etc.  If DIR is
-   non-null and exists, uses it; otherwise uses the first of $TMPDIR,
-   P_tmpdir, /tmp that exists.  Copies into TMPL a template suitable
-   for use with mk[s]temp.  Will fail (-1) if DIR is non-null and
-   doesn't exist, none of the searched dirs exists, or there's not
-   enough space in TMPL. */
-int
-path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx,
-             bool try_tmpdir)
+
+static int
+__path_search (char *tmpl, const char **dirx, const char *pfx, bool try_tmpdir)
 {
   const char *d;
-  size_t dlen, plen;
-  bool add_slash;
-
-  if (!pfx || !pfx[0])
-    {
-      pfx = "file";
-      plen = 4;
-    }
-  else
-    {
-      plen = strlen (pfx);
-      if (plen > 5)
-        plen = 5;
-    }
 
   if (try_tmpdir)
     {
       d = __libc_secure_getenv ("TMPDIR");
       if (d != NULL && direxists (d))
-        dir = d;
-      else if (dir != NULL && direxists (dir))
+        *dirx = d;
+      else if (*dirx != NULL && direxists (*dirx))
         /* nothing */ ;
       else
-        dir = NULL;
+        *dirx = NULL;
     }
-  if (dir == NULL)
+  if (*dirx == NULL)
     {
 #if defined _WIN32 && ! defined __CYGWIN__
       char dirbuf[PATH_MAX];
@@ -131,26 +111,63 @@ path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx,
          directory (unless $TMPDIR is set).  */
       retval = GetTempPath (PATH_MAX, dirbuf);
       if (retval > 0 && retval < PATH_MAX && direxists (dirbuf))
-        dir = dirbuf;
+        *dirx = dirbuf;
       else
 #endif
-      if (direxists (P_tmpdir))
-        dir = P_tmpdir;
-      else if (strcmp (P_tmpdir, "/tmp") != 0 && direxists ("/tmp"))
-        dir = "/tmp";
-      else
-        {
-          __set_errno (ENOENT);
-          return -1;
-        }
+        if (direxists (P_tmpdir))
+          *dirx = P_tmpdir;
+        else if (strcmp (P_tmpdir, "/tmp") != 0 && direxists ("/tmp"))
+          *dirx = "/tmp";
+        else
+          {
+            __set_errno (ENOENT);
+            return -1;
+          }
     }
 
-  dlen = strlen (dir);
+  return 0;
+}
+
+static void
+__impute_lengths (const char *pfx, const char *dir, size_t *plen, size_t *dlen, bool *add_slash)
+{
+  if (!pfx || !pfx[0])
+    {
+      pfx = "file";
+      *plen = 4;
+    }
+  else
+    {
+      *plen = strlen (pfx);
+      if (*plen > 5)
+        *plen = 5;
+    }
+
+  *dlen = strlen (dir);
 #ifdef __VMS
-  add_slash = 0;
+  *add_slash = 0;
 #else
-  add_slash = dlen != 0 && !ISSLASH (dir[dlen - 1]);
+  *add_slash = *dlen != 0 && !ISSLASH (dir[*dlen - 1]);
 #endif
+}
+
+/* Path search algorithm, for tmpnam, tmpfile, etc.  If DIR is
+   non-null and exists, uses it; otherwise uses the first of $TMPDIR,
+   P_tmpdir, /tmp that exists.  Copies into TMPL a template suitable
+   for use with mk[s]temp.  Will fail (-1) if DIR is non-null and
+   doesn't exist, none of the searched dirs exists, or there's not
+   enough space in TMPL. */
+int
+path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx,
+             bool try_tmpdir)
+{
+  if (0 != __path_search (tmpl, &dir, pfx, try_tmpdir))
+    return -1;
+
+  size_t plen;
+  size_t dlen;
+  bool add_slash;
+  __impute_lengths (pfx, dir, &plen, &dlen, &add_slash);
 
   /* check we have room for "${dir}/${pfx}XXXXXX\0" */
   if (tmpl_len < dlen + add_slash + plen + 6 + 1)
@@ -163,3 +180,33 @@ path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx,
   sprintf (tmpl + dlen, &"/%.*sXXXXXX"[!add_slash], (int) plen, pfx);
   return 0;
 }
+
+
+/* Like path_search, but this function will allocate TMPL and fill
+   TMPL_LEN with the allocated length.   The caller must free TMPL when
+   no longer required.  */
+int
+path_search_alloc (char **tmpl, size_t *tmpl_len, const char *dir, const char *pfx,
+             bool try_tmpdir)
+{
+  if (0 != __path_search (*tmpl, &dir, pfx, try_tmpdir))
+    return -1;
+
+  size_t plen;
+  size_t dlen;
+  bool add_slash;
+  __impute_lengths (pfx, dir, &plen, &dlen, &add_slash);
+
+  /* check we have room for "${dir}/${pfx}XXXXXX\0" */
+  *tmpl = malloc (dlen + add_slash + plen + 6 + 1);
+  if (!*tmpl)
+    {
+      __set_errno (ENOMEM);
+      return -1;
+    }
+  *tmpl_len = dlen + add_slash + plen + 6 + 1;
+
+  memcpy (*tmpl, dir, dlen);
+  sprintf (*tmpl + dlen, &"/%.*sXXXXXX"[!add_slash], (int) plen, pfx);
+  return 0;
+}
diff --git a/lib/tmpdir.h b/lib/tmpdir.h
index 4d694a3d9..28c62fcc2 100644
--- a/lib/tmpdir.h
+++ b/lib/tmpdir.h
@@ -24,3 +24,9 @@
    doesn't exist, none of the searched dirs exists, or there's not
    enough space in TMPL. */
 extern int path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx, bool try_tmpdir);
+
+/* Like path_search, except that TMPL is allocated automatically.
+   TMPL may not be null.  *TMPL must be freed by the caller, when no longer needed.
+   After calling this function *TMPL_LEN will be set to the lenght of *TMPL.  */
+extern int path_search_alloc (char **tmpl, size_t *tmpl_len, const char *dir, const char *pfx,
+                       bool try_tmpdir);
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] tmpdir.c (path_search_alloc): New function.
  2020-09-13  9:33 [PATCH] tmpdir.c (path_search_alloc): New function John Darrington
@ 2020-09-13 11:12 ` Bruno Haible
  2020-09-13 12:11   ` John Darrington
  0 siblings, 1 reply; 4+ messages in thread
From: Bruno Haible @ 2020-09-13 11:12 UTC (permalink / raw)
  To: bug-gnulib

Hi John,

As this is your first major contribution to Gnulib, can you please make sure
you have signed a copyright assignment with the FSF regarding 'GNULIB'?

> * lib/tmpdir.c (path_search_alloc): Define new function similar to
> path_search,  but which allocates the buffer for the result instead
> of relying on the caller to preallocate it.

This is a good idea, because it gets rid of an EINVAL error return.

> +/* Like path_search, except that TMPL is allocated automatically.
> +   TMPL may not be null.  *TMPL must be freed by the caller, when no longer needed.
> +   After calling this function *TMPL_LEN will be set to the lenght of *TMPL.  */
> +extern int path_search_alloc (char **tmpl, size_t *tmpl_len, const char *dir, const char *pfx,
> +                       bool try_tmpdir);

The calling convention is odd: If the caller is only meant to use *TMPL and
later free() it, why does he need *TMPL_LEN? It seems redundant to return it
from this function. And then, if *TMPL is the only output (besides the error
condition), why not make it the return value? That is:

  extern char * path_search_alloc (const char *dir, const char *pfx, bool try_tmpdir);

In case of error, this function would return NULL with errno set.

Indentation: Please make the second arglist line align better.

Typo: s/lenght/length/

> +static int
> +__path_search (char *tmpl, const char **dirx, const char *pfx, bool try_tmpdir)
> ...
> +static void
> +__impute_lengths (const char *pfx, const char *dir, size_t *plen, size_t *dlen, bool *add_slash)

Please don't choose function names that start with underscore. These function
names are in the scope of the vendor libc. And in fact, such function names
would make merging with glibc harder, since glibc already has a function
'__path_search' (defined in sysdeps/posix/tempname.c).

Also, __path_search is a misnomer now: it does not search anything; it
determines the temporary directory in which to place a temporary file.

Bruno



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] tmpdir.c (path_search_alloc): New function.
  2020-09-13 11:12 ` Bruno Haible
@ 2020-09-13 12:11   ` John Darrington
  2020-09-13 14:07     ` Bruno Haible
  0 siblings, 1 reply; 4+ messages in thread
From: John Darrington @ 2020-09-13 12:11 UTC (permalink / raw)
  To: Bruno Haible; +Cc: bug-gnulib

On Sun, Sep 13, 2020 at 01:12:28PM +0200, Bruno Haible wrote:
     Hi John,
     
     As this is your first major contribution to Gnulib, can you please make sure
     you have signed a copyright assignment with the FSF regarding 'GNULIB'?

Yes,  I have.
     
     > * lib/tmpdir.c (path_search_alloc): Define new function similar to
     > path_search,  but which allocates the buffer for the result instead
     > of relying on the caller to preallocate it.
     
     This is a good idea, because it gets rid of an EINVAL error return.

It also means the caller does not have to guess how long the buffer should be.
     
     > +/* Like path_search, except that TMPL is allocated automatically.
     > +   TMPL may not be null.  *TMPL must be freed by the caller, when no longer needed.
     > +   After calling this function *TMPL_LEN will be set to the lenght of *TMPL.  */
     > +extern int path_search_alloc (char **tmpl, size_t *tmpl_len, const char *dir, const char *pfx,
     > +                       bool try_tmpdir);
     
     The calling convention is odd: If the caller is only meant to use *TMPL and
     later free() it, why does he need *TMPL_LEN? It seems redundant to return it
     from this function. And then, if *TMPL is the only output (besides the error
     condition), why not make it the return value? That is:
     
       extern char * path_search_alloc (const char *dir, const char *pfx, bool try_tmpdir);
     
     In case of error, this function would return NULL with errno set.

That would also work.  But I don't think the suggested interface is particularly odd.
It is very similar to the getline function from libc.

Regarding TMPL_LEN,  you are right, it *may* not be needed.   However I often find that
when writing code which munges strings, one needs to know the length of a string which
has already been calculated.   Of course one could simply use strlen to find it, but
strlen is O(n).  So I think that if a function has to calculate the length of a string
anyway, then it is worthwhile making that number available to the caller.
The caller is free to use it or ignore it as she wishes.  Often she'll be grateful
for it.
     
     Typo: s/lenght/length/

Thanks for noticing.
     
     Please don't choose function names that start with underscore. These function
     names are in the scope of the vendor libc. And in fact, such function names
     would make merging with glibc harder, since glibc already has a function
     '__path_search' (defined in sysdeps/posix/tempname.c).

I wondered why that was there.

     Also, __path_search is a misnomer now: it does not search anything; it
     determines the temporary directory in which to place a temporary file.
     
So what name would you suggest?  "get_temp_directory" ?

J'


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] tmpdir.c (path_search_alloc): New function.
  2020-09-13 12:11   ` John Darrington
@ 2020-09-13 14:07     ` Bruno Haible
  0 siblings, 0 replies; 4+ messages in thread
From: Bruno Haible @ 2020-09-13 14:07 UTC (permalink / raw)
  To: John Darrington; +Cc: bug-gnulib

Hi John,

>      > +/* Like path_search, except that TMPL is allocated automatically.
>      > +   TMPL may not be null.  *TMPL must be freed by the caller, when no longer needed.
>      > +   After calling this function *TMPL_LEN will be set to the lenght of *TMPL.  */
>      > +extern int path_search_alloc (char **tmpl, size_t *tmpl_len, const char *dir, const char *pfx,
>      > +                       bool try_tmpdir);
>      
>      The calling convention is odd: If the caller is only meant to use *TMPL and
>      later free() it, why does he need *TMPL_LEN? It seems redundant to return it
>      from this function. And then, if *TMPL is the only output (besides the error
>      condition), why not make it the return value? That is:
>      
>        extern char * path_search_alloc (const char *dir, const char *pfx, bool try_tmpdir);
>      
>      In case of error, this function would return NULL with errno set.
> 
> That would also work.  But I don't think the suggested interface is particularly odd.
> It is very similar to the getline function from libc.

The 'getline' function is not a good model to imitate. Why? Because generally there
should be two supported ways to call such a function
  (a) with a NULL argument - to let the function allocate as much memory as it needs,
  (b) with a stack-allocated buffer as argument - to let the function use that buffer
      if its size is sufficient.
The 'getline' function, as documented [1], does not support the second case.
Its calling convention is thus apparently tailored to the use-case of calling it in
a loop, avoiding memory allocations if a line is shorter than the previous line.
But this is a *very* special use-case, and most functions that people write — including
path_search_alloc — are not like this.

Actually your function supports (a) and (b). But the documentation is lacking and
incorrect:
  - "TMPL may not be null." OK but what can the caller put in TMPL?
  - "*TMPL must be freed by the caller, when no longer needed." Not true.
    In case (b), *TMPL is unchanged, and thus must not be free()d.

How about using this wording, from the GNU libunistring documentation [2]:

  [Functions returning a string result] take a (resultbuf, lengthp) argument
  pair. If resultbuf is not NULL and the result fits into *lengthp units, it
  is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
  allocated string is returned. In both cases, *lengthp is set to the length
  of the returned string. In case of error, NULL is returned and errno is set.

And the prototype that goes with it:

extern char * path_search_alloc (char *resultbuf, size_t *lengthp,
                                 const char *dir, const char *pfx, bool try_tmpdir);

This is simpler than
   extern int path_search_alloc (char **tmpl, size_t *tmpl_len,
                                 const char *dir, const char *pfx, bool try_tmpdir);
in two aspects:
  - It has one less indirection.
  - In the use-cases (a) and (b) the caller has one less local variable.

> I often find that
> when writing code which munges strings, one needs to know the length of a string which
> has already been calculated.   Of course one could simply use strlen to find it, but
> strlen is O(n)

OK, but then make sure that the caller understands what the returned value is.
If you call it 'tmpl_len' everyone would assume that tmpl_len == strlen (tmpl).
But in fact you return strlen (tmpl) + 1. Therefore it should be called
'tmpl_size', not 'tmpl_len'.

>      Also, __path_search is a misnomer now: it does not search anything; it
>      determines the temporary directory in which to place a temporary file.
>      
> So what name would you suggest?  "get_temp_directory" ?

How about renaming
  path_search -> gen_tempfile_template_prealloc
  path_search_alloc -> gen_tempfile_template_alloc
? The first one takes preallocated storage, whereas your function allocates
storage.

Bruno

[1] https://linux.die.net/man/3/getline
[2] https://www.gnu.org/software/libunistring/manual/html_node/Conventions.html



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-09-13 14:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-13  9:33 [PATCH] tmpdir.c (path_search_alloc): New function John Darrington
2020-09-13 11:12 ` Bruno Haible
2020-09-13 12:11   ` John Darrington
2020-09-13 14:07     ` Bruno Haible

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).