git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jeff King <peff@peff.net>
To: "René Scharfe" <l.s.r@web.de>
Cc: Git List <git@vger.kernel.org>,
	Junio C Hamano <gitster@pobox.com>,
	Brandon Williams <bmwill@google.com>
Subject: Re: [PATCH] strbuf: add strbuf_add_real_path()
Date: Sat, 25 Feb 2017 15:11:23 -0500	[thread overview]
Message-ID: <20170225201123.ozrrfgim2rdctczv@sigill.intra.peff.net> (raw)
In-Reply-To: <4d191b86-d36c-e3ec-99c6-d15baa6b659a@web.de>

On Sat, Feb 25, 2017 at 05:00:33PM +0100, René Scharfe wrote:

> Add a function for appending the canonized absolute pathname of a given
> path to a strbuf.  It keeps the existing contents intact, as expected of
> a function of the strbuf_add() family, while avoiding copying the result
> if the given strbuf is empty.  It's more consistent with the rest of the
> strbuf API than strbuf_realpath(), which it's wrapping.
> 
> Also add a semantic patch demonstrating its intended usage and apply it
> to the current tree.  Using strbuf_add_real_path() instead of calling
> strbuf_addstr() and real_path() avoids an extra copy to a static buffer.

It's also re-entrant, which real_path() is not.

> +void strbuf_add_real_path(struct strbuf *sb, const char *path)
> +{
> +	if (sb->len) {
> +		struct strbuf resolved = STRBUF_INIT;
> +		strbuf_realpath(&resolved, path, 1);
> +		strbuf_addbuf(sb, &resolved);
> +		strbuf_release(&resolved);
> +	} else
> +		strbuf_realpath(sb, path, 1);
> +}

The wrapping here seems a little backwards. If strbuf_add_real_path()
were the inner one, then we would not need this extra allocation. I know
that the reasons are historical, but I don't think it would be
impossible to teach the realpath code to do it.

OTOH, it may not be worth the effort. It's not like strbuf_realpath()
doesn't allocate secondary strbufs for its work already, so dropping one
more is probably not that exciting. And I certainly think your patch is an
incremental improvement.

Out of curiosity, I took a stab at the patch, which is below. Only
lightly tested by me, and it does make the logic a bit more complicated
to read, as you have to adjust for the original "base" in several
places.

diff --git a/abspath.c b/abspath.c
index 2f0c26e0e..286072f48 100644
--- a/abspath.c
+++ b/abspath.c
@@ -12,9 +12,9 @@ int is_directory(const char *path)
 }
 
 /* removes the last path component from 'path' except if 'path' is root */
-static void strip_last_component(struct strbuf *path)
+static void strip_last_component(struct strbuf *path, size_t base)
 {
-	size_t offset = offset_1st_component(path->buf);
+	size_t offset = base + offset_1st_component(path->buf + base);
 	size_t len = path->len;
 
 	/* Find start of the last component */
@@ -49,14 +49,15 @@ static void get_next_component(struct strbuf *next, struct strbuf *remaining)
 }
 
 /* copies root part from remaining to resolved, canonicalizing it on the way */
-static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
+static void get_root_part(struct strbuf *resolved, struct strbuf *remaining,
+			  size_t base)
 {
 	int offset = offset_1st_component(remaining->buf);
 
-	strbuf_reset(resolved);
+	strbuf_setlen(resolved, base);
 	strbuf_add(resolved, remaining->buf, offset);
 #ifdef GIT_WINDOWS_NATIVE
-	convert_slashes(resolved->buf);
+	convert_slashes(resolved->buf + base);
 #endif
 	strbuf_remove(remaining, 0, offset);
 }
@@ -78,8 +79,8 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
  * informative error message if there is a problem.  Otherwise, return
  * NULL on errors (without generating any output).
  */
-char *strbuf_realpath(struct strbuf *resolved, const char *path,
-		      int die_on_error)
+char *strbuf_add_real_path(struct strbuf *resolved, const char *path,
+			   int die_on_error)
 {
 	struct strbuf remaining = STRBUF_INIT;
 	struct strbuf next = STRBUF_INIT;
@@ -87,6 +88,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 	char *retval = NULL;
 	int num_symlinks = 0;
 	struct stat st;
+	size_t base = resolved->len;
 
 	if (!*path) {
 		if (die_on_error)
@@ -96,9 +98,9 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 	}
 
 	strbuf_addstr(&remaining, path);
-	get_root_part(resolved, &remaining);
+	get_root_part(resolved, &remaining, base);
 
-	if (!resolved->len) {
+	if (resolved->len == base) {
 		/* relative path; can use CWD as the initial resolved path */
 		if (strbuf_getcwd(resolved)) {
 			if (die_on_error)
@@ -118,7 +120,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 			continue; /* '.' component */
 		} else if (next.len == 2 && !strcmp(next.buf, "..")) {
 			/* '..' component; strip the last path component */
-			strip_last_component(resolved);
+			strip_last_component(resolved, base);
 			continue;
 		}
 
@@ -127,12 +129,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 			strbuf_addch(resolved, '/');
 		strbuf_addbuf(resolved, &next);
 
-		if (lstat(resolved->buf, &st)) {
+		if (lstat(resolved->buf + base, &st)) {
 			/* error out unless this was the last component */
 			if (errno != ENOENT || remaining.len) {
 				if (die_on_error)
 					die_errno("Invalid path '%s'",
-						  resolved->buf);
+						  resolved->buf + base);
 				else
 					goto error_out;
 			}
@@ -150,7 +152,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 					goto error_out;
 			}
 
-			len = strbuf_readlink(&symlink, resolved->buf,
+			len = strbuf_readlink(&symlink, resolved->buf + base,
 					      st.st_size);
 			if (len < 0) {
 				if (die_on_error)
@@ -162,14 +164,14 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 
 			if (is_absolute_path(symlink.buf)) {
 				/* absolute symlink; set resolved to root */
-				get_root_part(resolved, &symlink);
+				get_root_part(resolved, &symlink, base);
 			} else {
 				/*
 				 * relative symlink
 				 * strip off the last component since it will
 				 * be replaced with the contents of the symlink
 				 */
-				strip_last_component(resolved);
+				strip_last_component(resolved, base);
 			}
 
 			/*
@@ -202,6 +204,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
 	return retval;
 }
 
+char *strbuf_realpath(struct strbuf *sb, const char *path, int die_on_error)
+{
+	strbuf_reset(sb);
+	return strbuf_add_real_path(sb, path, die_on_error);
+}
+
 const char *real_path(const char *path)
 {
 	static struct strbuf realpath = STRBUF_INIT;

  reply	other threads:[~2017-02-25 20:13 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-25 16:00 [PATCH] strbuf: add strbuf_add_real_path() René Scharfe
2017-02-25 20:11 ` Jeff King [this message]
2017-02-27 18:22 ` Brandon Williams
2017-02-27 22:45   ` René Scharfe
2017-02-28 20:42     ` Brandon Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170225201123.ozrrfgim2rdctczv@sigill.intra.peff.net \
    --to=peff@peff.net \
    --cc=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=l.s.r@web.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).