git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [PATCH v2 1/2] sha1_file: remove static strbuf from sha1_file_name()
@ 2018-01-17 17:54 Christian Couder
  2018-01-17 17:54 ` [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs Christian Couder
  0 siblings, 1 reply; 6+ messages in thread
From: Christian Couder @ 2018-01-17 17:54 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff Hostetler, Jonathan Tan, Derrick Stolee,
	Kevin Daudt, Christian Couder

Using a static buffer in sha1_file_name() is error prone
and the performance improvements it gives are not needed
in many of the callers.

So let's get rid of this static buffer and, if necessary
or helpful, let's use one in the caller.

Suggested-by: Jeff Hostetler <git@jeffhostetler.com>
Helped-by: Kevin Daudt <me@ikke.info>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 cache.h       |  8 +++-----
 http-walker.c |  6 ++++--
 http.c        | 16 ++++++++++------
 sha1_file.c   | 38 +++++++++++++++++++++++++-------------
 4 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/cache.h b/cache.h
index d8b975a571..6db565408e 100644
--- a/cache.h
+++ b/cache.h
@@ -957,12 +957,10 @@ extern void check_repository_format(void);
 #define TYPE_CHANGED    0x0040
 
 /*
- * Return the name of the file in the local object database that would
- * be used to store a loose object with the specified sha1.  The
- * return value is a pointer to a statically allocated buffer that is
- * overwritten each time the function is called.
+ * Put in `buf` the name of the file in the local object database that
+ * would be used to store a loose object with the specified sha1.
  */
-extern const char *sha1_file_name(const unsigned char *sha1);
+extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1);
 
 /*
  * Return an abbreviated sha1 unique within this repository's object database.
diff --git a/http-walker.c b/http-walker.c
index 1ae8363de2..07c2b1af82 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -544,8 +544,10 @@ static int fetch_object(struct walker *walker, unsigned char *sha1)
 	} else if (hashcmp(obj_req->sha1, req->real_sha1)) {
 		ret = error("File %s has bad hash", hex);
 	} else if (req->rename < 0) {
-		ret = error("unable to write sha1 filename %s",
-			    sha1_file_name(req->sha1));
+		struct strbuf buf = STRBUF_INIT;
+		sha1_file_name(&buf, req->sha1);
+		ret = error("unable to write sha1 filename %s", buf.buf);
+		strbuf_release(&buf);
 	}
 
 	release_http_object_request(req);
diff --git a/http.c b/http.c
index 5977712712..5979305bc9 100644
--- a/http.c
+++ b/http.c
@@ -2168,7 +2168,7 @@ struct http_object_request *new_http_object_request(const char *base_url,
 	unsigned char *sha1)
 {
 	char *hex = sha1_to_hex(sha1);
-	const char *filename;
+	struct strbuf filename = STRBUF_INIT;
 	char prevfile[PATH_MAX];
 	int prevlocal;
 	char prev_buf[PREV_BUF_SIZE];
@@ -2180,14 +2180,15 @@ struct http_object_request *new_http_object_request(const char *base_url,
 	hashcpy(freq->sha1, sha1);
 	freq->localfile = -1;
 
-	filename = sha1_file_name(sha1);
+	sha1_file_name(&filename, sha1);
 	snprintf(freq->tmpfile, sizeof(freq->tmpfile),
-		 "%s.temp", filename);
+		 "%s.temp", filename.buf);
 
-	snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+	snprintf(prevfile, sizeof(prevfile), "%s.prev", filename.buf);
 	unlink_or_warn(prevfile);
 	rename(freq->tmpfile, prevfile);
 	unlink_or_warn(freq->tmpfile);
+	strbuf_release(&filename);
 
 	if (freq->localfile != -1)
 		error("fd leakage in start: %d", freq->localfile);
@@ -2302,6 +2303,7 @@ void process_http_object_request(struct http_object_request *freq)
 int finish_http_object_request(struct http_object_request *freq)
 {
 	struct stat st;
+	struct strbuf filename = STRBUF_INIT;
 
 	close(freq->localfile);
 	freq->localfile = -1;
@@ -2327,8 +2329,10 @@ int finish_http_object_request(struct http_object_request *freq)
 		unlink_or_warn(freq->tmpfile);
 		return -1;
 	}
-	freq->rename =
-		finalize_object_file(freq->tmpfile, sha1_file_name(freq->sha1));
+
+	sha1_file_name(&filename, freq->sha1);
+	freq->rename = finalize_object_file(freq->tmpfile, filename.buf);
+	strbuf_release(&filename);
 
 	return freq->rename;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 3da70ac650..f66c21b2da 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -321,15 +321,11 @@ static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
 	}
 }
 
-const char *sha1_file_name(const unsigned char *sha1)
+void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-	static struct strbuf buf = STRBUF_INIT;
-
-	strbuf_reset(&buf);
-	strbuf_addf(&buf, "%s/", get_object_directory());
+	strbuf_addf(buf, "%s/", get_object_directory());
 
-	fill_sha1_path(&buf, sha1);
-	return buf.buf;
+	fill_sha1_path(buf, sha1);
 }
 
 struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
@@ -710,7 +706,12 @@ int check_and_freshen_file(const char *fn, int freshen)
 
 static int check_and_freshen_local(const unsigned char *sha1, int freshen)
 {
-	return check_and_freshen_file(sha1_file_name(sha1), freshen);
+	static struct strbuf buf = STRBUF_INIT;
+
+	strbuf_reset(&buf);
+	sha1_file_name(&buf, sha1);
+
+	return check_and_freshen_file(buf.buf, freshen);
 }
 
 static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
@@ -866,8 +867,12 @@ static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
 			  const char **path)
 {
 	struct alternate_object_database *alt;
+	static struct strbuf buf = STRBUF_INIT;
+
+	strbuf_reset(&buf);
+	sha1_file_name(&buf, sha1);
+	*path = buf.buf;
 
-	*path = sha1_file_name(sha1);
 	if (!lstat(*path, st))
 		return 0;
 
@@ -891,8 +896,12 @@ static int open_sha1_file(const unsigned char *sha1, const char **path)
 	int fd;
 	struct alternate_object_database *alt;
 	int most_interesting_errno;
+	static struct strbuf buf = STRBUF_INIT;
+
+	strbuf_reset(&buf);
+	sha1_file_name(&buf, sha1);
+	*path = buf.buf;
 
-	*path = sha1_file_name(sha1);
 	fd = git_open(*path);
 	if (fd >= 0)
 		return fd;
@@ -1557,9 +1566,12 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 	git_SHA_CTX c;
 	unsigned char parano_sha1[20];
 	static struct strbuf tmp_file = STRBUF_INIT;
-	const char *filename = sha1_file_name(sha1);
+	static struct strbuf filename = STRBUF_INIT;
+
+	strbuf_reset(&filename);
+	sha1_file_name(&filename, sha1);
 
-	fd = create_tmpfile(&tmp_file, filename);
+	fd = create_tmpfile(&tmp_file, filename.buf);
 	if (fd < 0) {
 		if (errno == EACCES)
 			return error("insufficient permission for adding an object to repository database %s", get_object_directory());
@@ -1612,7 +1624,7 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 			warning_errno("failed utime() on %s", tmp_file.buf);
 	}
 
-	return finalize_object_file(tmp_file.buf, filename);
+	return finalize_object_file(tmp_file.buf, filename.buf);
 }
 
 static int freshen_loose_object(const unsigned char *sha1)
-- 
2.16.0.rc2.3.g254af8b974


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs
  2018-01-17 17:54 [PATCH v2 1/2] sha1_file: remove static strbuf from sha1_file_name() Christian Couder
@ 2018-01-17 17:54 ` Christian Couder
  2018-01-17 20:37   ` Jeff Hostetler
  0 siblings, 1 reply; 6+ messages in thread
From: Christian Couder @ 2018-01-17 17:54 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff Hostetler, Jonathan Tan, Derrick Stolee,
	Kevin Daudt, Christian Couder

As sha1_file_name() could be performance sensitive, let's
try to make it faster by seeding the initial buffer size
to avoid any need to realloc and by using strbuf_addstr()
and strbuf_addc() instead of strbuf_addf().

Helped-by: Derrick Stolee <stolee@gmail.com>
Helped-by: Jeff Hostetler <git@jeffhostetler.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 sha1_file.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sha1_file.c b/sha1_file.c
index f66c21b2da..1a94716962 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -323,8 +323,14 @@ static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
 
 void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
 {
-	strbuf_addf(buf, "%s/", get_object_directory());
+	const char *obj_dir = get_object_directory();
+	size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;
 
+	if (extra > strbuf_avail(buf))
+		strbuf_grow(buf, extra);
+
+	strbuf_addstr(buf, obj_dir);
+	strbuf_addch(buf, '/');
 	fill_sha1_path(buf, sha1);
 }
 
-- 
2.16.0.rc2.3.g254af8b974


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs
  2018-01-17 17:54 ` [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs Christian Couder
@ 2018-01-17 20:37   ` Jeff Hostetler
  2018-01-17 20:54     ` Junio C Hamano
  2018-01-17 21:44     ` Christian Couder
  0 siblings, 2 replies; 6+ messages in thread
From: Jeff Hostetler @ 2018-01-17 20:37 UTC (permalink / raw)
  To: Christian Couder, git
  Cc: Junio C Hamano, Jeff Hostetler, Jonathan Tan, Derrick Stolee,
	Kevin Daudt, Christian Couder



On 1/17/2018 12:54 PM, Christian Couder wrote:
> As sha1_file_name() could be performance sensitive, let's
> try to make it faster by seeding the initial buffer size
> to avoid any need to realloc and by using strbuf_addstr()
> and strbuf_addc() instead of strbuf_addf().
> 
> Helped-by: Derrick Stolee <stolee@gmail.com>
> Helped-by: Jeff Hostetler <git@jeffhostetler.com>
> Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
> ---
>   sha1_file.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/sha1_file.c b/sha1_file.c
> index f66c21b2da..1a94716962 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -323,8 +323,14 @@ static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
>   
>   void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
>   {
> -	strbuf_addf(buf, "%s/", get_object_directory());
> +	const char *obj_dir = get_object_directory();
> +	size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;

Very minor nit.  Should this be "+3" rather than "+1"?
One for the slash after obj_dir, one for the slash between
"xx/y[38]", and one for the trailing NUL.

>   
> +	if (extra > strbuf_avail(buf))
> +		strbuf_grow(buf, extra);
> +
> +	strbuf_addstr(buf, obj_dir);
> +	strbuf_addch(buf, '/');
>   	fill_sha1_path(buf, sha1);
>   }
>   
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs
  2018-01-17 20:37   ` Jeff Hostetler
@ 2018-01-17 20:54     ` Junio C Hamano
  2018-01-17 22:27       ` Jeff King
  2018-01-17 21:44     ` Christian Couder
  1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2018-01-17 20:54 UTC (permalink / raw)
  To: Jeff Hostetler
  Cc: Christian Couder, git, Jeff Hostetler, Jonathan Tan,
	Derrick Stolee, Kevin Daudt, Christian Couder

Jeff Hostetler <git@jeffhostetler.com> writes:

>>     void sha1_file_name(struct strbuf *buf, const unsigned char
>> *sha1)
>>   {
>> -	strbuf_addf(buf, "%s/", get_object_directory());
>> +	const char *obj_dir = get_object_directory();
>> +	size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;
>
> Very minor nit.  Should this be "+3" rather than "+1"?
> One for the slash after obj_dir, one for the slash between
> "xx/y[38]", and one for the trailing NUL.
>
>>   +	if (extra > strbuf_avail(buf))
>> +		strbuf_grow(buf, extra);

The callers who care use static strbuf with 1/2, which lets them
grow it to an appropriate size after they make their first call.

On the other hand, the ones to which performance does not matter by
definition do not care.

I actually think this whole "extra -> grow" business should be
discarded.  With a miscomputed "extra" like this, it does not help
anybody---everybody may pay cost for one extra realloc due to the
miscalculation, and the ones that do care also do during their first
call.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs
  2018-01-17 20:37   ` Jeff Hostetler
  2018-01-17 20:54     ` Junio C Hamano
@ 2018-01-17 21:44     ` Christian Couder
  1 sibling, 0 replies; 6+ messages in thread
From: Christian Couder @ 2018-01-17 21:44 UTC (permalink / raw)
  To: Jeff Hostetler
  Cc: git, Junio C Hamano, Jeff Hostetler, Jonathan Tan,
	Derrick Stolee, Kevin Daudt, Christian Couder

On Wed, Jan 17, 2018 at 9:37 PM, Jeff Hostetler <git@jeffhostetler.com> wrote:
>
>
> On 1/17/2018 12:54 PM, Christian Couder wrote:
>>
>> As sha1_file_name() could be performance sensitive, let's
>> try to make it faster by seeding the initial buffer size
>> to avoid any need to realloc and by using strbuf_addstr()
>> and strbuf_addc() instead of strbuf_addf().
>>
>> Helped-by: Derrick Stolee <stolee@gmail.com>
>> Helped-by: Jeff Hostetler <git@jeffhostetler.com>
>> Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
>> ---
>>   sha1_file.c | 8 +++++++-
>>   1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/sha1_file.c b/sha1_file.c
>> index f66c21b2da..1a94716962 100644
>> --- a/sha1_file.c
>> +++ b/sha1_file.c
>> @@ -323,8 +323,14 @@ static void fill_sha1_path(struct strbuf *buf, const
>> unsigned char *sha1)
>>     void sha1_file_name(struct strbuf *buf, const unsigned char *sha1)
>>   {
>> -       strbuf_addf(buf, "%s/", get_object_directory());
>> +       const char *obj_dir = get_object_directory();
>> +       size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;
>
> Very minor nit.  Should this be "+3" rather than "+1"?
> One for the slash after obj_dir, one for the slash between
> "xx/y[38]", and one for the trailing NUL.

I think the trailing NUL is handled by strbuf_grow(), but I forgot
about the / between "xx/y[38]", so I think it should be "+2" .

Anyway I agree with Junio that using strbuf_grow() is not really needed.

>>   +     if (extra > strbuf_avail(buf))
>> +               strbuf_grow(buf, extra);
>> +
>> +       strbuf_addstr(buf, obj_dir);
>> +       strbuf_addch(buf, '/');
>>         fill_sha1_path(buf, sha1);
>>   }

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs
  2018-01-17 20:54     ` Junio C Hamano
@ 2018-01-17 22:27       ` Jeff King
  0 siblings, 0 replies; 6+ messages in thread
From: Jeff King @ 2018-01-17 22:27 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jeff Hostetler, Christian Couder, git, Jeff Hostetler,
	Jonathan Tan, Derrick Stolee, Kevin Daudt, Christian Couder

On Wed, Jan 17, 2018 at 12:54:33PM -0800, Junio C Hamano wrote:

> Jeff Hostetler <git@jeffhostetler.com> writes:
> 
> >>     void sha1_file_name(struct strbuf *buf, const unsigned char
> >> *sha1)
> >>   {
> >> -	strbuf_addf(buf, "%s/", get_object_directory());
> >> +	const char *obj_dir = get_object_directory();
> >> +	size_t extra = strlen(obj_dir) + 1 + GIT_MAX_HEXSZ;
> >
> > Very minor nit.  Should this be "+3" rather than "+1"?
> > One for the slash after obj_dir, one for the slash between
> > "xx/y[38]", and one for the trailing NUL.
> >
> >>   +	if (extra > strbuf_avail(buf))
> >> +		strbuf_grow(buf, extra);
> 
> The callers who care use static strbuf with 1/2, which lets them
> grow it to an appropriate size after they make their first call.
> 
> On the other hand, the ones to which performance does not matter by
> definition do not care.
> 
> I actually think this whole "extra -> grow" business should be
> discarded.  With a miscomputed "extra" like this, it does not help
> anybody---everybody may pay cost for one extra realloc due to the
> miscalculation, and the ones that do care also do during their first
> call.

Let me second that. The diffstat here, along with the magic numbers, is
not really encouraging unless we have a demonstrable speedup. In which
case we can then measure and compare other approaches, like pushing a
static strbuf farther up the stack. But without that, it feels like
stumbling around in the dark.

-Peff

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-01-17 22:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-17 17:54 [PATCH v2 1/2] sha1_file: remove static strbuf from sha1_file_name() Christian Couder
2018-01-17 17:54 ` [PATCH v2 2/2] sha1_file: improve sha1_file_name() perfs Christian Couder
2018-01-17 20:37   ` Jeff Hostetler
2018-01-17 20:54     ` Junio C Hamano
2018-01-17 22:27       ` Jeff King
2018-01-17 21:44     ` Christian Couder

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.version-control.git
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.version-control.git
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git