git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/1] upload-pack: buffer ref advertisement writes
@ 2021-08-24 14:02 Jacob Vosmaer
  2021-08-24 21:07 ` Taylor Blau
  0 siblings, 1 reply; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-24 14:02 UTC (permalink / raw)
  To: git; +Cc: Jacob Vosmaer

In both protocol v0 and v2, upload-pack writes one pktline packet per
advertised ref to stdout. That means one or two write(2) syscalls per
ref. This is problematic if these writes become network sends with
high overhead.

This change adds a strbuf buffer to the send_ref callbacks of the v0
ref advertisement and v2's ls-refs. Instead of writing directly to
stdout, send_ref now writes into the buffer, and then checks if there
are more than 32768 bytes in the buffer. If so we flush the buffer to
stdout.

To give an example: I set up a single-threaded loop that calls
ls-remote (with HTTP and protocol v2) on a local GitLab instance, on a
repository with 11K refs. When I switch from Git v2.32.0 to this
patch, I see a 50% reduction in CPU time for Git, and 75% for Gitaly
(GitLab's Git RPC service).

So having these buffers not only saves syscalls in upload-pack, it
also saves time in things that consume upload-pack's output.
---
 ls-refs.c     | 13 ++++++++++++-
 upload-pack.c | 18 +++++++++++++++---
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/ls-refs.c b/ls-refs.c
index 88f6c3f60d..7b9d5813bf 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -7,6 +7,8 @@
 #include "pkt-line.h"
 #include "config.h"
 
+#define OUT_WRITE_SIZE 32768
+
 static int config_read;
 static int advertise_unborn;
 static int allow_unborn;
@@ -65,6 +67,7 @@ struct ls_refs_data {
 	unsigned peel;
 	unsigned symrefs;
 	struct strvec prefixes;
+	struct strbuf out;
 	unsigned unborn : 1;
 };
 
@@ -105,7 +108,12 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	}
 
 	strbuf_addch(&refline, '\n');
-	packet_write(1, refline.buf, refline.len);
+
+	packet_buf_write_len(&data->out, refline.buf, refline.len);
+	if (data->out.len >= OUT_WRITE_SIZE) {
+		write_or_die(1, data->out.buf, data->out.len);
+		strbuf_reset(&data->out);
+	}
 
 	strbuf_release(&refline);
 	return 0;
@@ -145,6 +153,7 @@ int ls_refs(struct repository *r, struct strvec *keys,
 
 	memset(&data, 0, sizeof(data));
 	strvec_init(&data.prefixes);
+	strbuf_init(&data.out, OUT_WRITE_SIZE);
 
 	ensure_config_read();
 	git_config(ls_refs_config, NULL);
@@ -171,7 +180,9 @@ int ls_refs(struct repository *r, struct strvec *keys,
 		strvec_push(&data.prefixes, "");
 	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
 				     send_ref, &data, 0);
+	write_or_die(1, data.out.buf, data.out.len);
 	packet_flush(1);
+	strbuf_release(&data.out);
 	strvec_clear(&data.prefixes);
 	return 0;
 }
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb4..15f9aab4f6 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -42,6 +42,8 @@
 #define ALL_FLAGS (THEY_HAVE | OUR_REF | WANTED | COMMON_KNOWN | SHALLOW | \
 		NOT_SHALLOW | CLIENT_SHALLOW | HIDDEN_REF)
 
+#define SEND_REF_OUT_WRITE_SIZE 32768
+
 /* Enum for allowed unadvertised object request (UOR) */
 enum allow_uor {
 	/* Allow specifying sha1 if it is a ref tip. */
@@ -58,6 +60,7 @@ enum allow_uor {
  */
 struct upload_pack_data {
 	struct string_list symref;				/* v0 only */
+	struct strbuf send_ref_out;				/* v0 only */
 	struct object_array want_obj;
 	struct object_array have_obj;
 	struct oid_array haves;					/* v2 only */
@@ -126,6 +129,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
 	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
 	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
+	struct strbuf send_ref_out = STRBUF_INIT;
 
 	memset(data, 0, sizeof(*data));
 	data->symref = symref;
@@ -141,6 +145,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	data->allow_filter_fallback = 1;
 	data->tree_filter_max_depth = ULONG_MAX;
 	packet_writer_init(&data->writer, 1);
+	data->send_ref_out = send_ref_out;
 
 	data->keepalive = 5;
 	data->advertise_sid = 0;
@@ -158,6 +163,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	object_array_clear(&data->extra_edge_obj);
 	list_objects_filter_release(&data->filter_options);
 	string_list_clear(&data->allowed_filters, 0);
+	strbuf_release(&data->send_ref_out);
 
 	free((char *)data->pack_objects_hook);
 }
@@ -1207,7 +1213,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 
 		format_symref_info(&symref_info, &data->symref);
 		format_session_id(&session_id, data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+		packet_buf_write(&data->send_ref_out, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (data->allow_uor & ALLOW_TIP_SHA1) ?
@@ -1223,11 +1229,15 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		strbuf_release(&symref_info);
 		strbuf_release(&session_id);
 	} else {
-		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
+		packet_buf_write(&data->send_ref_out, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
 	if (!peel_iterated_oid(oid, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+		packet_buf_write(&data->send_ref_out, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+	if (data->send_ref_out.len >= SEND_REF_OUT_WRITE_SIZE) {
+		write_or_die(1, data->send_ref_out.buf, data->send_ref_out.len);
+		strbuf_reset(&data->send_ref_out);
+	}
 	return 0;
 }
 
@@ -1346,8 +1356,10 @@ void upload_pack(struct upload_pack_options *options)
 
 	if (options->advertise_refs || !data.stateless_rpc) {
 		reset_timeout(data.timeout);
+		strbuf_grow(&data.send_ref_out, SEND_REF_OUT_WRITE_SIZE);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		write_or_die(1, data.send_ref_out.buf, data.send_ref_out.len);
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/1] upload-pack: buffer ref advertisement writes
  2021-08-24 14:02 [PATCH 1/1] upload-pack: buffer ref advertisement writes Jacob Vosmaer
@ 2021-08-24 21:07 ` Taylor Blau
  2021-08-24 21:42   ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Taylor Blau @ 2021-08-24 21:07 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: git

On Tue, Aug 24, 2021 at 04:02:59PM +0200, Jacob Vosmaer wrote:
> In both protocol v0 and v2, upload-pack writes one pktline packet per
> advertised ref to stdout. That means one or two write(2) syscalls per
> ref. This is problematic if these writes become network sends with
> high overhead.
>
> This change adds a strbuf buffer to the send_ref callbacks of the v0
> ref advertisement and v2's ls-refs. Instead of writing directly to
> stdout, send_ref now writes into the buffer, and then checks if there
> are more than 32768 bytes in the buffer. If so we flush the buffer to
> stdout.

Are there any consumers that rely on having information early (where
buffering would be a detriment to them)?

> To give an example: I set up a single-threaded loop that calls
> ls-remote (with HTTP and protocol v2) on a local GitLab instance, on a
> repository with 11K refs. When I switch from Git v2.32.0 to this
> patch, I see a 50% reduction in CPU time for Git, and 75% for Gitaly
> (GitLab's Git RPC service).

Hmm. Seeing a reduction in CPU time is surprising to me: do you have an
explanation for why the time-savings isn't coming purely from "system"
(i.e., any blocking I/O)?

> So having these buffers not only saves syscalls in upload-pack, it
> also saves time in things that consume upload-pack's output.

I don't think this explains it, since any time spent waiting on
upload-pack output should be counted against the system bucket, not CPU
time.

> ---
>  ls-refs.c     | 13 ++++++++++++-
>  upload-pack.c | 18 +++++++++++++++---
>  2 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/ls-refs.c b/ls-refs.c
> index 88f6c3f60d..7b9d5813bf 100644
> --- a/ls-refs.c
> +++ b/ls-refs.c
> @@ -7,6 +7,8 @@
>  #include "pkt-line.h"
>  #include "config.h"
>
> +#define OUT_WRITE_SIZE 32768
> +
>  static int config_read;
>  static int advertise_unborn;
>  static int allow_unborn;
> @@ -65,6 +67,7 @@ struct ls_refs_data {
>  	unsigned peel;
>  	unsigned symrefs;
>  	struct strvec prefixes;
> +	struct strbuf out;
>  	unsigned unborn : 1;
>  };
>
> @@ -105,7 +108,12 @@ static int send_ref(const char *refname, const struct object_id *oid,
>  	}
>
>  	strbuf_addch(&refline, '\n');
> -	packet_write(1, refline.buf, refline.len);
> +
> +	packet_buf_write_len(&data->out, refline.buf, refline.len);
> +	if (data->out.len >= OUT_WRITE_SIZE) {
> +		write_or_die(1, data->out.buf, data->out.len);
> +		strbuf_reset(&data->out);
> +	}

None of this looks wrong to me, but it might be nice to teach the
packet writer a buffered mode that would handle this for us. It would be
especially nice to bolt the final flush onto packet_flush(), since it is
otherwise easy to forget to do.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/1] upload-pack: buffer ref advertisement writes
  2021-08-24 21:07 ` Taylor Blau
@ 2021-08-24 21:42   ` Junio C Hamano
  2021-08-25  0:44     ` Jeff King
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2021-08-24 21:42 UTC (permalink / raw)
  To: Taylor Blau; +Cc: Jacob Vosmaer, git

Taylor Blau <me@ttaylorr.com> writes:

>> @@ -105,7 +108,12 @@ static int send_ref(const char *refname, const struct object_id *oid,
>>  	}
>>
>>  	strbuf_addch(&refline, '\n');
>> -	packet_write(1, refline.buf, refline.len);
>> +
>> +	packet_buf_write_len(&data->out, refline.buf, refline.len);
>> +	if (data->out.len >= OUT_WRITE_SIZE) {
>> +		write_or_die(1, data->out.buf, data->out.len);
>> +		strbuf_reset(&data->out);

This is somewhat unexpected.  When one introduces size limit, it is
more natural to make it upper bound and arrange the code more like

	if (currently buffered plus new data exceeds size limit) {
		flush the currently buffered data;
		if (new data alone exceeds size limit)
			flush the new data;
		else
			buffer the new data;
	} else {
		buffer the new data;
	}

When a single packet exceeds the size limit, you'd end up making an
oversize write() call anyway, but otherwise it would keep the write
below the limit, not slightly above the limit, which makes it much
easier to justify why the limit exists at the level it is set.

Also, why is it 32k?  Our jumbo packet limit is 65k, I think, and it
may probably be easier to explain if we matched it.

>> +	}
>
> None of this looks wrong to me, but it might be nice to teach the
> packet writer a buffered mode that would handle this for us. It would be
> especially nice to bolt the final flush onto packet_flush(), since it is
> otherwise easy to forget to do.

FWIW, the packet-line part of the system was from the beginning
written with an eye to allow buffering until _flush() comes; we may
have added some buggy conversation path that deadlocks if we make
the non-flush packets fully buffered, so there may need some fixes,
but I do not expect the fallout would be too hard to diagnose.

It may be worth trying that avenue first before piling on the user
level buffering like this patch does.

Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/1] upload-pack: buffer ref advertisement writes
  2021-08-24 21:42   ` Junio C Hamano
@ 2021-08-25  0:44     ` Jeff King
  2021-08-26 10:02       ` Jacob Vosmaer
  0 siblings, 1 reply; 27+ messages in thread
From: Jeff King @ 2021-08-25  0:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Taylor Blau, Jacob Vosmaer, git

On Tue, Aug 24, 2021 at 02:42:20PM -0700, Junio C Hamano wrote:

> > None of this looks wrong to me, but it might be nice to teach the
> > packet writer a buffered mode that would handle this for us. It would be
> > especially nice to bolt the final flush onto packet_flush(), since it is
> > otherwise easy to forget to do.
> 
> FWIW, the packet-line part of the system was from the beginning
> written with an eye to allow buffering until _flush() comes; we may
> have added some buggy conversation path that deadlocks if we make
> the non-flush packets fully buffered, so there may need some fixes,
> but I do not expect the fallout would be too hard to diagnose.
> 
> It may be worth trying that avenue first before piling on the user
> level buffering like this patch does.

Yeah, I had the same thought. It also feels like this is a problem
already solved by stdio. I.e., a lot of the packet_* functions can
handle descriptors or strbufs. Why not "FILE *" handles?

It would probably involve using the original descriptor _and_ the
filehandle in some cases (e.g., ref advertisement over the handle, and
then muxing pack-objects output straight to the descriptor). But that's
OK as long we are sensible about flushing at the right moments.

It may not be much less complex than just implementing buffering in the
packet_* interfaces, though. The tricky part is likely to be the
interface (not itself, but avoiding repetition between all the
fd/strbuf/buffered variants).

-Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/1] upload-pack: buffer ref advertisement writes
  2021-08-25  0:44     ` Jeff King
@ 2021-08-26 10:02       ` Jacob Vosmaer
  2021-08-26 10:06         ` [PATCH 1/2] pkt-line: add packet_fwrite Jacob Vosmaer
  0 siblings, 1 reply; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-26 10:02 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Taylor Blau, Git Mailing List, ps

> Are there any consumers that rely on having information early (where
> buffering would be a detriment to them)?

I think not.

> Hmm. Seeing a reduction in CPU time is surprising to me: do you have an
> explanation for why the time-savings isn't coming purely from "system"
> (i.e., any blocking I/O)?

Pipe writes only block in the IO sense if the pipe buffer is full.
Otherwise they seem to spend their time in spinlocks and copying into
the page buffer. In my benchmark, I don't believe the pipe buffer was
ever full.

I'm not an expert on the Linux kernel; you can see CPU flame graphs in
https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/1257.

> Yeah, I had the same thought. It also feels like this is a problem
> already solved by stdio. I.e., a lot of the packet_* functions can
> handle descriptors or strbufs. Why not "FILE *" handles?

My colleague Patrick Steinhardt (cc) made the same suggestion
off-list. I'll post an alternative patch set to this thread.

Jacob


On Wed, Aug 25, 2021 at 2:44 AM Jeff King <peff@peff.net> wrote:
>
> On Tue, Aug 24, 2021 at 02:42:20PM -0700, Junio C Hamano wrote:
>
> > > None of this looks wrong to me, but it might be nice to teach the
> > > packet writer a buffered mode that would handle this for us. It would be
> > > especially nice to bolt the final flush onto packet_flush(), since it is
> > > otherwise easy to forget to do.
> >
> > FWIW, the packet-line part of the system was from the beginning
> > written with an eye to allow buffering until _flush() comes; we may
> > have added some buggy conversation path that deadlocks if we make
> > the non-flush packets fully buffered, so there may need some fixes,
> > but I do not expect the fallout would be too hard to diagnose.
> >
> > It may be worth trying that avenue first before piling on the user
> > level buffering like this patch does.
>
> Yeah, I had the same thought. It also feels like this is a problem
> already solved by stdio. I.e., a lot of the packet_* functions can
> handle descriptors or strbufs. Why not "FILE *" handles?
>
> It would probably involve using the original descriptor _and_ the
> filehandle in some cases (e.g., ref advertisement over the handle, and
> then muxing pack-objects output straight to the descriptor). But that's
> OK as long we are sensible about flushing at the right moments.
>
> It may not be much less complex than just implementing buffering in the
> packet_* interfaces, though. The tricky part is likely to be the
> interface (not itself, but avoiding repetition between all the
> fd/strbuf/buffered variants).
>
> -Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/2] pkt-line: add packet_fwrite
  2021-08-26 10:02       ` Jacob Vosmaer
@ 2021-08-26 10:06         ` Jacob Vosmaer
  2021-08-26 10:06           ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
  2021-08-26 16:33           ` [PATCH 1/2] pkt-line: add packet_fwrite Junio C Hamano
  0 siblings, 2 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-26 10:06 UTC (permalink / raw)
  To: peff, me, git, gitster, ps; +Cc: Jacob Vosmaer

This adds a new function packet_fwrite which works like packet_write,
except it writes to a stdio stream.

Helped-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 cache.h        |  1 +
 pkt-line.c     | 16 ++++++++++++++++
 pkt-line.h     |  1 +
 write-or-die.c |  6 ++++++
 4 files changed, 24 insertions(+)

diff --git a/cache.h b/cache.h
index bd4869beee..fbefe0927b 100644
--- a/cache.h
+++ b/cache.h
@@ -1736,6 +1736,7 @@ extern const char *git_mailmap_blob;
 void maybe_flush_or_die(FILE *, const char *);
 __attribute__((format (printf, 2, 3)))
 void fprintf_or_die(FILE *, const char *fmt, ...);
+void fwrite_or_die(FILE *f, const void *buf, size_t count);
 
 #define COPY_READ_ERROR (-2)
 #define COPY_WRITE_ERROR (-3)
diff --git a/pkt-line.c b/pkt-line.c
index 9f63eae2e6..244b326708 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -243,6 +243,22 @@ void packet_write(int fd_out, const char *buf, size_t size)
 		die("%s", err.buf);
 }
 
+void packet_fwrite(FILE *f, const char *buf, size_t size)
+{
+	size_t packet_size;
+	char header[4];
+
+	if (size > LARGE_PACKET_DATA_MAX)
+		die(_("packet write failed - data exceeds max packet size"));
+
+	packet_trace(buf, size, 1);
+	packet_size = size + 4;
+
+	set_packet_header(header, packet_size);
+	fwrite_or_die(f, header, 4);
+	fwrite_or_die(f, buf, size);
+}
+
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
 {
 	va_list args;
diff --git a/pkt-line.h b/pkt-line.h
index 5af5f45687..c9cb5e1719 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -28,6 +28,7 @@ void packet_buf_flush(struct strbuf *buf);
 void packet_buf_delim(struct strbuf *buf);
 void set_packet_header(char *buf, int size);
 void packet_write(int fd_out, const char *buf, size_t size);
+void packet_fwrite(FILE *f, const char *buf, size_t size);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
 void packet_buf_write_len(struct strbuf *buf, const char *data, size_t len);
 int packet_flush_gently(int fd);
diff --git a/write-or-die.c b/write-or-die.c
index d33e68f6ab..d82ef54f90 100644
--- a/write-or-die.c
+++ b/write-or-die.c
@@ -70,3 +70,9 @@ void write_or_die(int fd, const void *buf, size_t count)
 		die_errno("write error");
 	}
 }
+
+void fwrite_or_die(FILE *f, const void *buf, size_t count)
+{
+	if (fwrite(buf, count, 1, f) != 1)
+		die_errno("write error");
+}
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 10:06         ` [PATCH 1/2] pkt-line: add packet_fwrite Jacob Vosmaer
@ 2021-08-26 10:06           ` Jacob Vosmaer
  2021-08-26 16:33             ` Junio C Hamano
  2021-08-26 23:32             ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jeff King
  2021-08-26 16:33           ` [PATCH 1/2] pkt-line: add packet_fwrite Junio C Hamano
  1 sibling, 2 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-26 10:06 UTC (permalink / raw)
  To: peff, me, git, gitster, ps; +Cc: Jacob Vosmaer

In both protocol v0 and v2, upload-pack writes one pktline packet per
advertised ref to stdout. That means one or two write(2) syscalls per
ref. This is problematic if these writes become network sends with
high overhead.

This commit changes both send_ref callbacks to use buffered IO using
stdio. The default buffer size is usually 4KB, but with musl libc it
is only 1KB. So for consistent results we need to set a buffer size
ourselves. During startup of upload-pack, we set the stdout buffer
size to LARGE_PACKET_MAX, i.e. just shy of 64KB.

To give an example of the impact: I set up a single-threaded loop that
calls ls-remote (with HTTP and protocol v2) on a local GitLab
instance, on a repository with 11K refs. When I switch from Git
v2.32.0 to this patch, I see a 50% reduction in CPU time for Git, and
75% for Gitaly (GitLab's Git RPC service).

So using buffered IO not only saves syscalls in upload-pack, it also
saves time in things that consume upload-pack's output.

Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 builtin/upload-pack.c | 10 ++++++++++
 ls-refs.c             |  5 ++++-
 upload-pack.c         | 15 ++++++++++++---
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c
index 6da8fa2607..8033f84124 100644
--- a/builtin/upload-pack.c
+++ b/builtin/upload-pack.c
@@ -48,6 +48,16 @@ int cmd_upload_pack(int argc, const char **argv, const char *prefix)
 	if (!enter_repo(dir, strict))
 		die("'%s' does not appear to be a git repository", dir);
 
+	/*
+	 * Increase the stdio buffer size for stdout, for the benefit of ref
+	 * advertisement writes. We are only allowed to call setvbuf(3) "after
+	 * opening a stream and before any other operations have been performed
+	 * on it", so let's call it before we have written anything to stdout.
+	 */
+	if (setvbuf(stdout, xmalloc(LARGE_PACKET_MAX), _IOFBF,
+			LARGE_PACKET_MAX))
+		die_errno("failed to grow stdout buffer");
+
 	switch (determine_protocol_version_server()) {
 	case protocol_v2:
 		serve_opts.advertise_capabilities = opts.advertise_refs;
diff --git a/ls-refs.c b/ls-refs.c
index 88f6c3f60d..83f2948fc3 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	}
 
 	strbuf_addch(&refline, '\n');
-	packet_write(1, refline.buf, refline.len);
+	packet_fwrite(stdout, refline.buf, refline.len);
 
 	strbuf_release(&refline);
 	return 0;
@@ -171,6 +171,9 @@ int ls_refs(struct repository *r, struct strvec *keys,
 		strvec_push(&data.prefixes, "");
 	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
 				     send_ref, &data, 0);
+	/* Call fflush because send_ref uses stdio. */
+	if (fflush(stdout))
+		die_errno(_("write failure on standard output"));
 	packet_flush(1);
 	strvec_clear(&data.prefixes);
 	return 0;
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb4..b592ac6cfb 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -58,6 +58,7 @@ enum allow_uor {
  */
 struct upload_pack_data {
 	struct string_list symref;				/* v0 only */
+	struct strbuf send_ref_buf;				/* v0 only */
 	struct object_array want_obj;
 	struct object_array have_obj;
 	struct oid_array haves;					/* v2 only */
@@ -126,6 +127,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
 	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
 	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
+	struct strbuf send_ref_buf = STRBUF_INIT;
 
 	memset(data, 0, sizeof(*data));
 	data->symref = symref;
@@ -141,6 +143,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	data->allow_filter_fallback = 1;
 	data->tree_filter_max_depth = ULONG_MAX;
 	packet_writer_init(&data->writer, 1);
+	data->send_ref_buf = send_ref_buf;
 
 	data->keepalive = 5;
 	data->advertise_sid = 0;
@@ -158,6 +161,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	object_array_clear(&data->extra_edge_obj);
 	list_objects_filter_release(&data->filter_options);
 	string_list_clear(&data->allowed_filters, 0);
+	strbuf_release(&data->send_ref_buf);
 
 	free((char *)data->pack_objects_hook);
 }
@@ -1201,13 +1205,14 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	if (mark_our_ref(refname_nons, refname, oid))
 		return 0;
 
+	strbuf_reset(&data->send_ref_buf);
 	if (capabilities) {
 		struct strbuf symref_info = STRBUF_INIT;
 		struct strbuf session_id = STRBUF_INIT;
 
 		format_symref_info(&symref_info, &data->symref);
 		format_session_id(&session_id, data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+		packet_buf_write(&data->send_ref_buf, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (data->allow_uor & ALLOW_TIP_SHA1) ?
@@ -1223,11 +1228,12 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		strbuf_release(&symref_info);
 		strbuf_release(&session_id);
 	} else {
-		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
+		packet_buf_write(&data->send_ref_buf, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
 	if (!peel_iterated_oid(oid, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+		packet_buf_write(&data->send_ref_buf, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+	fwrite_or_die(stdout, data->send_ref_buf.buf, data->send_ref_buf.len);
 	return 0;
 }
 
@@ -1348,6 +1354,9 @@ void upload_pack(struct upload_pack_options *options)
 		reset_timeout(data.timeout);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		/* Call fflush because send_ref uses stdio. */
+		if (fflush(stdout))
+			die_errno(_("write failure on standard output"));
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 10:06           ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
@ 2021-08-26 16:33             ` Junio C Hamano
  2021-08-26 20:21               ` Junio C Hamano
                                 ` (2 more replies)
  2021-08-26 23:32             ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jeff King
  1 sibling, 3 replies; 27+ messages in thread
From: Junio C Hamano @ 2021-08-26 16:33 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: peff, me, git, ps

Jacob Vosmaer <jacob@gitlab.com> writes:

> In both protocol v0 and v2, upload-pack writes one pktline packet per
> advertised ref to stdout. That means one or two write(2) syscalls per
> ref. This is problematic if these writes become network sends with
> high overhead.
>
> This commit changes both send_ref callbacks to use buffered IO using
> stdio. The default buffer size is usually 4KB, but with musl libc it
> is only 1KB. So for consistent results we need to set a buffer size
> ourselves. During startup of upload-pack, we set the stdout buffer
> size to LARGE_PACKET_MAX, i.e. just shy of 64KB.
>
> To give an example of the impact: I set up a single-threaded loop that
> calls ls-remote (with HTTP and protocol v2) on a local GitLab
> instance, on a repository with 11K refs. When I switch from Git
> v2.32.0 to this patch, I see a 50% reduction in CPU time for Git, and
> 75% for Gitaly (GitLab's Git RPC service).
>
> So using buffered IO not only saves syscalls in upload-pack, it also
> saves time in things that consume upload-pack's output.
>
> Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
> ---

Nicely explained.

> +	/*
> +	 * Increase the stdio buffer size for stdout, for the benefit of ref
> +	 * advertisement writes. We are only allowed to call setvbuf(3) "after
> +	 * opening a stream and before any other operations have been performed
> +	 * on it", so let's call it before we have written anything to stdout.
> +	 */
> +	if (setvbuf(stdout, xmalloc(LARGE_PACKET_MAX), _IOFBF,
> +			LARGE_PACKET_MAX))
> +		die_errno("failed to grow stdout buffer");

Nice to see a comment on the tricky part.  I do not think we mind if
we rounded up the allocation size to the next power of two here, but
there probably won't be any measurable benefit for doing so.

> diff --git a/ls-refs.c b/ls-refs.c
> index 88f6c3f60d..83f2948fc3 100644
> --- a/ls-refs.c
> +++ b/ls-refs.c
> @@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
>  	}
>  
>  	strbuf_addch(&refline, '\n');
> -	packet_write(1, refline.buf, refline.len);
> +	packet_fwrite(stdout, refline.buf, refline.len);
>  
>  	strbuf_release(&refline);
>  	return 0;
> @@ -171,6 +171,9 @@ int ls_refs(struct repository *r, struct strvec *keys,
>  		strvec_push(&data.prefixes, "");
>  	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
>  				     send_ref, &data, 0);
> +	/* Call fflush because send_ref uses stdio. */
> +	if (fflush(stdout))
> +		die_errno(_("write failure on standard output"));

There is maybe_flush_or_die() helper that does a bit too much (I do
not think this code path needs to worry about GIT_FLUSH) but does
call check_pipe(errno) like packet_write_fmt() does upon seeing a
write failure.

>  	packet_flush(1);

I briefly wondered if all the operations on refline can be turned
into direct operations on stdout, but the answer obviously is no.
We still need to have a strbuf to assemble a single packet and
measure its final length before we can send it out to stdout.

> diff --git a/upload-pack.c b/upload-pack.c
> index 297b76fcb4..b592ac6cfb 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -58,6 +58,7 @@ enum allow_uor {
>   */
>  struct upload_pack_data {
>  	struct string_list symref;				/* v0 only */
> +	struct strbuf send_ref_buf;				/* v0 only */
>  	struct object_array want_obj;
>  	struct object_array have_obj;
>  	struct oid_array haves;					/* v2 only */
> @@ -126,6 +127,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
>  	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
>  	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
>  	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
> +	struct strbuf send_ref_buf = STRBUF_INIT;

IIUC, the most notable difference between this round and the
previous one is that now we are no longer buffering more than one
packet worth of data because we let the stdio to accumulate them.
I was a bit surprised that we still want to have a strbuf inside
this structure (which is there only because it wants to persist
during the whole conversation with the other side).

Ahh, sorry, scratch that.  I do remember the discussion/patch that
it was hurting to make calls to strbuf-init/strbuf-release once per
ref, and it is an easy way to have the structure here to reuse it.

OK.  This step looks quite sensible, other than the same "do we want
check_pipe() before dying upon fflush() failure?" we see in the last
hunk below.  I didn't mention this in the review of 1/2, but the new
fwrite_or_die() helper function may also have the same issue.

Thanks.

> @@ -1348,6 +1354,9 @@ void upload_pack(struct upload_pack_options *options)
>  		reset_timeout(data.timeout);
>  		head_ref_namespaced(send_ref, &data);
>  		for_each_namespaced_ref(send_ref, &data);
> +		/* Call fflush because send_ref uses stdio. */
> +		if (fflush(stdout))
> +			die_errno(_("write failure on standard output"));
>  		advertise_shallow_grafts(1);
>  		packet_flush(1);
>  	} else {

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/2] pkt-line: add packet_fwrite
  2021-08-26 10:06         ` [PATCH 1/2] pkt-line: add packet_fwrite Jacob Vosmaer
  2021-08-26 10:06           ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
@ 2021-08-26 16:33           ` Junio C Hamano
  1 sibling, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2021-08-26 16:33 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: peff, me, git, ps

Jacob Vosmaer <jacob@gitlab.com> writes:

> This adds a new function packet_fwrite which works like packet_write,
> except it writes to a stdio stream.
>
> Helped-by: Patrick Steinhardt <ps@pks.im>
> Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
> ---
>  cache.h        |  1 +
>  pkt-line.c     | 16 ++++++++++++++++
>  pkt-line.h     |  1 +
>  write-or-die.c |  6 ++++++
>  4 files changed, 24 insertions(+)


I wonder if our readers are all proficient enough not to require
some comment to warn them about the care that must be taken when
packet_fwrite() and packet_write() are mixed together (i.e. stdio
flushing, etc.), but other than that, this sounds quite
straight-forward.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 16:33             ` Junio C Hamano
@ 2021-08-26 20:21               ` Junio C Hamano
  2021-08-26 22:35               ` Taylor Blau
  2021-08-26 23:24               ` Jeff King
  2 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2021-08-26 20:21 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: peff, me, git, ps

Junio C Hamano <gitster@pobox.com> writes:

> IIUC, the most notable difference between this round and the
> previous one is that now we are no longer buffering more than one
> packet worth of data because we let the stdio to accumulate them.
> I was a bit surprised that we still want to have a strbuf inside
> this structure (which is there only because it wants to persist
> during the whole conversation with the other side).
>
> Ahh, sorry, scratch that.  I do remember the discussion/patch that
> it was hurting to make calls to strbuf-init/strbuf-release once per
> ref, and it is an easy way to have the structure here to reuse it.

But that means this majorly overlaps what Patrick is already doing
in his ps/ls-refs-strbuf-optim topic.  Perhaps these should be
rebased on top of that topic branch, I wonder?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 16:33             ` Junio C Hamano
  2021-08-26 20:21               ` Junio C Hamano
@ 2021-08-26 22:35               ` Taylor Blau
  2021-08-26 23:24               ` Jeff King
  2 siblings, 0 replies; 27+ messages in thread
From: Taylor Blau @ 2021-08-26 22:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jacob Vosmaer, peff, git, ps

On Thu, Aug 26, 2021 at 09:33:08AM -0700, Junio C Hamano wrote:
> > @@ -171,6 +171,9 @@ int ls_refs(struct repository *r, struct strvec *keys,
> >  		strvec_push(&data.prefixes, "");
> >  	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
> >  				     send_ref, &data, 0);
> > +	/* Call fflush because send_ref uses stdio. */
> > +	if (fflush(stdout))
> > +		die_errno(_("write failure on standard output"));
>
> There is maybe_flush_or_die() helper that does a bit too much (I do
> not think this code path needs to worry about GIT_FLUSH) but does
> call check_pipe(errno) like packet_write_fmt() does upon seeing a
> write failure.
>
> >  	packet_flush(1);

I was somewhat surprised to see the fflush call here and not in a
companion to the existing packet_flush (when working with stdio instead
of file descriptors, of course).

What Jacob wrote is not wrong, of course, but I think having
packet_fflush() or similar would be less error-prone.

> OK.  This step looks quite sensible, other than the same "do we want
> check_pipe() before dying upon fflush() failure?" we see in the last
> hunk below.  I didn't mention this in the review of 1/2, but the new
> fwrite_or_die() helper function may also have the same issue.

Agreed.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 16:33             ` Junio C Hamano
  2021-08-26 20:21               ` Junio C Hamano
  2021-08-26 22:35               ` Taylor Blau
@ 2021-08-26 23:24               ` Jeff King
  2021-08-27 16:15                 ` Junio C Hamano
  2 siblings, 1 reply; 27+ messages in thread
From: Jeff King @ 2021-08-26 23:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jacob Vosmaer, me, git, ps

On Thu, Aug 26, 2021 at 09:33:08AM -0700, Junio C Hamano wrote:

> > +	/*
> > +	 * Increase the stdio buffer size for stdout, for the benefit of ref
> > +	 * advertisement writes. We are only allowed to call setvbuf(3) "after
> > +	 * opening a stream and before any other operations have been performed
> > +	 * on it", so let's call it before we have written anything to stdout.
> > +	 */
> > +	if (setvbuf(stdout, xmalloc(LARGE_PACKET_MAX), _IOFBF,
> > +			LARGE_PACKET_MAX))
> > +		die_errno("failed to grow stdout buffer");
> 
> Nice to see a comment on the tricky part.  I do not think we mind if
> we rounded up the allocation size to the next power of two here, but
> there probably won't be any measurable benefit for doing so.

I'm a little negative on this part, actually. The commit message claims
it's for consistency across platforms. But I would argue that if your
libc buffer size is sub-optimal, then we shouldn't be sprinkling these
adjustments through the code. Either:

 - we should consider it a quality-of-implementation issue, and people
   using that libc should push back on their platform to change the
   default size; or

 - we should fix it consistently and transparently throughout Git by
   adjusting std{in,out,err} in common-main, and using fopen()/fdopen()
   wrappers to adjust to something more sensible

I also find the use of LARGE_PACKET_MAX weird here. Is that really the
optimal stdio buffer size? The whole point of this is coalescing _small_
packets into a single write() syscall. So how many small packets fit
into LARGE_PACKET_MAX is comparing apples and oranges.

> >  	packet_flush(1);
> 
> I briefly wondered if all the operations on refline can be turned
> into direct operations on stdout, but the answer obviously is no.
> We still need to have a strbuf to assemble a single packet and
> measure its final length before we can send it out to stdout.

I think we could push quite a bit more down into the pkt-line code by
making a few more operations FILE-aware. E.g., the patch below shows an
alternate version of patch 2, where we have more helpers and the changes
to the caller are much smaller.

The proliferation of packet_write_fmt() vs packet_fwrite_fmt() is a
little ugly, but:

  - I didn't bother to factor out commonality for this proof-of-concept.
    I think we could be sharing a bit more code in pkt-line.c, as noted.

  - This would all be much nicer if we refactored upload-pack to use
    the packet_writer interface. And then the caller sets up the "FILE
    vs descriptor" choice once when creating the writer, and all of the
    code it calls doesn't have to care either way.

diff --git a/ls-refs.c b/ls-refs.c
index 88f6c3f60d..e6a2dbd962 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	}
 
 	strbuf_addch(&refline, '\n');
-	packet_write(1, refline.buf, refline.len);
+	packet_fwrite(stdout, refline.buf, refline.len);
 
 	strbuf_release(&refline);
 	return 0;
@@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys,
 		strvec_push(&data.prefixes, "");
 	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
 				     send_ref, &data, 0);
-	packet_flush(1);
+	packet_fflush(stdout);
 	strvec_clear(&data.prefixes);
 	return 0;
 }
diff --git a/pkt-line.c b/pkt-line.c
index 244b326708..12cf09804b 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -92,6 +92,14 @@ void packet_flush(int fd)
 		die_errno(_("unable to write flush packet"));
 }
 
+void packet_fflush(FILE *fh)
+{
+	packet_trace("0000", 4, 1);
+	fwrite_or_die(fh, "0000", 4);
+	if (fflush(fh))
+		die_errno(_("unable to flush packet stream"));
+}
+
 void packet_delim(int fd)
 {
 	packet_trace("0001", 4, 1);
@@ -194,6 +202,21 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
 	return status;
 }
 
+/* this could be refactored to share code with packet_write_fmt_1 */
+void packet_fwrite_fmt(FILE *fh, const char *fmt, ...)
+{
+	static struct strbuf buf = STRBUF_INIT;
+	va_list args;
+
+	strbuf_reset(&buf);
+
+	va_start(args, fmt);
+	format_packet(&buf, "", fmt, args);
+	va_end(args);
+
+	fwrite_or_die(fh, buf.buf, buf.len);
+}
+
 static int do_packet_write(const int fd_out, const char *buf, size_t size,
 			   struct strbuf *err)
 {
diff --git a/pkt-line.h b/pkt-line.h
index c9cb5e1719..7899f8d8e1 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -21,9 +21,11 @@
  * side can't, we stay with pure read/write interfaces.
  */
 void packet_flush(int fd);
+void packet_fflush(FILE *fh);
 void packet_delim(int fd);
 void packet_response_end(int fd);
 void packet_write_fmt(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
+void packet_fwrite_fmt(FILE *fh, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
 void packet_buf_flush(struct strbuf *buf);
 void packet_buf_delim(struct strbuf *buf);
 void set_packet_header(char *buf, int size);
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb4..7c98c04d73 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 
 		format_symref_info(&symref_info, &data->symref);
 		format_session_id(&session_id, data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+		packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (data->allow_uor & ALLOW_TIP_SHA1) ?
@@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		strbuf_release(&symref_info);
 		strbuf_release(&session_id);
 	} else {
-		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
+		packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
 	if (!peel_iterated_oid(oid, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+		packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
 	return 0;
 }
 
@@ -1348,6 +1348,14 @@ void upload_pack(struct upload_pack_options *options)
 		reset_timeout(data.timeout);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		/*
+		 * sadly we still have to fflush manually here, because
+		 * advertise_shallow_grafts() uses the descriptor directly
+		 * before we could call packet_fflush(). But isn't that a good
+		 * reason to convert that function to stdio, too?
+		 */
+		if (fflush(stdout))
+			die_errno(_("write failure on standard output"));
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {

> > +	struct strbuf send_ref_buf = STRBUF_INIT;
> 
> IIUC, the most notable difference between this round and the
> previous one is that now we are no longer buffering more than one
> packet worth of data because we let the stdio to accumulate them.
> I was a bit surprised that we still want to have a strbuf inside
> this structure (which is there only because it wants to persist
> during the whole conversation with the other side).
> 
> Ahh, sorry, scratch that.  I do remember the discussion/patch that
> it was hurting to make calls to strbuf-init/strbuf-release once per
> ref, and it is an easy way to have the structure here to reuse it.

You'll note in my patch above that we format into a static strbuf to
avoid this cost. Perhaps a little ugly, but that's exactly what the
existing packet_write_fmt() code is doing!

If we want to get rid of that, I think there are two options:

  - for stdio handles, we don't ever need the fully-formatted packet. We
    can get the formatted size with a too-small snprintf(), and then
    directly fprintf() out the result (this doesn't work with
    descriptors, because we have to format the result to feed it to
    write()).

  - if this were all using packet_writer, it could easily learn to
    heap-allocate a single buffer for repeated use. That's roughly the
    same as what's happening in this series, but it would all be taken
    care of by the pkt-line code.

-Peff

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 10:06           ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
  2021-08-26 16:33             ` Junio C Hamano
@ 2021-08-26 23:32             ` Jeff King
  1 sibling, 0 replies; 27+ messages in thread
From: Jeff King @ 2021-08-26 23:32 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: me, git, gitster, ps

On Thu, Aug 26, 2021 at 12:06:48PM +0200, Jacob Vosmaer wrote:

> @@ -126,6 +127,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
>  	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
>  	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
>  	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
> +	struct strbuf send_ref_buf = STRBUF_INIT;
>  
>  	memset(data, 0, sizeof(*data));
>  	data->symref = symref;
> @@ -141,6 +143,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
>  	data->allow_filter_fallback = 1;
>  	data->tree_filter_max_depth = ULONG_MAX;
>  	packet_writer_init(&data->writer, 1);
> +	data->send_ref_buf = send_ref_buf;

This does a struct copy of the strbuf, which is usually a bad thing
(both copies think they own the pointer). It works here because the
original immediately goes out of scope. The usual thing would be to do
this instead:

  strbuf_init(&data->send_ref_buf, 0);

but I notice this whole function is somewhat odd that way (lots of
static initializers followed by struct assignment, rather than using
initialization functions).

I'm not sure if it's worth changing or not. Again, I don't think it's
doing the wrong thing, but just sort of unusual for our code base. A few
of the data structures in use here don't have _init() functions
(object_array and oid_array), but that probably means we ought to add
them.

-Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-26 23:24               ` Jeff King
@ 2021-08-27 16:15                 ` Junio C Hamano
  2021-08-31  9:34                   ` [PATCH v3 0/2] send_ref buffering Jacob Vosmaer
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2021-08-27 16:15 UTC (permalink / raw)
  To: Jeff King; +Cc: Jacob Vosmaer, me, git, ps

Jeff King <peff@peff.net> writes:

> On Thu, Aug 26, 2021 at 09:33:08AM -0700, Junio C Hamano wrote:
>
>> > +	/*
>> > +	 * Increase the stdio buffer size for stdout, for the benefit of ref
>> > +	 * advertisement writes. We are only allowed to call setvbuf(3) "after
>> > +	 * opening a stream and before any other operations have been performed
>> > +	 * on it", so let's call it before we have written anything to stdout.
>> > +	 */
>> > +	if (setvbuf(stdout, xmalloc(LARGE_PACKET_MAX), _IOFBF,
>> > +			LARGE_PACKET_MAX))
>> > +		die_errno("failed to grow stdout buffer");
>> 
>> Nice to see a comment on the tricky part.  I do not think we mind if
>> we rounded up the allocation size to the next power of two here, but
>> there probably won't be any measurable benefit for doing so.
>
> I'm a little negative on this part, actually. The commit message claims
> it's for consistency across platforms. But I would argue that if your
> libc buffer size is sub-optimal, then we shouldn't be sprinkling these
> adjustments through the code. Either:
>
>  - we should consider it a quality-of-implementation issue, and people
>    using that libc should push back on their platform to change the
>    default size; or
>
>  - we should fix it consistently and transparently throughout Git by
>    adjusting std{in,out,err} in common-main, and using fopen()/fdopen()
>    wrappers to adjust to something more sensible

I agree that it would be ideal if we didn't do setvbuf() at all, the
second best would be to do so at a central place.  My "Nice" applied
only to the comment ;-)

> I also find the use of LARGE_PACKET_MAX weird here. Is that really the
> optimal stdio buffer size? The whole point of this is coalescing _small_
> packets into a single write() syscall. So how many small packets fit
> into LARGE_PACKET_MAX is comparing apples and oranges.

The sizing is all my fault.  The original used 32k threashold and
implemented manual buffering by flushing whenever the accumulated
data exceeded the threashold, as opposed to "if we buffer this new
piece, it would exceed, so let's flush first what we got so far,
which is smaller than the threashold", which I found indefensible in
two ways.  The "flush _after_ we go over it" semantics looked iffy,
and 32k was totally out of thin air.  As LARGE_PACKET_MAX is the
hard upper limit of each packet that has been with us forever, it
was more defensible than 32k ;-)

But if we are using stdio, I agree that it is much better not to
worry about sizing at all by not doing setvbuf() and leaving it to
libc implementation.  They ought to know what works on their
platform the best.

Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v3 0/2] send_ref buffering
  2021-08-27 16:15                 ` Junio C Hamano
@ 2021-08-31  9:34                   ` Jacob Vosmaer
  2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
                                       ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-31  9:34 UTC (permalink / raw)
  To: gitster, peff, me, git, ps; +Cc: Jacob Vosmaer

Changes compared to v2:
- remove setvbuf call
- add packet_fwrite_fmt
- add packet_fflush

Non-changes:
- no ferror calls because those are for reads, not for writes

Thanks for the reactions everyone. I agree that packet_fwrite_fmt
simplifies the patch nicely. Jeff, I hope I have given you credit
in an appropriate way, let me know if you want me to change something
there.

Regarding setvbuf: I have found out that GNU coreutils has a utility
called stdbuf that lets you modify the stdout buffer size at runtime
using some LD_PRELOAD hack so we can use that in Gitaly. I don't
think this is the best outcome for users, we ought to give them a
good default instead of expecting them to invoke git-upload-pack
as 'stdbuf -o 64K git-upload-pack'. But I can't judge the impact
of globally changing the stdout buffer size for Git so I'll settle
for having to use stdbuf.

Jacob Vosmaer (2):
  pkt-line: add packet_fwrite and packet_fwrite_fmt
  upload-pack: use stdio in send_ref callbacks

 cache.h        |  2 ++
 ls-refs.c      |  4 +++-
 pkt-line.c     | 30 ++++++++++++++++++++++++++++++
 pkt-line.h     |  8 ++++++++
 upload-pack.c  |  8 +++++---
 write-or-die.c | 12 ++++++++++++
 6 files changed, 60 insertions(+), 4 deletions(-)

-- 
2.32.0


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v3 1/2] pkt-line: add stdio packet write functions
  2021-08-31  9:34                   ` [PATCH v3 0/2] send_ref buffering Jacob Vosmaer
@ 2021-08-31  9:34                     ` Jacob Vosmaer
  2021-08-31 10:37                       ` Jeff King
  2021-08-31 18:13                       ` Junio C Hamano
  2021-08-31  9:34                     ` [PATCH v3 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
  2021-08-31 10:25                     ` [PATCH v3 0/2] send_ref buffering Jeff King
  2 siblings, 2 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-31  9:34 UTC (permalink / raw)
  To: gitster, peff, me, git, ps; +Cc: Jacob Vosmaer

This adds three new functions to pkt-line.c: packet_fwrite,
packet_fwrite_fmt and packet_fflush. Besides writing a pktline flush
packet, packet_fflush also flushes the stdio buffer of the stream.

Helped-by: Patrick Steinhardt <ps@pks.im>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 cache.h        |  2 ++
 pkt-line.c     | 37 +++++++++++++++++++++++++++++++++++++
 pkt-line.h     | 11 +++++++++++
 write-or-die.c | 12 ++++++++++++
 4 files changed, 62 insertions(+)

diff --git a/cache.h b/cache.h
index bd4869beee..dcf2454c3b 100644
--- a/cache.h
+++ b/cache.h
@@ -1736,6 +1736,8 @@ extern const char *git_mailmap_blob;
 void maybe_flush_or_die(FILE *, const char *);
 __attribute__((format (printf, 2, 3)))
 void fprintf_or_die(FILE *, const char *fmt, ...);
+void fwrite_or_die(FILE *f, const void *buf, size_t count);
+void fflush_or_die(FILE *f);
 
 #define COPY_READ_ERROR (-2)
 #define COPY_WRITE_ERROR (-3)
diff --git a/pkt-line.c b/pkt-line.c
index 9f63eae2e6..de4a94b437 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -243,6 +243,43 @@ void packet_write(int fd_out, const char *buf, size_t size)
 		die("%s", err.buf);
 }
 
+void packet_fwrite(FILE *f, const char *buf, size_t size)
+{
+	size_t packet_size;
+	char header[4];
+
+	if (size > LARGE_PACKET_DATA_MAX)
+		die(_("packet write failed - data exceeds max packet size"));
+
+	packet_trace(buf, size, 1);
+	packet_size = size + 4;
+
+	set_packet_header(header, packet_size);
+	fwrite_or_die(f, header, 4);
+	fwrite_or_die(f, buf, size);
+}
+
+void packet_fwrite_fmt(FILE *fh, const char *fmt, ...)
+{
+       static struct strbuf buf = STRBUF_INIT;
+       va_list args;
+
+       strbuf_reset(&buf);
+
+       va_start(args, fmt);
+       format_packet(&buf, "", fmt, args);
+       va_end(args);
+
+       fwrite_or_die(fh, buf.buf, buf.len);
+}
+
+void packet_fflush(FILE *f)
+{
+	packet_trace("0000", 4, 1);
+	fwrite_or_die(f, "0000", 4);
+	fflush_or_die(f);
+}
+
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
 {
 	va_list args;
diff --git a/pkt-line.h b/pkt-line.h
index 5af5f45687..0a38a68d15 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -35,6 +35,17 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format
 int write_packetized_from_fd_no_flush(int fd_in, int fd_out);
 int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out);
 
+/*
+ * Stdio versions of packet_write functions. When mixing these with fd
+ * based functions, take care to call fflush or packet_fflush before
+ * doing fd writes or closing the fd.
+ */
+void packet_fwrite(FILE *f, const char *buf, size_t size);
+void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
+
+/* packet_fflush writes a flush packet and flushes the stdio buffer of f */
+void packet_fflush(FILE *f);
+
 /*
  * Read a packetized line into the buffer, which must be at least size bytes
  * long. The return value specifies the number of bytes read into the buffer.
diff --git a/write-or-die.c b/write-or-die.c
index d33e68f6ab..7a2f84e2ee 100644
--- a/write-or-die.c
+++ b/write-or-die.c
@@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count)
 		die_errno("write error");
 	}
 }
+
+void fwrite_or_die(FILE *f, const void *buf, size_t count)
+{
+	if (fwrite(buf, count, 1, f) != 1)
+		die_errno("fwrite error");
+}
+
+void fflush_or_die(FILE *f)
+{
+	if (fflush(f))
+		die_errno("fflush error");
+}
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 2/2] upload-pack: use stdio in send_ref callbacks
  2021-08-31  9:34                   ` [PATCH v3 0/2] send_ref buffering Jacob Vosmaer
  2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
@ 2021-08-31  9:34                     ` Jacob Vosmaer
  2021-08-31 10:25                     ` [PATCH v3 0/2] send_ref buffering Jeff King
  2 siblings, 0 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-31  9:34 UTC (permalink / raw)
  To: gitster, peff, me, git, ps; +Cc: Jacob Vosmaer

In both protocol v0 and v2, upload-pack writes one pktline packet per
advertised ref to stdout. That means one or two write(2) syscalls per
ref. This is problematic if these writes become network sends with
high overhead.

This commit changes both send_ref callbacks to use buffered IO using
stdio.

To give an example of the impact: I set up a single-threaded loop that
calls ls-remote (with HTTP and protocol v2) on a local GitLab
instance, on a repository with 11K refs. When I switch from Git
v2.32.0 to this patch, I see a 40% reduction in CPU time for Git, and
65% for Gitaly (GitLab's Git RPC service).

So using buffered IO not only saves syscalls in upload-pack, it also
saves time in things that consume upload-pack's output.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 ls-refs.c     |  4 ++--
 upload-pack.c | 11 ++++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/ls-refs.c b/ls-refs.c
index 88f6c3f60d..e6a2dbd962 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	}
 
 	strbuf_addch(&refline, '\n');
-	packet_write(1, refline.buf, refline.len);
+	packet_fwrite(stdout, refline.buf, refline.len);
 
 	strbuf_release(&refline);
 	return 0;
@@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys,
 		strvec_push(&data.prefixes, "");
 	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
 				     send_ref, &data, 0);
-	packet_flush(1);
+	packet_fflush(stdout);
 	strvec_clear(&data.prefixes);
 	return 0;
 }
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb4..2fdd73dfcb 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 
 		format_symref_info(&symref_info, &data->symref);
 		format_session_id(&session_id, data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+		packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (data->allow_uor & ALLOW_TIP_SHA1) ?
@@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		strbuf_release(&symref_info);
 		strbuf_release(&session_id);
 	} else {
-		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
+		packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
 	if (!peel_iterated_oid(oid, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+		packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
 	return 0;
 }
 
@@ -1348,6 +1348,11 @@ void upload_pack(struct upload_pack_options *options)
 		reset_timeout(data.timeout);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		/* 
+		 * fflush stdout before calling advertise_shallow_grafts because send_ref
+		 * uses stdio.
+		 */
+		fflush_or_die(stdout);
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 0/2] send_ref buffering
  2021-08-31  9:34                   ` [PATCH v3 0/2] send_ref buffering Jacob Vosmaer
  2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
  2021-08-31  9:34                     ` [PATCH v3 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
@ 2021-08-31 10:25                     ` Jeff King
  2021-08-31 13:08                       ` Jacob Vosmaer
  2 siblings, 1 reply; 27+ messages in thread
From: Jeff King @ 2021-08-31 10:25 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: gitster, me, git, ps

On Tue, Aug 31, 2021 at 11:34:42AM +0200, Jacob Vosmaer wrote:

> Thanks for the reactions everyone. I agree that packet_fwrite_fmt
> simplifies the patch nicely. Jeff, I hope I have given you credit
> in an appropriate way, let me know if you want me to change something
> there.

What you did looks fine.

Overall the series looks much nicer, and I don't have any real
complaints. I do think it would be nice to take the packet_writer
interface further (letting it replace the static buf, and use stdio
handles, and using it throughout upload-pack). But this is a strict
improvement, so we can do that other refactoring later.

> Regarding setvbuf: I have found out that GNU coreutils has a utility
> called stdbuf that lets you modify the stdout buffer size at runtime
> using some LD_PRELOAD hack so we can use that in Gitaly. I don't
> think this is the best outcome for users, we ought to give them a
> good default instead of expecting them to invoke git-upload-pack
> as 'stdbuf -o 64K git-upload-pack'. But I can't judge the impact
> of globally changing the stdout buffer size for Git so I'll settle
> for having to use stdbuf.

Does the 64k buffer actually improve things? Here are the timings I get
on a repo with ~1M refs (it's linux.git with one ref per commit). "git"
is current unbuffered version, and "git.compile" is master with your
patches on top:

  $ hyperfine -i 'git upload-pack .' 'git.compile upload-pack .' 'stdbuf -o 64K git.compile upload-pack .'
  Benchmark #1: git upload-pack .
    Time (mean ± σ):     948.6 ms ±   7.3 ms    [User: 840.8 ms, System: 107.8 ms]
    Range (min … max):   937.7 ms … 961.1 ms    10 runs
   
    Warning: Ignoring non-zero exit code.
   
  Benchmark #2: git.compile upload-pack .
    Time (mean ± σ):     867.3 ms ±   6.8 ms    [User: 821.5 ms, System: 45.7 ms]
    Range (min … max):   859.7 ms … 883.0 ms    10 runs
   
    Warning: Ignoring non-zero exit code.
   
  Benchmark #3: stdbuf -o 64K git.compile upload-pack .
    Time (mean ± σ):     861.1 ms ±   8.2 ms    [User: 815.5 ms, System: 45.6 ms]
    Range (min … max):   846.1 ms … 872.0 ms    10 runs
   
    Warning: Ignoring non-zero exit code.
   
  Summary
    'stdbuf -o 64K git.compile upload-pack .' ran
      1.01 ± 0.01 times faster than 'git.compile upload-pack .'
      1.10 ± 0.01 times faster than 'git upload-pack .'

This is on a glibc system, so the default buffers should be 4k. It
doesn't appear to make any difference (there's a slight improvement, but
well within the noise, and I had other runs where it did worse).

By the way, if you really want to speed things up, try this:

  $ hyperfine -i 'git.compile upload-pack .' 'GIT_REF_PARANOIA=1 git.compile upload-pack .'
  Benchmark #1: git.compile upload-pack .
    Time (mean ± σ):     855.4 ms ±   5.8 ms    [User: 803.4 ms, System: 52.0 ms]
    Range (min … max):   848.7 ms … 869.5 ms    10 runs
   
    Warning: Ignoring non-zero exit code.
   
  Benchmark #2: GIT_REF_PARANOIA=1 git.compile upload-pack .
    Time (mean ± σ):     394.4 ms ±   3.0 ms    [User: 357.9 ms, System: 36.4 ms]
    Range (min … max):   390.6 ms … 400.3 ms    10 runs
   
    Warning: Ignoring non-zero exit code.
   
  Summary
    'GIT_REF_PARANOIA=1 git.compile upload-pack .' ran
      2.17 ± 0.02 times faster than 'git.compile upload-pack .'

It's not exactly the intended use of that environment variable, but its
side effect is that we do not call has_object_file() on each ref tip.

-Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 1/2] pkt-line: add stdio packet write functions
  2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
@ 2021-08-31 10:37                       ` Jeff King
  2021-08-31 18:13                       ` Junio C Hamano
  1 sibling, 0 replies; 27+ messages in thread
From: Jeff King @ 2021-08-31 10:37 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: gitster, me, git, ps

On Tue, Aug 31, 2021 at 11:34:43AM +0200, Jacob Vosmaer wrote:

> +void fwrite_or_die(FILE *f, const void *buf, size_t count)
> +{
> +	if (fwrite(buf, count, 1, f) != 1)
> +		die_errno("fwrite error");
> +}

One small oddity I noticed. The definition of fwrite is
fwrite(ptr, size, nmemb, strea), where we write "nmemb" items of "size"
bytes each. I'd argue we're writing "count" single bytes, so it should
be:

  if (fwrite(buf, 1, count, f) != count)

This matters a lot for fread(), where any read shorter than "count"
(e.g., due to EOF) would return "0" rather than a partial result. But I
have a hard time imagining an implementation of fwrite() where the
distinction would matter. And grepping around, we seem to have both
forms in our code base already. So it's probably fine.

-Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 0/2] send_ref buffering
  2021-08-31 10:25                     ` [PATCH v3 0/2] send_ref buffering Jeff King
@ 2021-08-31 13:08                       ` Jacob Vosmaer
  2021-08-31 17:44                         ` Jacob Vosmaer
  2021-09-01  0:15                         ` Jeff King
  0 siblings, 2 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-31 13:08 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Taylor Blau, Git Mailing List, ps

On Tue, Aug 31, 2021 at 12:25 PM Jeff King <peff@peff.net> wrote:
> I do think it would be nice to take the packet_writer
> interface further (letting it replace the static buf, and use stdio
> handles, and using it throughout upload-pack).
I would like that too, for the sake of neatness and general
performance, but I don't have the time to take on a larger project
like that at the moment.

> Does the 64k buffer actually improve things? Here are the timings I get
> on a repo with ~1M refs (it's linux.git with one ref per commit).
Thanks for challenging that. I have a repeatable benchmark where it
matters, because each write syscall wakes up a chain of proxies
between the user and git-upload-pack. Larger buffers means fewer
wake-ups. But then I tried to simplify my example by having sshd as
the only intermediary, and in that experiment 64K buffers were not
better than 4K buffers. I think that goes to show that picking a good
buffer size is hard, and we'd be better off picking one specifically
for Gitaly (and GitLab) that works with our stack.

>   Summary
>     'GIT_REF_PARANOIA=1 git.compile upload-pack .' ran
>       2.17 ± 0.02 times faster than 'git.compile upload-pack .'
>
> It's not exactly the intended use of that environment variable, but its
> side effect is that we do not call has_object_file() on each ref tip.
That is nice to know, but as a user of Git I don't know when it is or
is not safe to skip those has_object_file() calls. If it's safe to
skip them then Git should skip them always. If not, then I will err on
the side of caution and keep the checks.

Jacob

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 0/2] send_ref buffering
  2021-08-31 13:08                       ` Jacob Vosmaer
@ 2021-08-31 17:44                         ` Jacob Vosmaer
  2021-09-01  0:15                         ` Jeff King
  1 sibling, 0 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-08-31 17:44 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Taylor Blau, Git Mailing List, ps

On Tue, Aug 31, 2021 at 3:08 PM Jacob Vosmaer <jacob@gitlab.com> wrote:
>
> On Tue, Aug 31, 2021 at 12:25 PM Jeff King <peff@peff.net> wrote:
> > I do think it would be nice to take the packet_writer
> > interface further (letting it replace the static buf, and use stdio
> > handles, and using it throughout upload-pack).
> I would like that too, for the sake of neatness and general
> performance, but I don't have the time to take on a larger project
> like that at the moment.
I gave solving the problem with packet_writer a couple of hours today.
The diff gets too big, and I have too little confidence I'm not
introducing deadlocks. This really is more work than I can chew off
right now. Sorry!

Jacob

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 1/2] pkt-line: add stdio packet write functions
  2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
  2021-08-31 10:37                       ` Jeff King
@ 2021-08-31 18:13                       ` Junio C Hamano
  2021-09-01 12:54                         ` [PATCH v4 0/2] send_ref buffering Jacob Vosmaer
  1 sibling, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2021-08-31 18:13 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: peff, me, git, ps

Jacob Vosmaer <jacob@gitlab.com> writes:

> +/*
> + * Stdio versions of packet_write functions. When mixing these with fd
> + * based functions, take care to call fflush or packet_fflush before
> + * doing fd writes or closing the fd.
> + */

You may have wanted to say that fflush() is not needed immediately
after packet_fflush() (because the latter calls fflush()), but I
find the "or packet_fflush" in the above comment highly misleading.

It's not like you can randomly call packet_fflush() where you would
call fflush(), as the former will insert a FLUSH packet to the
output stream.  "... take care to call fflush(3) before doihng fd
writes or closing the fd" would be more appropriate.  After all, if
you make fflush() even after calling packet_fflush(), nothing will
break.

> +void packet_fwrite(FILE *f, const char *buf, size_t size);
> +void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> +
> +/* packet_fflush writes a flush packet and flushes the stdio buffer of f */
> +void packet_fflush(FILE *f);
> +
>  /*
>   * Read a packetized line into the buffer, which must be at least size bytes
>   * long. The return value specifies the number of bytes read into the buffer.
> diff --git a/write-or-die.c b/write-or-die.c
> index d33e68f6ab..7a2f84e2ee 100644
> --- a/write-or-die.c
> +++ b/write-or-die.c
> @@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count)
>  		die_errno("write error");
>  	}
>  }
> +
> +void fwrite_or_die(FILE *f, const void *buf, size_t count)
> +{
> +	if (fwrite(buf, count, 1, f) != 1)

This counts the size of the buffer the wrong way.

fwrite() gives the size of each element to be written out first, and
then number of elements that are written, which is the same as
fread() but unfortunately the other way around from calloc() where
count comes first X-<.

Other than that, nicely done.

Thanks.

> +		die_errno("fwrite error");
> +}
> +
> +void fflush_or_die(FILE *f)
> +{
> +	if (fflush(f))
> +		die_errno("fflush error");
> +}

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 0/2] send_ref buffering
  2021-08-31 13:08                       ` Jacob Vosmaer
  2021-08-31 17:44                         ` Jacob Vosmaer
@ 2021-09-01  0:15                         ` Jeff King
  1 sibling, 0 replies; 27+ messages in thread
From: Jeff King @ 2021-09-01  0:15 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: Junio C Hamano, Taylor Blau, Git Mailing List, ps

On Tue, Aug 31, 2021 at 03:08:25PM +0200, Jacob Vosmaer wrote:

> > Does the 64k buffer actually improve things? Here are the timings I get
> > on a repo with ~1M refs (it's linux.git with one ref per commit).
> Thanks for challenging that. I have a repeatable benchmark where it
> matters, because each write syscall wakes up a chain of proxies
> between the user and git-upload-pack. Larger buffers means fewer
> wake-ups. But then I tried to simplify my example by having sshd as
> the only intermediary, and in that experiment 64K buffers were not
> better than 4K buffers. I think that goes to show that picking a good
> buffer size is hard, and we'd be better off picking one specifically
> for Gitaly (and GitLab) that works with our stack.

Thanks for explaining. Yeah, I think leaving it as a custom thing makes
sense, then.

> >   Summary
> >     'GIT_REF_PARANOIA=1 git.compile upload-pack .' ran
> >       2.17 ± 0.02 times faster than 'git.compile upload-pack .'
> >
> > It's not exactly the intended use of that environment variable, but its
> > side effect is that we do not call has_object_file() on each ref tip.
> That is nice to know, but as a user of Git I don't know when it is or
> is not safe to skip those has_object_file() calls. If it's safe to
> skip them then Git should skip them always. If not, then I will err on
> the side of caution and keep the checks.

Yeah, the use of REF_PARANOIA there was just an easy illustration. IMHO
it would be reasonable for upload-pack to just assume that the refs
files are valid. If they aren't, then either:

  - the receiver is uninterested in those objects or already has them,
    so won't ask for them. They're happy either way.

  - the receiver _will_ ask for them, at which point we'd barf later in
    pack-objects when we try to access them.

There are some thoughts in this old thread which introduce
GIT_REF_PARANOIA:

  https://lore.kernel.org/git/20150317073730.GA25267@peff.net/

I think I was mostly too cowardly to make the change back then. And I
hadn't considered that the performance implications would be all that
big (though I will say this million-ref repo is at the edge of what I'd
consider reasonable).

> > > I do think it would be nice to take the packet_writer
> > > interface further (letting it replace the static buf, and use stdio
> > > handles, and using it throughout upload-pack).
> > I would like that too, for the sake of neatness and general
> > performance, but I don't have the time to take on a larger project
> > like that at the moment.
> I gave solving the problem with packet_writer a couple of hours today.
> The diff gets too big, and I have too little confidence I'm not
> introducing deadlocks. This really is more work than I can chew off
> right now. Sorry!

Thanks for taking a look! I think we can proceed with your series for
now, then.

-Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v4 0/2] send_ref buffering
  2021-08-31 18:13                       ` Junio C Hamano
@ 2021-09-01 12:54                         ` Jacob Vosmaer
  2021-09-01 12:54                           ` [PATCH v4 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
                                             ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-09-01 12:54 UTC (permalink / raw)
  To: gitster, peff, me, git, ps; +Cc: Jacob Vosmaer

Changes:
- pkt-line.h comment
- fwrite argument order

Jacob Vosmaer (2):
  pkt-line: add stdio packet write functions
  upload-pack: use stdio in send_ref callbacks

 cache.h        |  2 ++
 ls-refs.c      |  4 ++--
 pkt-line.c     | 37 +++++++++++++++++++++++++++++++++++++
 pkt-line.h     | 11 +++++++++++
 upload-pack.c  | 11 ++++++++---
 write-or-die.c | 12 ++++++++++++
 6 files changed, 72 insertions(+), 5 deletions(-)

-- 
2.32.0


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v4 1/2] pkt-line: add stdio packet write functions
  2021-09-01 12:54                         ` [PATCH v4 0/2] send_ref buffering Jacob Vosmaer
@ 2021-09-01 12:54                           ` Jacob Vosmaer
  2021-09-01 12:54                           ` [PATCH v4 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
  2021-09-02  9:18                           ` [PATCH v4 0/2] send_ref buffering Jeff King
  2 siblings, 0 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-09-01 12:54 UTC (permalink / raw)
  To: gitster, peff, me, git, ps; +Cc: Jacob Vosmaer

This adds three new functions to pkt-line.c: packet_fwrite,
packet_fwrite_fmt and packet_fflush. Besides writing a pktline flush
packet, packet_fflush also flushes the stdio buffer of the stream.

Helped-by: Patrick Steinhardt <ps@pks.im>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 cache.h        |  2 ++
 pkt-line.c     | 37 +++++++++++++++++++++++++++++++++++++
 pkt-line.h     | 11 +++++++++++
 write-or-die.c | 12 ++++++++++++
 4 files changed, 62 insertions(+)

diff --git a/cache.h b/cache.h
index bd4869beee..dcf2454c3b 100644
--- a/cache.h
+++ b/cache.h
@@ -1736,6 +1736,8 @@ extern const char *git_mailmap_blob;
 void maybe_flush_or_die(FILE *, const char *);
 __attribute__((format (printf, 2, 3)))
 void fprintf_or_die(FILE *, const char *fmt, ...);
+void fwrite_or_die(FILE *f, const void *buf, size_t count);
+void fflush_or_die(FILE *f);
 
 #define COPY_READ_ERROR (-2)
 #define COPY_WRITE_ERROR (-3)
diff --git a/pkt-line.c b/pkt-line.c
index 9f63eae2e6..de4a94b437 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -243,6 +243,43 @@ void packet_write(int fd_out, const char *buf, size_t size)
 		die("%s", err.buf);
 }
 
+void packet_fwrite(FILE *f, const char *buf, size_t size)
+{
+	size_t packet_size;
+	char header[4];
+
+	if (size > LARGE_PACKET_DATA_MAX)
+		die(_("packet write failed - data exceeds max packet size"));
+
+	packet_trace(buf, size, 1);
+	packet_size = size + 4;
+
+	set_packet_header(header, packet_size);
+	fwrite_or_die(f, header, 4);
+	fwrite_or_die(f, buf, size);
+}
+
+void packet_fwrite_fmt(FILE *fh, const char *fmt, ...)
+{
+       static struct strbuf buf = STRBUF_INIT;
+       va_list args;
+
+       strbuf_reset(&buf);
+
+       va_start(args, fmt);
+       format_packet(&buf, "", fmt, args);
+       va_end(args);
+
+       fwrite_or_die(fh, buf.buf, buf.len);
+}
+
+void packet_fflush(FILE *f)
+{
+	packet_trace("0000", 4, 1);
+	fwrite_or_die(f, "0000", 4);
+	fflush_or_die(f);
+}
+
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
 {
 	va_list args;
diff --git a/pkt-line.h b/pkt-line.h
index 5af5f45687..82b95e4bdd 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -35,6 +35,17 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format
 int write_packetized_from_fd_no_flush(int fd_in, int fd_out);
 int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out);
 
+/*
+ * Stdio versions of packet_write functions. When mixing these with fd
+ * based functions, take care to call fflush(3) before doing fd writes or
+ * closing the fd.
+ */
+void packet_fwrite(FILE *f, const char *buf, size_t size);
+void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
+
+/* packet_fflush writes a flush packet and flushes the stdio buffer of f */
+void packet_fflush(FILE *f);
+
 /*
  * Read a packetized line into the buffer, which must be at least size bytes
  * long. The return value specifies the number of bytes read into the buffer.
diff --git a/write-or-die.c b/write-or-die.c
index d33e68f6ab..0b1ec8190b 100644
--- a/write-or-die.c
+++ b/write-or-die.c
@@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count)
 		die_errno("write error");
 	}
 }
+
+void fwrite_or_die(FILE *f, const void *buf, size_t count)
+{
+	if (fwrite(buf, 1, count, f) != count)
+		die_errno("fwrite error");
+}
+
+void fflush_or_die(FILE *f)
+{
+	if (fflush(f))
+		die_errno("fflush error");
+}
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v4 2/2] upload-pack: use stdio in send_ref callbacks
  2021-09-01 12:54                         ` [PATCH v4 0/2] send_ref buffering Jacob Vosmaer
  2021-09-01 12:54                           ` [PATCH v4 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
@ 2021-09-01 12:54                           ` Jacob Vosmaer
  2021-09-02  9:18                           ` [PATCH v4 0/2] send_ref buffering Jeff King
  2 siblings, 0 replies; 27+ messages in thread
From: Jacob Vosmaer @ 2021-09-01 12:54 UTC (permalink / raw)
  To: gitster, peff, me, git, ps; +Cc: Jacob Vosmaer

In both protocol v0 and v2, upload-pack writes one pktline packet per
advertised ref to stdout. That means one or two write(2) syscalls per
ref. This is problematic if these writes become network sends with
high overhead.

This commit changes both send_ref callbacks to use buffered IO using
stdio.

To give an example of the impact: I set up a single-threaded loop that
calls ls-remote (with HTTP and protocol v2) on a local GitLab
instance, on a repository with 11K refs. When I switch from Git
v2.32.0 to this patch, I see a 40% reduction in CPU time for Git, and
65% for Gitaly (GitLab's Git RPC service).

So using buffered IO not only saves syscalls in upload-pack, it also
saves time in things that consume upload-pack's output.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jacob Vosmaer <jacob@gitlab.com>
---
 ls-refs.c     |  4 ++--
 upload-pack.c | 11 ++++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/ls-refs.c b/ls-refs.c
index 88f6c3f60d..e6a2dbd962 100644
--- a/ls-refs.c
+++ b/ls-refs.c
@@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	}
 
 	strbuf_addch(&refline, '\n');
-	packet_write(1, refline.buf, refline.len);
+	packet_fwrite(stdout, refline.buf, refline.len);
 
 	strbuf_release(&refline);
 	return 0;
@@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys,
 		strvec_push(&data.prefixes, "");
 	for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v,
 				     send_ref, &data, 0);
-	packet_flush(1);
+	packet_fflush(stdout);
 	strvec_clear(&data.prefixes);
 	return 0;
 }
diff --git a/upload-pack.c b/upload-pack.c
index 297b76fcb4..2fdd73dfcb 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 
 		format_symref_info(&symref_info, &data->symref);
 		format_session_id(&session_id, data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
+		packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (data->allow_uor & ALLOW_TIP_SHA1) ?
@@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		strbuf_release(&symref_info);
 		strbuf_release(&session_id);
 	} else {
-		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
+		packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
 	if (!peel_iterated_oid(oid, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+		packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
 	return 0;
 }
 
@@ -1348,6 +1348,11 @@ void upload_pack(struct upload_pack_options *options)
 		reset_timeout(data.timeout);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		/* 
+		 * fflush stdout before calling advertise_shallow_grafts because send_ref
+		 * uses stdio.
+		 */
+		fflush_or_die(stdout);
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v4 0/2] send_ref buffering
  2021-09-01 12:54                         ` [PATCH v4 0/2] send_ref buffering Jacob Vosmaer
  2021-09-01 12:54                           ` [PATCH v4 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
  2021-09-01 12:54                           ` [PATCH v4 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
@ 2021-09-02  9:18                           ` Jeff King
  2 siblings, 0 replies; 27+ messages in thread
From: Jeff King @ 2021-09-02  9:18 UTC (permalink / raw)
  To: Jacob Vosmaer; +Cc: gitster, me, git, ps

On Wed, Sep 01, 2021 at 02:54:40PM +0200, Jacob Vosmaer wrote:

> Changes:
> - pkt-line.h comment
> - fwrite argument order
> 
> Jacob Vosmaer (2):
>   pkt-line: add stdio packet write functions
>   upload-pack: use stdio in send_ref callbacks

Thanks, this version looks good to me.

-Peff

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2021-09-02  9:19 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-24 14:02 [PATCH 1/1] upload-pack: buffer ref advertisement writes Jacob Vosmaer
2021-08-24 21:07 ` Taylor Blau
2021-08-24 21:42   ` Junio C Hamano
2021-08-25  0:44     ` Jeff King
2021-08-26 10:02       ` Jacob Vosmaer
2021-08-26 10:06         ` [PATCH 1/2] pkt-line: add packet_fwrite Jacob Vosmaer
2021-08-26 10:06           ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
2021-08-26 16:33             ` Junio C Hamano
2021-08-26 20:21               ` Junio C Hamano
2021-08-26 22:35               ` Taylor Blau
2021-08-26 23:24               ` Jeff King
2021-08-27 16:15                 ` Junio C Hamano
2021-08-31  9:34                   ` [PATCH v3 0/2] send_ref buffering Jacob Vosmaer
2021-08-31  9:34                     ` [PATCH v3 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
2021-08-31 10:37                       ` Jeff King
2021-08-31 18:13                       ` Junio C Hamano
2021-09-01 12:54                         ` [PATCH v4 0/2] send_ref buffering Jacob Vosmaer
2021-09-01 12:54                           ` [PATCH v4 1/2] pkt-line: add stdio packet write functions Jacob Vosmaer
2021-09-01 12:54                           ` [PATCH v4 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
2021-09-02  9:18                           ` [PATCH v4 0/2] send_ref buffering Jeff King
2021-08-31  9:34                     ` [PATCH v3 2/2] upload-pack: use stdio in send_ref callbacks Jacob Vosmaer
2021-08-31 10:25                     ` [PATCH v3 0/2] send_ref buffering Jeff King
2021-08-31 13:08                       ` Jacob Vosmaer
2021-08-31 17:44                         ` Jacob Vosmaer
2021-09-01  0:15                         ` Jeff King
2021-08-26 23:32             ` [PATCH 2/2] upload-pack: use stdio in send_ref callbacks Jeff King
2021-08-26 16:33           ` [PATCH 1/2] pkt-line: add packet_fwrite Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).