From: Junio C Hamano <gitster@pobox.com>
To: Jeff King <peff@peff.net>
Cc: Konstantin Ryabitsev <konstantin@linuxfoundation.org>,
git@vger.kernel.org
Subject: Re: [PATCH 2/2] http-backend: spool ref negotiation requests to buffer
Date: Fri, 15 May 2015 11:22:42 -0700 [thread overview]
Message-ID: <xmqqegmhhf9p.fsf@gitster.dls.corp.google.com> (raw)
In-Reply-To: <20150515063339.GB30890@peff.net> (Jeff King's message of "Fri, 15 May 2015 02:33:39 -0400")
Jeff King <peff@peff.net> writes:
> The solution is fairly straight-forward: we read the request
> body into an in-memory buffer in http-backend, freeing up
> Apache, and then feed the data ourselves to upload-pack. But
> there are a few important things to note:
>
> 1. We limit in-memory buffer to no larger than 1 megabyte
> to prevent an obvious denial-of-service attack. This
> is a new hard limit on requests, but it's likely that
> requests of this size didn't work before at all (i.e.,
> they would have run into the pipe buffer thing and
> deadlocked).
>
> 2. We must take care only to buffer when we have to. For
> pushes, the incoming packfile may be of arbitrary
> size, and we should connect the input directly to
> receive-pack. There's no deadlock problem here, though,
> because we do not produce any output until the whole
> packfile has been read.
>
> For upload-pack's initial ref advertisement, we
> similarly do not need to buffer. Even though we may
> generate a lot of output, there is no request body at
> all (i.e., it is a GET, not a POST).
Thanks.
One unrelated thing I noticed was that three codepaths independently
have close(0) in run_service() now, and made me follow the two
helper functions to see they both do the close at the end. It might
have made the flow easier to follow if run_service() were
...
close(1);
if (gzip)
inflate();
else if (buffer)
copy();
close(0);
...
But that is minor.
Also, is it worth allocating small and then growing up to the maximum?
I think this only relays one request at a time anyway, and I suspect
that a single 1MB allocation at the first call kept getting reused
may be sufficient (and much simpler).
> [1] http://article.gmane.org/gmane.comp.version-control.git/269020
>
> Signed-off-by: Jeff King <peff@peff.net>
> ---
> http-backend.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 77 insertions(+), 11 deletions(-)
>
> diff --git a/http-backend.c b/http-backend.c
> index 3ad82a8..7cc2e8c 100644
> --- a/http-backend.c
> +++ b/http-backend.c
> @@ -19,12 +19,13 @@ static struct string_list *query_params;
> struct rpc_service {
> const char *name;
> const char *config_name;
> + unsigned buffer_input : 1;
> signed enabled : 2;
> };
>
> static struct rpc_service rpc_service[] = {
> - { "upload-pack", "uploadpack", 1 },
> - { "receive-pack", "receivepack", -1 },
> + { "upload-pack", "uploadpack", 1, 1 },
> + { "receive-pack", "receivepack", 0, -1 },
> };
>
> static struct string_list *get_parameters(void)
> @@ -266,9 +267,49 @@ static struct rpc_service *select_service(const char *name)
> return svc;
> }
>
> -static void inflate_request(const char *prog_name, int out)
> +/*
> + * This is basically strbuf_read(), except that if we
> + * hit MAX_REQUEST_BUFFER we die (we'd rather reject a
> + * maliciously large request than chew up infinite memory).
> + */
> +#define MAX_REQUEST_BUFFER (1024 * 1024)
> +static ssize_t read_request(int fd, unsigned char **out)
> +{
> + size_t len = 0, alloc = 8192;
> + unsigned char *buf = xmalloc(alloc);
> +
> + while (1) {
> + ssize_t cnt;
> +
> + cnt = read_in_full(fd, buf + len, alloc - len);
> + if (cnt < 0) {
> + free(buf);
> + return -1;
> + }
> +
> + /* partial read from read_in_full means we hit EOF */
> + len += cnt;
> + if (len < alloc) {
> + *out = buf;
> + return len;
> + }
> +
> + /* otherwise, grow and try again (if we can) */
> + if (alloc == MAX_REQUEST_BUFFER)
> + die("request was larger than our maximum size (%lu)",
> + (unsigned long)(MAX_REQUEST_BUFFER-1));
> +
> + alloc = alloc_nr(alloc);
> + if (alloc > MAX_REQUEST_BUFFER)
> + alloc = MAX_REQUEST_BUFFER;
> + REALLOC_ARRAY(buf, alloc);
> + }
> +}
> +
> +static void inflate_request(const char *prog_name, int out, int buffer_input)
> {
> git_zstream stream;
> + unsigned char *full_request = NULL;
> unsigned char in_buf[8192];
> unsigned char out_buf[8192];
> unsigned long cnt = 0;
> @@ -277,11 +318,21 @@ static void inflate_request(const char *prog_name, int out)
> git_inflate_init_gzip_only(&stream);
>
> while (1) {
> - ssize_t n = xread(0, in_buf, sizeof(in_buf));
> + ssize_t n;
> +
> + if (buffer_input) {
> + if (full_request)
> + n = 0; /* nothing left to read */
> + else
> + n = read_request(0, &full_request);
> + stream.next_in = full_request;
> + } else {
> + n = xread(0, in_buf, sizeof(in_buf));
> + stream.next_in = in_buf;
> + }
> +
> if (n <= 0)
> die("request ended in the middle of the gzip stream");
> -
> - stream.next_in = in_buf;
> stream.avail_in = n;
>
> while (0 < stream.avail_in) {
> @@ -307,9 +358,22 @@ static void inflate_request(const char *prog_name, int out)
> done:
> git_inflate_end(&stream);
> close(out);
> + free(full_request);
> }
>
> -static void run_service(const char **argv)
> +static void copy_request(const char *prog_name, int out)
> +{
> + unsigned char *buf;
> + ssize_t n = read_request(0, &buf);
> + if (n < 0)
> + die_errno("error reading request body");
> + if (write_in_full(out, buf, n) != n)
> + die("%s aborted reading request", prog_name);
> + close(out);
> + free(buf);
> +}
> +
> +static void run_service(const char **argv, int buffer_input)
> {
> const char *encoding = getenv("HTTP_CONTENT_ENCODING");
> const char *user = getenv("REMOTE_USER");
> @@ -334,7 +398,7 @@ static void run_service(const char **argv)
> "GIT_COMMITTER_EMAIL=%s@http.%s", user, host);
>
> cld.argv = argv;
> - if (gzipped_request)
> + if (buffer_input || gzipped_request)
> cld.in = -1;
> cld.git_cmd = 1;
> if (start_command(&cld))
> @@ -342,7 +406,9 @@ static void run_service(const char **argv)
>
> close(1);
> if (gzipped_request)
> - inflate_request(argv[0], cld.in);
> + inflate_request(argv[0], cld.in, buffer_input);
> + else if (buffer_input)
> + copy_request(argv[0], cld.in);
> else
> close(0);
>
> @@ -392,7 +458,7 @@ static void get_info_refs(char *arg)
> packet_flush(1);
>
> argv[0] = svc->name;
> - run_service(argv);
> + run_service(argv, 0);
>
> } else {
> select_getanyfile();
> @@ -496,7 +562,7 @@ static void service_rpc(char *service_name)
> end_headers();
>
> argv[0] = svc->name;
> - run_service(argv);
> + run_service(argv, svc->buffer_input);
> strbuf_release(&buf);
> }
next prev parent reply other threads:[~2015-05-15 18:22 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-05-13 21:04 Clone hangs when done over http with --reference Konstantin Ryabitsev
2015-05-14 0:47 ` Jeff King
2015-05-14 1:02 ` [PATCH] http-backend: fix die recursion with custom handler Jeff King
2015-05-15 6:20 ` Jeff King
2015-05-14 17:08 ` Clone hangs when done over http with --reference Konstantin Ryabitsev
2015-05-14 19:52 ` Jeff King
2015-05-15 6:29 ` [PATCH 0/2] fix http deadlock on giant ref negotiations Jeff King
2015-05-15 6:29 ` [PATCH 1/2] http-backend: fix die recursion with custom handler Jeff King
2015-05-15 6:33 ` [PATCH 2/2] http-backend: spool ref negotiation requests to buffer Jeff King
2015-05-15 18:22 ` Junio C Hamano [this message]
2015-05-15 18:28 ` Konstantin Ryabitsev
2015-05-20 7:35 ` [PATCH v2 0/3] fix http deadlock on giant ref negotiations Jeff King
2015-05-20 7:36 ` [PATCH v2 1/3] http-backend: fix die recursion with custom handler Jeff King
2015-05-20 7:36 ` [PATCH v2 2/3] t5551: factor out tag creation Jeff King
2015-05-20 7:37 ` [PATCH v2 3/3] http-backend: spool ref negotiation requests to buffer Jeff King
2015-05-26 2:07 ` Konstantin Ryabitsev
2015-05-26 2:24 ` Jeff King
2015-05-26 3:43 ` Junio C Hamano
2015-05-15 19:16 ` [PATCH 2/2] " Jeff King
2015-05-15 7:41 ` [PATCH 0/2] fix http deadlock on giant ref negotiations Dennis Kaarsemaker
2015-05-15 8:38 ` Jeff King
2015-05-15 8:44 ` Dennis Kaarsemaker
2015-05-15 8:53 ` Jeff King
2015-05-15 9:11 ` Dennis Kaarsemaker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=xmqqegmhhf9p.fsf@gitster.dls.corp.google.com \
--to=gitster@pobox.com \
--cc=git@vger.kernel.org \
--cc=konstantin@linuxfoundation.org \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).