git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH v2] object-info: support for retrieving object info
@ 2021-04-20 23:38 Bruno Albuquerque
  2021-04-21  0:42 ` Junio C Hamano
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Bruno Albuquerque @ 2021-04-20 23:38 UTC (permalink / raw)
  To: git; +Cc: Bruno Albuquerque, gitster

Sometimes it is useful to get information of an object without having to
download it completely.

Add the "object-info" capability that lets the client ask for
object-related information with their full hexadecimal object names.

Only sizes are returned for now.

Signed-off-by: Bruno Albuquerque <bga@google.com>
---

This version is a small change with just style fixes.

 Documentation/technical/protocol-v2.txt |  31 +++++++
 Makefile                                |   1 +
 protocol-caps.c                         | 113 ++++++++++++++++++++++++
 protocol-caps.h                         |  10 +++
 serve.c                                 |   2 +
 t/t5701-git-serve.sh                    |  26 ++++++
 6 files changed, 183 insertions(+)
 create mode 100644 protocol-caps.c
 create mode 100644 protocol-caps.h

diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt
index a7c806a73e..f4ed141774 100644
--- a/Documentation/technical/protocol-v2.txt
+++ b/Documentation/technical/protocol-v2.txt
@@ -514,3 +514,34 @@ packet-line, and must not contain non-printable or whitespace characters. The
 current implementation uses trace2 session IDs (see
 link:api-trace2.html[api-trace2] for details), but this may change and users of
 the session ID should not rely on this fact.
+
+object-info
+~~~~~~~~~~~
+
+`object-info` is the command to retrieve information about one or more objects.
+Its main purpose is to allow a client to make decisions based on this
+information without having to fully fetch objects. Object size is the only
+information that is currently supported.
+
+An `object-info` request takes the following arguments:
+
+	size
+	Requests size information to be returned for each listed object id.
+
+	oid <oid>
+	Indicates to the server an object which the client wants to obtain
+	information for.
+
+The response of `object-info` is a list of the the requested object ids
+and associated requested information, each separated by a single space.
+
+	output = info flush-pkt
+
+	info = PKT-LINE(attrs) LF)
+		*PKT-LINE(obj-info LF)
+
+	attrs = attr | attrs SP attrs
+
+	attr = "size"
+
+	obj-info = obj-id SP obj-size
diff --git a/Makefile b/Makefile
index 21c0bf1667..3225e37b63 100644
--- a/Makefile
+++ b/Makefile
@@ -961,6 +961,7 @@ LIB_OBJS += progress.o
 LIB_OBJS += promisor-remote.o
 LIB_OBJS += prompt.o
 LIB_OBJS += protocol.o
+LIB_OBJS += protocol-caps.o
 LIB_OBJS += prune-packed.o
 LIB_OBJS += quote.o
 LIB_OBJS += range-diff.o
diff --git a/protocol-caps.c b/protocol-caps.c
new file mode 100644
index 0000000000..13a9e63a04
--- /dev/null
+++ b/protocol-caps.c
@@ -0,0 +1,113 @@
+#include "git-compat-util.h"
+#include "protocol-caps.h"
+#include "gettext.h"
+#include "pkt-line.h"
+#include "strvec.h"
+#include "hash.h"
+#include "object.h"
+#include "object-store.h"
+#include "string-list.h"
+#include "strbuf.h"
+
+struct requested_info {
+	unsigned size : 1;
+};
+
+/*
+ * Parses oids from the given line and collects them in the given
+ * oid_str_list. Returns 1 if parsing was successful and 0 otherwise.
+ */
+static int parse_oid(const char *line, struct string_list *oid_str_list)
+{
+	const char *arg;
+
+	if (!skip_prefix(line, "oid ", &arg))
+		return 0;
+
+	string_list_append(oid_str_list, arg);
+
+	return 1;
+}
+
+/*
+ * Validates and send requested info back to the client. Any errors detected
+ * are returned as they are detected.
+ */
+static void send_info(struct repository *r, struct packet_writer *writer,
+		      struct string_list *oid_str_list,
+		      struct requested_info *info)
+{
+	struct string_list_item *item;
+	struct strbuf send_buffer = STRBUF_INIT;
+
+	if (!oid_str_list->nr)
+		return;
+
+	if (info->size)
+		packet_writer_write(writer, "size");
+
+	for_each_string_list_item (item, oid_str_list) {
+		const char *oid_str = item->string;
+		struct object_id oid;
+		unsigned long object_size;
+
+		if (get_oid_hex(oid_str, &oid) < 0) {
+			packet_writer_error(
+				writer,
+				"object-info: protocol error, expected to get oid, not '%s'",
+				oid_str);
+			continue;
+		}
+
+		strbuf_addstr(&send_buffer, oid_str);
+
+		if (info->size) {
+			if (oid_object_info(r, &oid, &object_size) < 0) {
+				strbuf_addstr(&send_buffer, " ");
+			} else {
+				strbuf_addf(&send_buffer, " %lu", object_size);
+			}
+		}
+
+		packet_writer_write(writer, "%s",
+				    strbuf_detach(&send_buffer, NULL));
+	}
+}
+
+int cap_object_info(struct repository *r, struct strvec *keys,
+		    struct packet_reader *request)
+{
+	struct requested_info info;
+	struct packet_writer writer;
+	struct string_list oid_str_list = STRING_LIST_INIT_DUP;
+
+	packet_writer_init(&writer, 1);
+
+	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
+		if (!strcmp("size", request->line)) {
+			info.size = 1;
+			continue;
+		}
+
+		if (parse_oid(request->line, &oid_str_list))
+			continue;
+
+		packet_writer_error(&writer,
+				    "object-info: unexpected line: '%s'",
+				    request->line);
+	}
+
+	if (request->status != PACKET_READ_FLUSH) {
+		packet_writer_error(
+			&writer, "object-info: expected flush after arguments");
+		die(_("object-info: expected flush after arguments"));
+	}
+
+	send_info(r, &writer, &oid_str_list, &info);
+
+	string_list_clear(&oid_str_list, 1);
+
+	packet_flush(1);
+
+	return 0;
+}
diff --git a/protocol-caps.h b/protocol-caps.h
new file mode 100644
index 0000000000..6351648e37
--- /dev/null
+++ b/protocol-caps.h
@@ -0,0 +1,10 @@
+#ifndef PROTOCOL_CAPS_H
+#define PROTOCOL_CAPS_H
+
+struct repository;
+struct strvec;
+struct packet_reader;
+int cap_object_info(struct repository *r, struct strvec *keys,
+		    struct packet_reader *request);
+
+#endif /* PROTOCOL_CAPS_H */
\ No newline at end of file
diff --git a/serve.c b/serve.c
index ac20c72763..aa8209f147 100644
--- a/serve.c
+++ b/serve.c
@@ -5,6 +5,7 @@
 #include "version.h"
 #include "strvec.h"
 #include "ls-refs.h"
+#include "protocol-caps.h"
 #include "serve.h"
 #include "upload-pack.h"
 
@@ -78,6 +79,7 @@ static struct protocol_capability capabilities[] = {
 	{ "server-option", always_advertise, NULL },
 	{ "object-format", object_format_advertise, NULL },
 	{ "session-id", session_id_advertise, NULL },
+	{ "object-info", always_advertise, cap_object_info },
 };
 
 static void advertise_capabilities(void)
diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
index 509f379d49..73e74a9c54 100755
--- a/t/t5701-git-serve.sh
+++ b/t/t5701-git-serve.sh
@@ -19,6 +19,7 @@ test_expect_success 'test capability advertisement' '
 	fetch=shallow
 	server-option
 	object-format=$(test_oid algo)
+	object-info
 	0000
 	EOF
 
@@ -240,4 +241,29 @@ test_expect_success 'unexpected lines are not allowed in fetch request' '
 	grep "unexpected line: .this-is-not-a-command." err
 '
 
+# Test the basics of object-info
+#
+test_expect_success 'basics of object-info' '
+	test-tool pkt-line pack >in <<-EOF &&
+	command=object-info
+	object-format=$(test_oid algo)
+	0001
+	size
+	oid $(git rev-parse two:two.t)
+	oid $(git rev-parse two:two.t)
+	0000
+	EOF
+
+	cat >expect <<-EOF &&
+	size
+	$(git rev-parse two:two.t) $(wc -c <two.t | xargs)
+	$(git rev-parse two:two.t) $(wc -c <two.t | xargs)
+	0000
+	EOF
+
+	test-tool serve-v2 --stateless-rpc <in >out &&
+	test-tool pkt-line unpack <out >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
2.31.1.368.gbe11c130af-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] object-info: support for retrieving object info
  2021-04-20 23:38 [PATCH v2] object-info: support for retrieving object info Bruno Albuquerque
@ 2021-04-21  0:42 ` Junio C Hamano
  2021-04-30  5:45 ` Junio C Hamano
  2021-06-07 14:33 ` [PATCH] protocol-caps.h: add newline at end of file Ævar Arnfjörð Bjarmason
  2 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2021-04-21  0:42 UTC (permalink / raw)
  To: Bruno Albuquerque; +Cc: git

Bruno Albuquerque <bga@google.com> writes:

> Sometimes it is useful to get information of an object without having to
> download it completely.
>
> Add the "object-info" capability that lets the client ask for
> object-related information with their full hexadecimal object names.
>
> Only sizes are returned for now.
>
> Signed-off-by: Bruno Albuquerque <bga@google.com>
> ---
>
> This version is a small change with just style fixes.

Will queue.  Thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] object-info: support for retrieving object info
  2021-04-20 23:38 [PATCH v2] object-info: support for retrieving object info Bruno Albuquerque
  2021-04-21  0:42 ` Junio C Hamano
@ 2021-04-30  5:45 ` Junio C Hamano
  2021-06-07 14:33 ` [PATCH] protocol-caps.h: add newline at end of file Ævar Arnfjörð Bjarmason
  2 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2021-04-30  5:45 UTC (permalink / raw)
  To: git; +Cc: Bruno Albuquerque

Bruno Albuquerque <bga@google.com> writes:

> Sometimes it is useful to get information of an object without having to
> download it completely.
>
> Add the "object-info" capability that lets the client ask for
> object-related information with their full hexadecimal object names.
>
> Only sizes are returned for now.
>
> Signed-off-by: Bruno Albuquerque <bga@google.com>
> ---

I recall seeing a comment that this would probably not help the vfs
usecase all that much, but does anybody else have further comment on
this change?  Otherwise let me mark the topic to be merged to 'next'
in a few days.

Thanks.

> This version is a small change with just style fixes.
>
>  Documentation/technical/protocol-v2.txt |  31 +++++++
>  Makefile                                |   1 +
>  protocol-caps.c                         | 113 ++++++++++++++++++++++++
>  protocol-caps.h                         |  10 +++
>  serve.c                                 |   2 +
>  t/t5701-git-serve.sh                    |  26 ++++++
>  6 files changed, 183 insertions(+)
>  create mode 100644 protocol-caps.c
>  create mode 100644 protocol-caps.h
>
> diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt
> index a7c806a73e..f4ed141774 100644
> --- a/Documentation/technical/protocol-v2.txt
> +++ b/Documentation/technical/protocol-v2.txt
> @@ -514,3 +514,34 @@ packet-line, and must not contain non-printable or whitespace characters. The
>  current implementation uses trace2 session IDs (see
>  link:api-trace2.html[api-trace2] for details), but this may change and users of
>  the session ID should not rely on this fact.
> +
> +object-info
> +~~~~~~~~~~~
> +
> +`object-info` is the command to retrieve information about one or more objects.
> +Its main purpose is to allow a client to make decisions based on this
> +information without having to fully fetch objects. Object size is the only
> +information that is currently supported.
> +
> +An `object-info` request takes the following arguments:
> +
> +	size
> +	Requests size information to be returned for each listed object id.
> +
> +	oid <oid>
> +	Indicates to the server an object which the client wants to obtain
> +	information for.
> +
> +The response of `object-info` is a list of the the requested object ids
> +and associated requested information, each separated by a single space.
> +
> +	output = info flush-pkt
> +
> +	info = PKT-LINE(attrs) LF)
> +		*PKT-LINE(obj-info LF)
> +
> +	attrs = attr | attrs SP attrs
> +
> +	attr = "size"
> +
> +	obj-info = obj-id SP obj-size
> diff --git a/Makefile b/Makefile
> index 21c0bf1667..3225e37b63 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -961,6 +961,7 @@ LIB_OBJS += progress.o
>  LIB_OBJS += promisor-remote.o
>  LIB_OBJS += prompt.o
>  LIB_OBJS += protocol.o
> +LIB_OBJS += protocol-caps.o
>  LIB_OBJS += prune-packed.o
>  LIB_OBJS += quote.o
>  LIB_OBJS += range-diff.o
> diff --git a/protocol-caps.c b/protocol-caps.c
> new file mode 100644
> index 0000000000..13a9e63a04
> --- /dev/null
> +++ b/protocol-caps.c
> @@ -0,0 +1,113 @@
> +#include "git-compat-util.h"
> +#include "protocol-caps.h"
> +#include "gettext.h"
> +#include "pkt-line.h"
> +#include "strvec.h"
> +#include "hash.h"
> +#include "object.h"
> +#include "object-store.h"
> +#include "string-list.h"
> +#include "strbuf.h"
> +
> +struct requested_info {
> +	unsigned size : 1;
> +};
> +
> +/*
> + * Parses oids from the given line and collects them in the given
> + * oid_str_list. Returns 1 if parsing was successful and 0 otherwise.
> + */
> +static int parse_oid(const char *line, struct string_list *oid_str_list)
> +{
> +	const char *arg;
> +
> +	if (!skip_prefix(line, "oid ", &arg))
> +		return 0;
> +
> +	string_list_append(oid_str_list, arg);
> +
> +	return 1;
> +}
> +
> +/*
> + * Validates and send requested info back to the client. Any errors detected
> + * are returned as they are detected.
> + */
> +static void send_info(struct repository *r, struct packet_writer *writer,
> +		      struct string_list *oid_str_list,
> +		      struct requested_info *info)
> +{
> +	struct string_list_item *item;
> +	struct strbuf send_buffer = STRBUF_INIT;
> +
> +	if (!oid_str_list->nr)
> +		return;
> +
> +	if (info->size)
> +		packet_writer_write(writer, "size");
> +
> +	for_each_string_list_item (item, oid_str_list) {
> +		const char *oid_str = item->string;
> +		struct object_id oid;
> +		unsigned long object_size;
> +
> +		if (get_oid_hex(oid_str, &oid) < 0) {
> +			packet_writer_error(
> +				writer,
> +				"object-info: protocol error, expected to get oid, not '%s'",
> +				oid_str);
> +			continue;
> +		}
> +
> +		strbuf_addstr(&send_buffer, oid_str);
> +
> +		if (info->size) {
> +			if (oid_object_info(r, &oid, &object_size) < 0) {
> +				strbuf_addstr(&send_buffer, " ");
> +			} else {
> +				strbuf_addf(&send_buffer, " %lu", object_size);
> +			}
> +		}
> +
> +		packet_writer_write(writer, "%s",
> +				    strbuf_detach(&send_buffer, NULL));
> +	}
> +}
> +
> +int cap_object_info(struct repository *r, struct strvec *keys,
> +		    struct packet_reader *request)
> +{
> +	struct requested_info info;
> +	struct packet_writer writer;
> +	struct string_list oid_str_list = STRING_LIST_INIT_DUP;
> +
> +	packet_writer_init(&writer, 1);
> +
> +	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
> +		if (!strcmp("size", request->line)) {
> +			info.size = 1;
> +			continue;
> +		}
> +
> +		if (parse_oid(request->line, &oid_str_list))
> +			continue;
> +
> +		packet_writer_error(&writer,
> +				    "object-info: unexpected line: '%s'",
> +				    request->line);
> +	}
> +
> +	if (request->status != PACKET_READ_FLUSH) {
> +		packet_writer_error(
> +			&writer, "object-info: expected flush after arguments");
> +		die(_("object-info: expected flush after arguments"));
> +	}
> +
> +	send_info(r, &writer, &oid_str_list, &info);
> +
> +	string_list_clear(&oid_str_list, 1);
> +
> +	packet_flush(1);
> +
> +	return 0;
> +}
> diff --git a/protocol-caps.h b/protocol-caps.h
> new file mode 100644
> index 0000000000..6351648e37
> --- /dev/null
> +++ b/protocol-caps.h
> @@ -0,0 +1,10 @@
> +#ifndef PROTOCOL_CAPS_H
> +#define PROTOCOL_CAPS_H
> +
> +struct repository;
> +struct strvec;
> +struct packet_reader;
> +int cap_object_info(struct repository *r, struct strvec *keys,
> +		    struct packet_reader *request);
> +
> +#endif /* PROTOCOL_CAPS_H */
> \ No newline at end of file
> diff --git a/serve.c b/serve.c
> index ac20c72763..aa8209f147 100644
> --- a/serve.c
> +++ b/serve.c
> @@ -5,6 +5,7 @@
>  #include "version.h"
>  #include "strvec.h"
>  #include "ls-refs.h"
> +#include "protocol-caps.h"
>  #include "serve.h"
>  #include "upload-pack.h"
>  
> @@ -78,6 +79,7 @@ static struct protocol_capability capabilities[] = {
>  	{ "server-option", always_advertise, NULL },
>  	{ "object-format", object_format_advertise, NULL },
>  	{ "session-id", session_id_advertise, NULL },
> +	{ "object-info", always_advertise, cap_object_info },
>  };
>  
>  static void advertise_capabilities(void)
> diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
> index 509f379d49..73e74a9c54 100755
> --- a/t/t5701-git-serve.sh
> +++ b/t/t5701-git-serve.sh
> @@ -19,6 +19,7 @@ test_expect_success 'test capability advertisement' '
>  	fetch=shallow
>  	server-option
>  	object-format=$(test_oid algo)
> +	object-info
>  	0000
>  	EOF
>  
> @@ -240,4 +241,29 @@ test_expect_success 'unexpected lines are not allowed in fetch request' '
>  	grep "unexpected line: .this-is-not-a-command." err
>  '
>  
> +# Test the basics of object-info
> +#
> +test_expect_success 'basics of object-info' '
> +	test-tool pkt-line pack >in <<-EOF &&
> +	command=object-info
> +	object-format=$(test_oid algo)
> +	0001
> +	size
> +	oid $(git rev-parse two:two.t)
> +	oid $(git rev-parse two:two.t)
> +	0000
> +	EOF
> +
> +	cat >expect <<-EOF &&
> +	size
> +	$(git rev-parse two:two.t) $(wc -c <two.t | xargs)
> +	$(git rev-parse two:two.t) $(wc -c <two.t | xargs)
> +	0000
> +	EOF
> +
> +	test-tool serve-v2 --stateless-rpc <in >out &&
> +	test-tool pkt-line unpack <out >actual &&
> +	test_cmp expect actual
> +'
> +
>  test_done

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] protocol-caps.h: add newline at end of file
  2021-04-20 23:38 [PATCH v2] object-info: support for retrieving object info Bruno Albuquerque
  2021-04-21  0:42 ` Junio C Hamano
  2021-04-30  5:45 ` Junio C Hamano
@ 2021-06-07 14:33 ` Ævar Arnfjörð Bjarmason
  2021-06-09 17:20   ` Bruno Albuquerque
  2 siblings, 1 reply; 5+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-07 14:33 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Bruno Albuquerque,
	Ævar Arnfjörð Bjarmason

Add a trailing newline to the protocol-caps.h file added in the recent
a2ba162cda (object-info: support for retrieving object info,
2021-04-20). Various editors add this implicitly, and some compilers
warn about the lack of a \n here.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 protocol-caps.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protocol-caps.h b/protocol-caps.h
index 6351648e37..0a9f49df11 100644
--- a/protocol-caps.h
+++ b/protocol-caps.h
@@ -7,4 +7,4 @@ struct packet_reader;
 int cap_object_info(struct repository *r, struct strvec *keys,
 		    struct packet_reader *request);
 
-#endif /* PROTOCOL_CAPS_H */
\ No newline at end of file
+#endif /* PROTOCOL_CAPS_H */
-- 
2.32.0.rc3.434.gd8aed1f08a7


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] protocol-caps.h: add newline at end of file
  2021-06-07 14:33 ` [PATCH] protocol-caps.h: add newline at end of file Ævar Arnfjörð Bjarmason
@ 2021-06-09 17:20   ` Bruno Albuquerque
  0 siblings, 0 replies; 5+ messages in thread
From: Bruno Albuquerque @ 2021-06-09 17:20 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano

On Mon, Jun 7, 2021 at 7:33 AM Ævar Arnfjörð Bjarmason <avarab@gmail.com> wrote:
>
> Add a trailing newline to the protocol-caps.h file added in the recent
> a2ba162cda (object-info: support for retrieving object info,
> 2021-04-20). Various editors add this implicitly, and some compilers
> warn about the lack of a \n here.

Thanks for cleaning this up.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-06-09 17:21 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-20 23:38 [PATCH v2] object-info: support for retrieving object info Bruno Albuquerque
2021-04-21  0:42 ` Junio C Hamano
2021-04-30  5:45 ` Junio C Hamano
2021-06-07 14:33 ` [PATCH] protocol-caps.h: add newline at end of file Ævar Arnfjörð Bjarmason
2021-06-09 17:20   ` Bruno Albuquerque

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).