git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] object-info: support for retrieving object info
@ 2021-04-15 21:20 Bruno Albuquerque
  2021-04-15 21:53 ` Junio C Hamano
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Bruno Albuquerque @ 2021-04-15 21:20 UTC (permalink / raw)
  To: git; +Cc: Bruno Albuquerque

Sometimes it is useful to get information of an object without having to
download it completely.

Add the "object-info" capability that lets the client ask for
object-related information with their full hexadecimal object names.

Only sizes are returned for now.

Signed-off-by: Bruno Albuquerque <bga@google.com>
---
 Documentation/technical/protocol-v2.txt |  31 +++++++
 Makefile                                |   1 +
 protocol-caps.c                         | 115 ++++++++++++++++++++++++
 protocol-caps.h                         |  10 +++
 serve.c                                 |   2 +
 t/t5701-git-serve.sh                    |  26 ++++++
 6 files changed, 185 insertions(+)
 create mode 100644 protocol-caps.c
 create mode 100644 protocol-caps.h

Hello.

This is my first git patch so I thought I would introduce myself. I am a
software engineer at Google and I have been involved with opensource for
a while (mostly with the Haiku OS project) and now I am working on some
Git changes that hopefully will be generally useful.

For this specific change, a clear usage scenario is implementing a VFS
on top of Git (something like https://github.com/microsoft/VFSForGit) in
a way that would not require someone to always fully download objects to
get information about them. Object size is the obvious one and what is
implemented here.

diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt
index a7c806a73e..f4ed141774 100644
--- a/Documentation/technical/protocol-v2.txt
+++ b/Documentation/technical/protocol-v2.txt
@@ -514,3 +514,34 @@ packet-line, and must not contain non-printable or whitespace characters. The
 current implementation uses trace2 session IDs (see
 link:api-trace2.html[api-trace2] for details), but this may change and users of
 the session ID should not rely on this fact.
+
+object-info
+~~~~~~~~~~~
+
+`object-info` is the command to retrieve information about one or more objects.
+Its main purpose is to allow a client to make decisions based on this
+information without having to fully fetch objects. Object size is the only
+information that is currently supported.
+
+An `object-info` request takes the following arguments:
+
+	size
+	Requests size information to be returned for each listed object id.
+
+	oid <oid>
+	Indicates to the server an object which the client wants to obtain
+	information for.
+
+The response of `object-info` is a list of the requested object ids
+and associated requested information, each separated by a single space.
+
+	output = info flush-pkt
+
+	info = PKT-LINE(attrs LF)
+		*PKT-LINE(obj-info LF)
+
+	attrs = attr | attrs SP attrs
+
+	attr = "size"
+
+	obj-info = obj-id SP obj-size
diff --git a/Makefile b/Makefile
index 21c0bf1667..3225e37b63 100644
--- a/Makefile
+++ b/Makefile
@@ -961,6 +961,7 @@ LIB_OBJS += progress.o
 LIB_OBJS += promisor-remote.o
 LIB_OBJS += prompt.o
 LIB_OBJS += protocol.o
+LIB_OBJS += protocol-caps.o
 LIB_OBJS += prune-packed.o
 LIB_OBJS += quote.o
 LIB_OBJS += range-diff.o
diff --git a/protocol-caps.c b/protocol-caps.c
new file mode 100644
index 0000000000..c15e397756
--- /dev/null
+++ b/protocol-caps.c
@@ -0,0 +1,115 @@
+#include "git-compat-util.h"
+#include "protocol-caps.h"
+#include "gettext.h"
+#include "pkt-line.h"
+#include "strvec.h"
+#include "hash.h"
+#include "object.h"
+#include "object-store.h"
+#include "string-list.h"
+#include "strbuf.h"
+
+struct requested_info {
+	unsigned size : 1; /* set when the client sent a "size" argument line */
+};
+
+/*
+ * If line has the form "oid <rest>", append <rest> (unvalidated) to
+ * oid_str_list and return 1. Any other line is ignored and yields 0.
+ */
+static int parse_oid(const char *line, struct string_list *oid_str_list)
+{
+	const char *oid_text;
+
+	if (skip_prefix(line, "oid ", &oid_text)) {
+		string_list_append(oid_str_list, oid_text);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Validate each collected oid string and send the requested info for it to
+ * the client. A string that is not a hex oid is reported with an error
+ * packet and skipped; if oid_object_info() fails the size field is empty.
+ */
+static void send_info(struct repository *r, struct packet_writer *writer,
+		      struct string_list *oid_str_list,
+		      struct requested_info *info)
+{
+	struct string_list_item *item;
+	struct strbuf send_buffer = STRBUF_INIT;
+
+	if (!oid_str_list->nr)
+		return;
+
+	if (info->size)
+		packet_writer_write(writer, "size");
+
+	for_each_string_list_item (item, oid_str_list) {
+		const char *oid_str = item->string;
+		struct object_id oid;
+		unsigned long object_size;
+
+		if (get_oid_hex(oid_str, &oid) < 0) {
+			packet_writer_error(writer,
+				"object-info: protocol error, expected to get oid, not '%s'",
+				oid_str);
+			continue;
+		}
+
+		strbuf_addstr(&send_buffer, oid_str);
+		if (info->size) {
+			if (oid_object_info(r, &oid, &object_size) < 0)
+				strbuf_addstr(&send_buffer, " ");
+			else
+				strbuf_addf(&send_buffer, " %lu", object_size);
+		}
+
+		/* One buffer serves every object; it is released after the loop. */
+		packet_writer_write(writer, "%s", send_buffer.buf);
+		strbuf_reset(&send_buffer);
+	}
+	strbuf_release(&send_buffer);
+}
+
+/*
+ * Serve an "object-info" request: read "size" and "oid <oid>" argument
+ * lines up to the flush packet, then send the requested info followed by
+ * a flush. Dies if the arguments do not end with a flush packet.
+ */
+int cap_object_info(struct repository *r, struct strvec *keys,
+		    struct packet_reader *request)
+{
+	/* Zeroed so "size" stays off unless the client asks for it. */
+	struct requested_info info = { 0 };
+	struct packet_writer writer;
+	struct string_list oid_str_list = STRING_LIST_INIT_DUP;
+
+	packet_writer_init(&writer, 1);
+	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
+		if (!strcmp("size", request->line)) {
+			info.size = 1;
+			continue;
+		}
+
+		if (parse_oid(request->line, &oid_str_list))
+			continue;
+
+		packet_writer_error(&writer,
+				    "object-info: unexpected line: '%s'",
+				    request->line);
+	}
+
+	if (request->status != PACKET_READ_FLUSH) {
+		packet_writer_error(
+			&writer, "object-info: expected flush after arguments");
+		die(_("object-info: expected flush after arguments"));
+	}
+
+	send_info(r, &writer, &oid_str_list, &info);
+	string_list_clear(&oid_str_list, 1);
+	packet_flush(1);
+	return 0;
+}
diff --git a/protocol-caps.h b/protocol-caps.h
new file mode 100644
index 0000000000..6351648e37
--- /dev/null
+++ b/protocol-caps.h
@@ -0,0 +1,10 @@
+#ifndef PROTOCOL_CAPS_H
+#define PROTOCOL_CAPS_H
+
+struct repository;
+struct strvec;
+struct packet_reader;
+int cap_object_info(struct repository *r, struct strvec *keys,
+		    struct packet_reader *request);
+
+#endif /* PROTOCOL_CAPS_H */
\ No newline at end of file
diff --git a/serve.c b/serve.c
index ac20c72763..aa8209f147 100644
--- a/serve.c
+++ b/serve.c
@@ -5,6 +5,7 @@
 #include "version.h"
 #include "strvec.h"
 #include "ls-refs.h"
+#include "protocol-caps.h"
 #include "serve.h"
 #include "upload-pack.h"
 
@@ -78,6 +79,7 @@ static struct protocol_capability capabilities[] = {
 	{ "server-option", always_advertise, NULL },
 	{ "object-format", object_format_advertise, NULL },
 	{ "session-id", session_id_advertise, NULL },
+	{ "object-info", always_advertise, cap_object_info },
 };
 
 static void advertise_capabilities(void)
diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
index 509f379d49..73e74a9c54 100755
--- a/t/t5701-git-serve.sh
+++ b/t/t5701-git-serve.sh
@@ -19,6 +19,7 @@ test_expect_success 'test capability advertisement' '
 	fetch=shallow
 	server-option
 	object-format=$(test_oid algo)
+	object-info
 	0000
 	EOF
 
@@ -240,4 +241,29 @@ test_expect_success 'unexpected lines are not allowed in fetch request' '
 	grep "unexpected line: .this-is-not-a-command." err
 '
 
+# Test the basics of object-info: ask for the size of two:two.t twice and
+# expect a "size" header line, then one "<oid> <size>" line per request.
+test_expect_success 'basics of object-info' '
+	test-tool pkt-line pack >in <<-EOF &&
+	command=object-info
+	object-format=$(test_oid algo)
+	0001
+	size
+	oid $(git rev-parse two:two.t)
+	oid $(git rev-parse two:two.t)
+	0000
+	EOF
+
+	cat >expect <<-EOF &&
+	size
+	$(git rev-parse two:two.t) $(wc -c <two.t | xargs)
+	$(git rev-parse two:two.t) $(wc -c <two.t | xargs)
+	0000
+	EOF
+
+	test-tool serve-v2 --stateless-rpc <in >out &&
+	test-tool pkt-line unpack <out >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
2.31.1.368.gbe11c130af-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-04-20 23:43 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-15 21:20 [PATCH] object-info: support for retrieving object info Bruno Albuquerque
2021-04-15 21:53 ` Junio C Hamano
2021-04-15 23:06   ` Bruno Albuquerque
2021-04-15 22:15 ` Junio C Hamano
2021-04-20 23:43   ` Bruno Albuquerque
2021-04-16 22:01 ` brian m. carlson
2021-04-19 21:18   ` Bruno Albuquerque

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).