From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>, sluongng@gmail.com
Subject: [PATCH 2/2] pack-objects: prefetch objects to be packed
Date: Mon, 20 Jul 2020 17:21:44 -0700 [thread overview]
Message-ID: <b87764b711621ea3c614dbc8f9e49a8598a25cb1.1595290841.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1595290841.git.jonathantanmy@google.com>
When an object to be packed is noticed to be missing, prefetch all
to-be-packed objects in one batch.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
There have been recent discussions about using QUICK whenever we use
SKIP_FETCH_OBJECT. I don't think it fully applies here, since here we
fully expect the object to be present in the non-partial-clone case.
Having said that, I wouldn't be opposed to adding QUICK and then, if the
object read fails and if the repo is not a partial clone, to retry the
object load (before setting the type to -1).
---
builtin/pack-objects.c | 36 ++++++++++++++++++++++++++++++++----
t/t5300-pack-object.sh | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 4 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index e09d140eed..ecef5cda44 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -35,6 +35,7 @@
#include "midx.h"
#include "trace2.h"
#include "shallow.h"
+#include "promisor-remote.h"
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
@@ -1704,7 +1705,26 @@ static int can_reuse_delta(const struct object_id *base_oid,
return 0;
}
-static void check_object(struct object_entry *entry)
+static void prefetch_to_pack(uint32_t object_index_start) {
+ struct oid_array to_fetch = OID_ARRAY_INIT;
+ uint32_t i;
+
+ for (i = object_index_start; i < to_pack.nr_objects; i++) {
+ struct object_entry *entry = to_pack.objects + i;
+
+ if (!oid_object_info_extended(the_repository,
+ &entry->idx.oid,
+ NULL,
+ OBJECT_INFO_FOR_PREFETCH))
+ continue;
+ oid_array_append(&to_fetch, &entry->idx.oid);
+ }
+ promisor_remote_get_direct(the_repository,
+ to_fetch.oid, to_fetch.nr);
+ oid_array_clear(&to_fetch);
+}
+
+static void check_object(struct object_entry *entry, uint32_t object_index)
{
unsigned long canonical_size;
enum object_type type;
@@ -1843,8 +1863,16 @@ static void check_object(struct object_entry *entry)
}
if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
- OBJECT_INFO_LOOKUP_REPLACE) < 0)
- type = -1;
+ OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
+ if (has_promisor_remote()) {
+ prefetch_to_pack(object_index);
+ if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
+ OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
+ type = -1;
+ } else {
+ type = -1;
+ }
+ }
oe_set_type(entry, type);
if (entry->type_valid) {
SET_SIZE(entry, canonical_size);
@@ -2065,7 +2093,7 @@ static void get_object_details(void)
for (i = 0; i < to_pack.nr_objects; i++) {
struct object_entry *entry = sorted_by_offset[i];
- check_object(entry);
+ check_object(entry, i);
if (entry->type_valid &&
oe_size_greater_than(&to_pack, entry, big_file_threshold))
entry->no_try_delta = 1;
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index 746cdb626e..d553d0ca46 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -497,4 +497,40 @@ test_expect_success 'make sure index-pack detects the SHA1 collision (large blob
)
'
+test_expect_success 'prefetch objects' '
+ rm -rf server client &&
+
+ git init server &&
+ test_config -C server uploadpack.allowanysha1inwant 1 &&
+ test_config -C server uploadpack.allowfilter 1 &&
+ test_config -C server protocol.version 2 &&
+
+ echo one >server/one &&
+ git -C server add one &&
+ git -C server commit -m one &&
+ git -C server branch one_branch &&
+
+ echo two_a >server/two_a &&
+ echo two_b >server/two_b &&
+ git -C server add two_a two_b &&
+ git -C server commit -m two &&
+
+ echo three >server/three &&
+ git -C server add three &&
+ git -C server commit -m three &&
+ git -C server branch three_branch &&
+
+ # Clone, fetch "two" with blobs excluded, and re-push it. This requires
+ # the client to have the blobs of "two" - verify that these are
+ # prefetched in one batch.
+ git clone --filter=blob:none --single-branch -b one_branch \
+ "file://$(pwd)/server" client &&
+ test_config -C client protocol.version 2 &&
+ TWO=$(git -C server rev-parse three_branch^) &&
+ git -C client fetch --filter=blob:none origin "$TWO" &&
+ GIT_TRACE_PACKET=$(pwd)/trace git -C client push origin "$TWO":refs/heads/two_branch &&
+ grep "git> done" trace >donelines &&
+ test_line_count = 1 donelines
+'
+
test_done
--
2.28.0.rc0.105.gf9edc3c819-goog
next prev parent reply other threads:[~2020-07-21 0:22 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-21 0:21 [PATCH 0/2] Prefetch objects in pack-objects Jonathan Tan
2020-07-21 0:21 ` [PATCH 1/2] pack-objects: refactor to oid_object_info_extended Jonathan Tan
2020-07-21 0:21 ` Jonathan Tan [this message]
2020-07-21 1:00 ` [PATCH 2/2] pack-objects: prefetch objects to be packed Junio C Hamano
2020-07-21 16:37 ` Jonathan Tan
2020-07-21 19:23 ` Junio C Hamano
2020-07-21 21:27 ` Junio C Hamano
2020-07-21 23:37 ` Jonathan Tan
2020-07-21 23:56 ` Junio C Hamano
2020-07-21 23:20 ` Jonathan Tan
2020-07-21 23:51 ` Junio C Hamano
2020-07-22 21:30 ` Jonathan Tan
2020-07-22 21:45 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b87764b711621ea3c614dbc8f9e49a8598a25cb1.1595290841.git.jonathantanmy@google.com \
--to=jonathantanmy@google.com \
--cc=git@vger.kernel.org \
--cc=sluongng@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).