git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 0/2] Another partial clone prefetch
@ 2021-07-23 18:52 Jonathan Tan
  2021-07-23 18:52 ` [PATCH 1/2] unpack-trees: refactor prefetching code Jonathan Tan
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Jonathan Tan @ 2021-07-23 18:52 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan

Here's another instance in which we need to prefetch in order to avoid
fetching objects one by one. More information is in patch 2's commit
message.

Jonathan Tan (2):
  unpack-trees: refactor prefetching code
  cache-tree: prefetch in partial clone read-tree

 cache-tree.c                       | 11 ++++++++--
 cache.h                            |  9 ++++++++
 read-cache.c                       | 23 +++++++++++++++++++++
 t/t1022-read-tree-partial-clone.sh | 33 ++++++++++++++++++++++++++++++
 unpack-trees.c                     | 27 ++++++++----------------
 5 files changed, 82 insertions(+), 21 deletions(-)
 create mode 100755 t/t1022-read-tree-partial-clone.sh

-- 
2.32.0.432.gabb21c7263-goog


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] unpack-trees: refactor prefetching code
  2021-07-23 18:52 [PATCH 0/2] Another partial clone prefetch Jonathan Tan
@ 2021-07-23 18:52 ` Jonathan Tan
  2021-07-23 20:26   ` Elijah Newren
  2021-07-23 18:52 ` [PATCH 2/2] cache-tree: prefetch in partial clone read-tree Jonathan Tan
  2021-07-26 13:01 ` [PATCH 0/2] Another partial clone prefetch Derrick Stolee
  2 siblings, 1 reply; 8+ messages in thread
From: Jonathan Tan @ 2021-07-23 18:52 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan

Refactor the prefetching code in unpack-trees.c into its own function,
because it will be used elsewhere in a subsequent commit.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 cache.h        |  9 +++++++++
 read-cache.c   | 23 +++++++++++++++++++++++
 unpack-trees.c | 27 ++++++++-------------------
 3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/cache.h b/cache.h
index ba04ff8bd3..6f952e22c6 100644
--- a/cache.h
+++ b/cache.h
@@ -410,6 +410,15 @@ struct cache_entry *dup_cache_entry(const struct cache_entry *ce, struct index_s
  */
 void validate_cache_entries(const struct index_state *istate);
 
+/*
+ * Bulk prefetch all missing cache entries that are not GITLINKs and that match
+ * the given predicate. This function should only be called if
+ * has_promisor_remote() returns true.
+ */
+typedef int (*must_prefetch_predicate)(const struct cache_entry *);
+void prefetch_cache_entries(const struct index_state *istate,
+			    must_prefetch_predicate must_prefetch);
+
 #ifdef USE_THE_INDEX_COMPATIBILITY_MACROS
 extern struct index_state the_index;
 
diff --git a/read-cache.c b/read-cache.c
index ba2b012a6c..4e396bf17f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -27,6 +27,7 @@
 #include "progress.h"
 #include "sparse-index.h"
 #include "csum-file.h"
+#include "promisor-remote.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -3657,3 +3658,25 @@ static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_ta
 		strbuf_add(sb, &buffer, sizeof(uint32_t));
 	}
 }
+
+void prefetch_cache_entries(const struct index_state *istate,
+			    must_prefetch_predicate must_prefetch)
+{
+	int i;
+	struct oid_array to_fetch = OID_ARRAY_INIT;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+
+		if (S_ISGITLINK(ce->ce_mode) || !must_prefetch(ce))
+			continue;
+		if (!oid_object_info_extended(the_repository, &ce->oid,
+					      NULL,
+					      OBJECT_INFO_FOR_PREFETCH))
+			continue;
+		oid_array_append(&to_fetch, &ce->oid);
+	}
+	promisor_remote_get_direct(the_repository,
+				   to_fetch.oid, to_fetch.nr);
+	oid_array_clear(&to_fetch);
+}
diff --git a/unpack-trees.c b/unpack-trees.c
index f88a69f8e7..ed92794032 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -392,6 +392,11 @@ static void report_collided_checkout(struct index_state *index)
 	string_list_clear(&list, 0);
 }
 
+static int must_checkout(const struct cache_entry *ce)
+{
+	return ce->ce_flags & CE_UPDATE;
+}
+
 static int check_updates(struct unpack_trees_options *o,
 			 struct index_state *index)
 {
@@ -442,28 +447,12 @@ static int check_updates(struct unpack_trees_options *o,
 	if (should_update_submodules())
 		load_gitmodules_file(index, &state);
 
-	if (has_promisor_remote()) {
+	if (has_promisor_remote())
 		/*
 		 * Prefetch the objects that are to be checked out in the loop
 		 * below.
 		 */
-		struct oid_array to_fetch = OID_ARRAY_INIT;
-		for (i = 0; i < index->cache_nr; i++) {
-			struct cache_entry *ce = index->cache[i];
-
-			if (!(ce->ce_flags & CE_UPDATE) ||
-			    S_ISGITLINK(ce->ce_mode))
-				continue;
-			if (!oid_object_info_extended(the_repository, &ce->oid,
-						      NULL,
-						      OBJECT_INFO_FOR_PREFETCH))
-				continue;
-			oid_array_append(&to_fetch, &ce->oid);
-		}
-		promisor_remote_get_direct(the_repository,
-					   to_fetch.oid, to_fetch.nr);
-		oid_array_clear(&to_fetch);
-	}
+		prefetch_cache_entries(index, must_checkout);
 
 	get_parallel_checkout_configs(&pc_workers, &pc_threshold);
 
@@ -473,7 +462,7 @@ static int check_updates(struct unpack_trees_options *o,
 	for (i = 0; i < index->cache_nr; i++) {
 		struct cache_entry *ce = index->cache[i];
 
-		if (ce->ce_flags & CE_UPDATE) {
+		if (must_checkout(ce)) {
 			size_t last_pc_queue_size = pc_queue_size();
 
 			if (ce->ce_flags & CE_WT_REMOVE)
-- 
2.32.0.432.gabb21c7263-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] cache-tree: prefetch in partial clone read-tree
  2021-07-23 18:52 [PATCH 0/2] Another partial clone prefetch Jonathan Tan
  2021-07-23 18:52 ` [PATCH 1/2] unpack-trees: refactor prefetching code Jonathan Tan
@ 2021-07-23 18:52 ` Jonathan Tan
  2021-07-23 18:55   ` Jonathan Tan
                     ` (2 more replies)
  2021-07-26 13:01 ` [PATCH 0/2] Another partial clone prefetch Derrick Stolee
  2 siblings, 3 replies; 8+ messages in thread
From: Jonathan Tan @ 2021-07-23 18:52 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan

"git read-tree" checks the existence of the blobs referenced by the
given tree, but does not bulk prefetch them. Add a bulk prefetch.

The lack of prefetch here was noticed at $DAYJOB during a merge
involving some specific commits, but I couldn't find a minimal merge
that didn't also trigger the prefetch in check_updates() in
unpack-trees.c (and in all these cases, the lack of prefetch in
cache-tree.c didn't matter because all the relevant blobs would have
already been prefetched by then). This is why I used read-tree here to
exercise this code path.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 cache-tree.c                       | 11 ++++++++--
 t/t1022-read-tree-partial-clone.sh | 33 ++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100755 t/t1022-read-tree-partial-clone.sh

diff --git a/cache-tree.c b/cache-tree.c
index 45e58666af..9ba2c7c6b2 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -237,6 +237,11 @@ int cache_tree_fully_valid(struct cache_tree *it)
 	return 1;
 }
 
+static int must_check_existence(const struct cache_entry *ce)
+{
+	return !(has_promisor_remote() && ce_skip_worktree(ce));
+}
+
 static int update_one(struct cache_tree *it,
 		      struct cache_entry **cache,
 		      int entries,
@@ -378,8 +383,7 @@ static int update_one(struct cache_tree *it,
 		}
 
 		ce_missing_ok = mode == S_IFGITLINK || missing_ok ||
-			(has_promisor_remote() &&
-			 ce_skip_worktree(ce));
+			!must_check_existence(ce);
 		if (is_null_oid(oid) ||
 		    (!ce_missing_ok && !has_object_file(oid))) {
 			strbuf_release(&buffer);
@@ -466,6 +470,9 @@ int cache_tree_update(struct index_state *istate, int flags)
 	if (!istate->cache_tree)
 		istate->cache_tree = cache_tree();
 
+	if (!(flags & WRITE_TREE_MISSING_OK) && has_promisor_remote())
+		prefetch_cache_entries(istate, must_check_existence);
+
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
diff --git a/t/t1022-read-tree-partial-clone.sh b/t/t1022-read-tree-partial-clone.sh
new file mode 100755
index 0000000000..a763e27c7d
--- /dev/null
+++ b/t/t1022-read-tree-partial-clone.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description='git read-tree in partial clones'
+
+TEST_NO_CREATE_REPO=1
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+test_expect_success 'read-tree in partial clone prefetches in one batch' '
+	test_when_finished "rm -rf server client trace" &&
+
+	git init server &&
+	echo foo >server/one &&
+	echo bar >server/two &&
+	git -C server add one two &&
+	git -C server commit -m "initial commit" &&
+	TREE=$(git -C server rev-parse HEAD^{tree}) &&
+
+	git -C server config uploadpack.allowfilter 1 &&
+	git -C server config uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:none "file://$(pwd)/server" client &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client read-tree $TREE &&
+
+	# "done" marks the end of negotiation (once per fetch). Expect that
+	# only one fetch occurs.
+	grep "fetch> done" trace >donelines &&
+	test_line_count = 1 donelines
+'
+
+test_done
-- 
2.32.0.432.gabb21c7263-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] cache-tree: prefetch in partial clone read-tree
  2021-07-23 18:52 ` [PATCH 2/2] cache-tree: prefetch in partial clone read-tree Jonathan Tan
@ 2021-07-23 18:55   ` Jonathan Tan
  2021-07-23 21:20   ` Junio C Hamano
  2021-07-23 21:34   ` Elijah Newren
  2 siblings, 0 replies; 8+ messages in thread
From: Jonathan Tan @ 2021-07-23 18:55 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git

> "git read-tree" checks the existence of the blobs referenced by the
> given tree, but does not bulk prefetch them. Add a bulk prefetch.
> 
> The lack of prefetch here was noticed at $DAYJOB during a merge
> involving some specific commits, but I couldn't find a minimal merge
> that didn't also trigger the prefetch in check_updates() in
> unpack-trees.c (and in all these cases, the lack of prefetch in
> cache-tree.c didn't matter because all the relevant blobs would have
> already been prefetched by then). This is why I used read-tree here to
> exercise this code path.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>

Forgot to mention: the $DAYJOB case is the same case as in [1]. In [1] I
noticed that the object wasn't actually being used, so I disabled the
object existence check. But that's probably the wrong approach - if the
caller really didn't want the object's existence to be checked, they
could have used WRITE_TREE_MISSING_OK when calling cache_tree_update().

[1] https://lore.kernel.org/git/cover.1627066238.git.jonathantanmy@google.com/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] unpack-trees: refactor prefetching code
  2021-07-23 18:52 ` [PATCH 1/2] unpack-trees: refactor prefetching code Jonathan Tan
@ 2021-07-23 20:26   ` Elijah Newren
  0 siblings, 0 replies; 8+ messages in thread
From: Elijah Newren @ 2021-07-23 20:26 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Git Mailing List

On Fri, Jul 23, 2021 at 11:55 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Refactor the prefetching code in unpack-trees.c into its own function,
> because it will be used elsewhere in a subsequent commit.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  cache.h        |  9 +++++++++
>  read-cache.c   | 23 +++++++++++++++++++++++
>  unpack-trees.c | 27 ++++++++-------------------
>  3 files changed, 40 insertions(+), 19 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index ba04ff8bd3..6f952e22c6 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -410,6 +410,15 @@ struct cache_entry *dup_cache_entry(const struct cache_entry *ce, struct index_s
>   */
>  void validate_cache_entries(const struct index_state *istate);
>
> +/*
> + * Bulk prefetch all missing cache entries that are not GITLINKs and that match
> + * the given predicate. This function should only be called if
> + * has_promisor_remote() returns true.
> + */
> +typedef int (*must_prefetch_predicate)(const struct cache_entry *);
> +void prefetch_cache_entries(const struct index_state *istate,
> +                           must_prefetch_predicate must_prefetch);
> +
>  #ifdef USE_THE_INDEX_COMPATIBILITY_MACROS
>  extern struct index_state the_index;
>
> diff --git a/read-cache.c b/read-cache.c
> index ba2b012a6c..4e396bf17f 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -27,6 +27,7 @@
>  #include "progress.h"
>  #include "sparse-index.h"
>  #include "csum-file.h"
> +#include "promisor-remote.h"
>
>  /* Mask for the name length in ce_flags in the on-disk index */
>
> @@ -3657,3 +3658,25 @@ static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_ta
>                 strbuf_add(sb, &buffer, sizeof(uint32_t));
>         }
>  }
> +
> +void prefetch_cache_entries(const struct index_state *istate,
> +                           must_prefetch_predicate must_prefetch)
> +{
> +       int i;
> +       struct oid_array to_fetch = OID_ARRAY_INIT;
> +
> +       for (i = 0; i < istate->cache_nr; i++) {
> +               struct cache_entry *ce = istate->cache[i];
> +
> +               if (S_ISGITLINK(ce->ce_mode) || !must_prefetch(ce))
> +                       continue;
> +               if (!oid_object_info_extended(the_repository, &ce->oid,
> +                                             NULL,
> +                                             OBJECT_INFO_FOR_PREFETCH))
> +                       continue;
> +               oid_array_append(&to_fetch, &ce->oid);
> +       }
> +       promisor_remote_get_direct(the_repository,
> +                                  to_fetch.oid, to_fetch.nr);
> +       oid_array_clear(&to_fetch);
> +}
> diff --git a/unpack-trees.c b/unpack-trees.c
> index f88a69f8e7..ed92794032 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -392,6 +392,11 @@ static void report_collided_checkout(struct index_state *index)
>         string_list_clear(&list, 0);
>  }
>
> +static int must_checkout(const struct cache_entry *ce)
> +{
> +       return ce->ce_flags & CE_UPDATE;
> +}
> +
>  static int check_updates(struct unpack_trees_options *o,
>                          struct index_state *index)
>  {
> @@ -442,28 +447,12 @@ static int check_updates(struct unpack_trees_options *o,
>         if (should_update_submodules())
>                 load_gitmodules_file(index, &state);
>
> -       if (has_promisor_remote()) {
> +       if (has_promisor_remote())
>                 /*
>                  * Prefetch the objects that are to be checked out in the loop
>                  * below.
>                  */
> -               struct oid_array to_fetch = OID_ARRAY_INIT;
> -               for (i = 0; i < index->cache_nr; i++) {
> -                       struct cache_entry *ce = index->cache[i];
> -
> -                       if (!(ce->ce_flags & CE_UPDATE) ||
> -                           S_ISGITLINK(ce->ce_mode))
> -                               continue;
> -                       if (!oid_object_info_extended(the_repository, &ce->oid,
> -                                                     NULL,
> -                                                     OBJECT_INFO_FOR_PREFETCH))
> -                               continue;
> -                       oid_array_append(&to_fetch, &ce->oid);
> -               }
> -               promisor_remote_get_direct(the_repository,
> -                                          to_fetch.oid, to_fetch.nr);
> -               oid_array_clear(&to_fetch);
> -       }
> +               prefetch_cache_entries(index, must_checkout);
>
>         get_parallel_checkout_configs(&pc_workers, &pc_threshold);
>
> @@ -473,7 +462,7 @@ static int check_updates(struct unpack_trees_options *o,
>         for (i = 0; i < index->cache_nr; i++) {
>                 struct cache_entry *ce = index->cache[i];
>
> -               if (ce->ce_flags & CE_UPDATE) {
> +               if (must_checkout(ce)) {
>                         size_t last_pc_queue_size = pc_queue_size();
>
>                         if (ce->ce_flags & CE_WT_REMOVE)
> --
> 2.32.0.432.gabb21c7263-goog

This might have been slightly easier to review if you had introduced
must_checkout() first, and then made the new prefetch_cache_entries()
in a separate commit, so that comparing old and new code I didn't have
to try to deduce the separate steps.  However, that's a really minor
point since must_checkout() is so small.

Anyway, this patch looks good to me.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] cache-tree: prefetch in partial clone read-tree
  2021-07-23 18:52 ` [PATCH 2/2] cache-tree: prefetch in partial clone read-tree Jonathan Tan
  2021-07-23 18:55   ` Jonathan Tan
@ 2021-07-23 21:20   ` Junio C Hamano
  2021-07-23 21:34   ` Elijah Newren
  2 siblings, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2021-07-23 21:20 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

Jonathan Tan <jonathantanmy@google.com> writes:

> @@ -466,6 +470,9 @@ int cache_tree_update(struct index_state *istate, int flags)
>  	if (!istate->cache_tree)
>  		istate->cache_tree = cache_tree();
>  
> +	if (!(flags & WRITE_TREE_MISSING_OK) && has_promisor_remote())
> +		prefetch_cache_entries(istate, must_check_existence);
> +

It's so nice when a "fix" to an issue can be this simple.

Thanks; will queue.

>  	trace_performance_enter();
>  	trace2_region_enter("cache_tree", "update", the_repository);
>  	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
> diff --git a/t/t1022-read-tree-partial-clone.sh b/t/t1022-read-tree-partial-clone.sh
> new file mode 100755
> index 0000000000..a763e27c7d
> --- /dev/null
> +++ b/t/t1022-read-tree-partial-clone.sh
> @@ -0,0 +1,33 @@
> +#!/bin/sh
> +
> +test_description='git read-tree in partial clones'
> +
> +TEST_NO_CREATE_REPO=1
> +
> +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'read-tree in partial clone prefetches in one batch' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	git init server &&
> +	echo foo >server/one &&
> +	echo bar >server/two &&
> +	git -C server add one two &&
> +	git -C server commit -m "initial commit" &&
> +	TREE=$(git -C server rev-parse HEAD^{tree}) &&
> +
> +	git -C server config uploadpack.allowfilter 1 &&
> +	git -C server config uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:none "file://$(pwd)/server" client &&
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client read-tree $TREE &&
> +
> +	# "done" marks the end of negotiation (once per fetch). Expect that
> +	# only one fetch occurs.
> +	grep "fetch> done" trace >donelines &&
> +	test_line_count = 1 donelines
> +'
> +
> +test_done

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] cache-tree: prefetch in partial clone read-tree
  2021-07-23 18:52 ` [PATCH 2/2] cache-tree: prefetch in partial clone read-tree Jonathan Tan
  2021-07-23 18:55   ` Jonathan Tan
  2021-07-23 21:20   ` Junio C Hamano
@ 2021-07-23 21:34   ` Elijah Newren
  2 siblings, 0 replies; 8+ messages in thread
From: Elijah Newren @ 2021-07-23 21:34 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Git Mailing List

On Fri, Jul 23, 2021 at 11:55 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> "git read-tree" checks the existence of the blobs referenced by the
> given tree, but does not bulk prefetch them. Add a bulk prefetch.
>
> The lack of prefetch here was noticed at $DAYJOB during a merge
> involving some specific commits, but I couldn't find a minimal merge
> that didn't also trigger the prefetch in check_updates() in
> unpack-trees.c (and in all these cases, the lack of prefetch in
> cache-tree.c didn't matter because all the relevant blobs would have
> already been prefetched by then). This is why I used read-tree here to
> exercise this code path.

Okay, you have me stumped, I can't figure out what kind of a merge
would bypass the check_updates() in unpack-trees.c either.  I was
curious about octopus or merge.autostash, but I just can't trigger it.

Using read-tree to trigger the case makes perfect sense, though.

> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  cache-tree.c                       | 11 ++++++++--
>  t/t1022-read-tree-partial-clone.sh | 33 ++++++++++++++++++++++++++++++
>  2 files changed, 42 insertions(+), 2 deletions(-)
>  create mode 100755 t/t1022-read-tree-partial-clone.sh
>
> diff --git a/cache-tree.c b/cache-tree.c
> index 45e58666af..9ba2c7c6b2 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -237,6 +237,11 @@ int cache_tree_fully_valid(struct cache_tree *it)
>         return 1;
>  }
>
> +static int must_check_existence(const struct cache_entry *ce)
> +{
> +       return !(has_promisor_remote() && ce_skip_worktree(ce));
> +}
> +
>  static int update_one(struct cache_tree *it,
>                       struct cache_entry **cache,
>                       int entries,
> @@ -378,8 +383,7 @@ static int update_one(struct cache_tree *it,
>                 }
>
>                 ce_missing_ok = mode == S_IFGITLINK || missing_ok ||
> -                       (has_promisor_remote() &&
> -                        ce_skip_worktree(ce));
> +                       !must_check_existence(ce);
>                 if (is_null_oid(oid) ||
>                     (!ce_missing_ok && !has_object_file(oid))) {
>                         strbuf_release(&buffer);
> @@ -466,6 +470,9 @@ int cache_tree_update(struct index_state *istate, int flags)
>         if (!istate->cache_tree)
>                 istate->cache_tree = cache_tree();
>
> +       if (!(flags & WRITE_TREE_MISSING_OK) && has_promisor_remote())
> +               prefetch_cache_entries(istate, must_check_existence);
> +

Nice that the fix is so simple.

>         trace_performance_enter();
>         trace2_region_enter("cache_tree", "update", the_repository);
>         i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
> diff --git a/t/t1022-read-tree-partial-clone.sh b/t/t1022-read-tree-partial-clone.sh
> new file mode 100755
> index 0000000000..a763e27c7d
> --- /dev/null
> +++ b/t/t1022-read-tree-partial-clone.sh
> @@ -0,0 +1,33 @@
> +#!/bin/sh
> +
> +test_description='git read-tree in partial clones'
> +
> +TEST_NO_CREATE_REPO=1
> +
> +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'read-tree in partial clone prefetches in one batch' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       git init server &&
> +       echo foo >server/one &&
> +       echo bar >server/two &&
> +       git -C server add one two &&
> +       git -C server commit -m "initial commit" &&
> +       TREE=$(git -C server rev-parse HEAD^{tree}) &&
> +
> +       git -C server config uploadpack.allowfilter 1 &&
> +       git -C server config uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:none "file://$(pwd)/server" client &&
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client read-tree $TREE &&
> +
> +       # "done" marks the end of negotiation (once per fetch). Expect that
> +       # only one fetch occurs.
> +       grep "fetch> done" trace >donelines &&
> +       test_line_count = 1 donelines
> +'
> +
> +test_done
> --
> 2.32.0.432.gabb21c7263-goog

Any reason for preferring GIT_TRACE_PACKET over GIT_TRACE2_PERF and
looking for the reported fetch_count (or even the number of
fetch_count lines)?  Just curious.

Anyway, looks good to me.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] Another partial clone prefetch
  2021-07-23 18:52 [PATCH 0/2] Another partial clone prefetch Jonathan Tan
  2021-07-23 18:52 ` [PATCH 1/2] unpack-trees: refactor prefetching code Jonathan Tan
  2021-07-23 18:52 ` [PATCH 2/2] cache-tree: prefetch in partial clone read-tree Jonathan Tan
@ 2021-07-26 13:01 ` Derrick Stolee
  2 siblings, 0 replies; 8+ messages in thread
From: Derrick Stolee @ 2021-07-26 13:01 UTC (permalink / raw)
  To: Jonathan Tan, git

On 7/23/2021 2:52 PM, Jonathan Tan wrote:
> Here's another instance in which we need to prefetch in order to avoid
> fetching objects one by one. More information is in patch 2's commit
> message.

I know that Elijah and Junio already gave their reviews, but I'm here
to chime in that this is a good pair of patches. Thanks!

-Stolee

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-07-26 13:01 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-23 18:52 [PATCH 0/2] Another partial clone prefetch Jonathan Tan
2021-07-23 18:52 ` [PATCH 1/2] unpack-trees: refactor prefetching code Jonathan Tan
2021-07-23 20:26   ` Elijah Newren
2021-07-23 18:52 ` [PATCH 2/2] cache-tree: prefetch in partial clone read-tree Jonathan Tan
2021-07-23 18:55   ` Jonathan Tan
2021-07-23 21:20   ` Junio C Hamano
2021-07-23 21:34   ` Elijah Newren
2021-07-26 13:01 ` [PATCH 0/2] Another partial clone prefetch Derrick Stolee

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).