git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [PATCH] diff: batch fetching of missing blobs
@ 2019-03-26 22:09 Jonathan Tan
  2019-03-27 10:10 ` SZEDER Gábor
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Jonathan Tan @ 2019-03-26 22:09 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan

When running a command like "git show" or "git diff" in a partial clone,
batch all missing blobs to be fetched as one request.

This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
blobs", 2017-12-08), but for another command.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
Here's an improvement for those having partial clones.

I couldn't find a good place to place the test (a place that checks how
diff interfaces with the object store would be ideal), so I created a
new one. Let me know if there's a better place to put it.
---
 diff.c                        | 27 +++++++++++++
 t/t4067-diff-partial-clone.sh | 76 +++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100755 t/t4067-diff-partial-clone.sh

diff --git a/diff.c b/diff.c
index ec5c095199..0e08d05b14 100644
--- a/diff.c
+++ b/diff.c
@@ -25,6 +25,7 @@
 #include "packfile.h"
 #include "parse-options.h"
 #include "help.h"
+#include "fetch-object.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6067,6 +6068,32 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 	if (o->color_moved)
 		o->emitted_symbols = &esm;
 
+	if (repository_format_partial_clone) {
+		/*
+		 * Prefetch the diff pairs that are about to be flushed.
+		 */
+		struct oid_array to_fetch = OID_ARRAY_INIT;
+		int fetch_if_missing_store = fetch_if_missing;
+
+		fetch_if_missing = 0;
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+			if (!check_pair_status(p))
+				continue;
+			if (p->one && p->one->oid_valid &&
+			    !has_object_file(&p->one->oid))
+				oid_array_append(&to_fetch, &p->one->oid);
+			if (p->two && p->two->oid_valid &&
+			    !has_object_file(&p->two->oid))
+				oid_array_append(&to_fetch, &p->two->oid);
+		}
+		if (to_fetch.nr)
+			fetch_objects(repository_format_partial_clone,
+				      to_fetch.oid, to_fetch.nr);
+		fetch_if_missing = fetch_if_missing_store;
+		oid_array_clear(&to_fetch);
+	}
+
 	for (i = 0; i < q->nr; i++) {
 		struct diff_filepair *p = q->queue[i];
 		if (check_pair_status(p))
diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
new file mode 100755
index 0000000000..38f03be114
--- /dev/null
+++ b/t/t4067-diff-partial-clone.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+test_description='behavior of diff when reading objects in a partial clone'
+
+. ./test-lib.sh
+
+test_expect_success 'git show batches blobs' '
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff batches blobs' '
+	rm -rf server client trace &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo c >server/c &&
+	echo d >server/d &&
+	git -C server add c d &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff skips same-OID blobs' '
+	rm -rf server client trace &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo another-a >server/a &&
+	git -C server add a &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	echo a | git hash-object --stdin >hash-old-a &&
+	echo another-a | git hash-object --stdin >hash-new-a &&
+	echo b | git hash-object --stdin >hash-b &&
+
+	# Ensure that only a and another-a are fetched.
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "want $(cat hash-old-a)" trace &&
+	grep "want $(cat hash-new-a)" trace &&
+	! grep "want $(cat hash-b)" trace
+'
+
+test_done
-- 
2.21.0.155.ge902e9bcae.dirty


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] diff: batch fetching of missing blobs
  2019-03-26 22:09 [PATCH] diff: batch fetching of missing blobs Jonathan Tan
@ 2019-03-27 10:10 ` SZEDER Gábor
  2019-03-27 22:02 ` Johannes Schindelin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 29+ messages in thread
From: SZEDER Gábor @ 2019-03-27 10:10 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

On Tue, Mar 26, 2019 at 03:09:06PM -0700, Jonathan Tan wrote:
> diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> new file mode 100755
> index 0000000000..38f03be114
> --- /dev/null
> +++ b/t/t4067-diff-partial-clone.sh
> @@ -0,0 +1,76 @@
> +#!/bin/sh
> +
> +test_description='behavior of diff when reading objects in a partial clone'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'git show batches blobs' '
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff batches blobs' '
> +	rm -rf server client trace &&

Please use 'test_when_finished' in the previous test to clean up after
itself.

> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo c >server/c &&
> +	echo d >server/d &&
> +	git -C server add c d &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff skips same-OID blobs' '
> +	rm -rf server client trace &&

Likewise.

> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo another-a >server/a &&
> +	git -C server add a &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	echo a | git hash-object --stdin >hash-old-a &&
> +	echo another-a | git hash-object --stdin >hash-new-a &&
> +	echo b | git hash-object --stdin >hash-b &&
> +
> +	# Ensure that only a and another-a are fetched.
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "want $(cat hash-old-a)" trace &&
> +	grep "want $(cat hash-new-a)" trace &&
> +	! grep "want $(cat hash-b)" trace
> +'
> +
> +test_done
> -- 
> 2.21.0.155.ge902e9bcae.dirty
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] diff: batch fetching of missing blobs
  2019-03-26 22:09 [PATCH] diff: batch fetching of missing blobs Jonathan Tan
  2019-03-27 10:10 ` SZEDER Gábor
@ 2019-03-27 22:02 ` Johannes Schindelin
  2019-03-28  6:52 ` Jeff King
  2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
  3 siblings, 0 replies; 29+ messages in thread
From: Johannes Schindelin @ 2019-03-27 22:02 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

Hi Jonathan,

On Tue, 26 Mar 2019, Jonathan Tan wrote:

> When running a command like "git show" or "git diff" in a partial clone,
> batch all missing blobs to be fetched as one request.
>
> This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
> blobs", 2017-12-08), but for another command.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> Here's an improvement for those having partial clones.
>
> I couldn't find a good place to place the test (a place that checks how
> diff interfaces with the object store would be ideal), so I created a
> new one. Let me know if there's a better place to put it.

My 2c: that's fine.

> diff --git a/diff.c b/diff.c
> index ec5c095199..0e08d05b14 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -25,6 +25,7 @@
>  #include "packfile.h"
>  #include "parse-options.h"
>  #include "help.h"
> +#include "fetch-object.h"
>
>  #ifdef NO_FAST_WORKING_DIRECTORY
>  #define FAST_WORKING_DIRECTORY 0
> @@ -6067,6 +6068,32 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
>  	if (o->color_moved)
>  		o->emitted_symbols = &esm;
>
> +	if (repository_format_partial_clone) {
> +		/*
> +		 * Prefetch the diff pairs that are about to be flushed.
> +		 */
> +		struct oid_array to_fetch = OID_ARRAY_INIT;
> +		int fetch_if_missing_store = fetch_if_missing;
> +
> +		fetch_if_missing = 0;
> +		for (i = 0; i < q->nr; i++) {
> +			struct diff_filepair *p = q->queue[i];
> +			if (!check_pair_status(p))
> +				continue;
> +			if (p->one && p->one->oid_valid &&
> +			    !has_object_file(&p->one->oid))
> +				oid_array_append(&to_fetch, &p->one->oid);
> +			if (p->two && p->two->oid_valid &&
> +			    !has_object_file(&p->two->oid))
> +				oid_array_append(&to_fetch, &p->two->oid);
> +		}
> +		if (to_fetch.nr)
> +			fetch_objects(repository_format_partial_clone,
> +				      to_fetch.oid, to_fetch.nr);
> +		fetch_if_missing = fetch_if_missing_store;

I am slightly uneasy about the fact that this is totally *not*
multi-thread safe. If only we had a way to pass the `fetch_if_missing`
information as a parameter to `has_object_file()`...

Do you think that would be easy to do? If so, I think now would be as fine
an opportunity as ever to introduce that.

Otherwise the patch looks just fine to me.

Thanks,
Dscho

> +		oid_array_clear(&to_fetch);
> +	}
> +
>  	for (i = 0; i < q->nr; i++) {
>  		struct diff_filepair *p = q->queue[i];
>  		if (check_pair_status(p))
> diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> new file mode 100755
> index 0000000000..38f03be114
> --- /dev/null
> +++ b/t/t4067-diff-partial-clone.sh
> @@ -0,0 +1,76 @@
> +#!/bin/sh
> +
> +test_description='behavior of diff when reading objects in a partial clone'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'git show batches blobs' '
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff batches blobs' '
> +	rm -rf server client trace &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo c >server/c &&
> +	echo d >server/d &&
> +	git -C server add c d &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff skips same-OID blobs' '
> +	rm -rf server client trace &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo another-a >server/a &&
> +	git -C server add a &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	echo a | git hash-object --stdin >hash-old-a &&
> +	echo another-a | git hash-object --stdin >hash-new-a &&
> +	echo b | git hash-object --stdin >hash-b &&
> +
> +	# Ensure that only a and another-a are fetched.
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "want $(cat hash-old-a)" trace &&
> +	grep "want $(cat hash-new-a)" trace &&
> +	! grep "want $(cat hash-b)" trace
> +'
> +
> +test_done
> --
> 2.21.0.155.ge902e9bcae.dirty
>
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] diff: batch fetching of missing blobs
  2019-03-26 22:09 [PATCH] diff: batch fetching of missing blobs Jonathan Tan
  2019-03-27 10:10 ` SZEDER Gábor
  2019-03-27 22:02 ` Johannes Schindelin
@ 2019-03-28  6:52 ` Jeff King
  2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
  3 siblings, 0 replies; 29+ messages in thread
From: Jeff King @ 2019-03-28  6:52 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

On Tue, Mar 26, 2019 at 03:09:06PM -0700, Jonathan Tan wrote:

> When running a command like "git show" or "git diff" in a partial clone,
> batch all missing blobs to be fetched as one request.
> 
> This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
> blobs", 2017-12-08), but for another command.

Sounds like a good idea, and this should make some cases much better
without making any cases worse. Two observations about how we might do
even better, though:

> @@ -6067,6 +6068,32 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
> [...]

At this stage we're looking at actually diffing the contents themselves.
But we'd also potentially need the blobs during rename and break
detection. It's not always the same set of blobs (e.g., unless you've
cranked up the copy-detection flags, renames only look at added/deleted
entries). We could have each phase do its own bulk fetch, which
worst-case gives us probably three fetches. But I wonder if we could
figure out a complete plausible set immediately after the tree diff.

> +	if (repository_format_partial_clone) {
> +		/*
> +		 * Prefetch the diff pairs that are about to be flushed.
> +		 */
> +		struct oid_array to_fetch = OID_ARRAY_INIT;
> +		int fetch_if_missing_store = fetch_if_missing;
> +
> +		fetch_if_missing = 0;
> +		for (i = 0; i < q->nr; i++) {
> +			struct diff_filepair *p = q->queue[i];
> +			if (!check_pair_status(p))
> +				continue;
> +			if (p->one && p->one->oid_valid &&
> +			    !has_object_file(&p->one->oid))
> +				oid_array_append(&to_fetch, &p->one->oid);
> +			if (p->two && p->two->oid_valid &&
> +			    !has_object_file(&p->two->oid))
> +				oid_array_append(&to_fetch, &p->two->oid);
> +		}

These has_object_file() calls may repeatedly re-scan the pack directory,
once per call.  Since it's likely that some may be missing, that may be
a noticeable amount of wasted work for a big diff (still way less than
individually fetching each missing object, so it's a net win, but read
on).

If you use the QUICK flag, that avoids the re-scans, but we may miss
erroneously say "we don't have it" if we race with a repack. For that,
we can either:

  1. Just ignore it. It's relatively rare, and the worst case is that we
     re-fetch an object.

  2. Do a series of QUICK checks, followed by a single
     reprepare_packed_git() if we had any missing, and then another
     series of QUICK checks. Then worst-case we have a single re-scan.

Something like:

  int object_is_missing_cb(const struct object_id *oid, void *data)
  {
	return !has_object_file_with_flags(oid, OBJECT_INFO_QUICK);
  }
  ...

  /* collect all of the possible blobs we need */
  for (i = 0; i < q->nr; i++) {
	...
	oid_array_append(&to_fetch, &p->one->oid);
	oid_array_append(&to_fetch, &p->two->oid);
  }

  /* drop any we already have */
  oid_array_filter(&to_fetch, object_is_missing_cb, NULL);

  /* any missing ones might actually be a race; try again */
  if (to_fetch.nr) {
	  reprepare_packed_git(the_repository);
	  oid_array_filter(&to_fetch, object_is_missing_cb, NULL);
  }

  /* and now anything we have left is definitely not here */
  if (to_fetch.nr)
	fetch_objects(..., to_fetch.oid, to_fetch.nr).

One thing I noticed while writing this: we don't seem to do any
de-duplication of the list (in yours or mine), and it doesn't look like
fetch_objects() does either. I wonder if an oidset would be a better
data structure.

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 0/2] Batch fetching of missing blobs in diff and show
  2019-03-26 22:09 [PATCH] diff: batch fetching of missing blobs Jonathan Tan
                   ` (2 preceding siblings ...)
  2019-03-28  6:52 ` Jeff King
@ 2019-03-29 21:39 ` Jonathan Tan
  2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
                     ` (2 more replies)
  3 siblings, 3 replies; 29+ messages in thread
From: Jonathan Tan @ 2019-03-29 21:39 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, peff, Johannes.Schindelin, szeder.dev

Thanks, everyone for the review.

Changes from v1:
 - used test_when_finished (Szeder)
 - used flag to inhibit fetching of missing objects (Dscho)
 - moved the prefetch so that it also works if we request rename
   detection, and included a test demonstrating that (not sure if that
   was what Peff meant)
 - used QUICK flag (I agree that the rescan is probably not that
   valuable here)

Peff also suggested that I use an oidset instead of an oidarray in order
to eliminate duplicates, but that makes it difficult to interface with
fetch_objects(), which takes a pointer and a length (which is
convenient, because if we want to fetch a single object, we can just
point to it and use a length of 1). Maybe the best idea is to have
oidset maintain its own OID array, and have each hash entry store an
index instead of an OID. For now I added a NEEDSWORK.

Jonathan Tan (2):
  sha1-file: support OBJECT_INFO_FOR_PREFETCH
  diff: batch fetching of missing blobs

 diff.c                        |  32 +++++++++++
 object-store.h                |   6 ++
 sha1-file.c                   |   3 +-
 t/t4067-diff-partial-clone.sh | 103 ++++++++++++++++++++++++++++++++++
 unpack-trees.c                |  17 +++---
 5 files changed, 152 insertions(+), 9 deletions(-)
 create mode 100755 t/t4067-diff-partial-clone.sh

Range-diff against v1:
-:  ---------- > 1:  068861632b sha1-file: support OBJECT_INFO_FOR_PREFETCH
1:  d1e604239b ! 2:  44de02e584 diff: batch fetching of missing blobs
    @@ -9,12 +9,6 @@
         blobs", 2017-12-08), but for another command.
     
         Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
    -    ---
    -    Here's an improvement for those having partial clones.
    -
    -    I couldn't find a good place to place the test (a place that checks how
    -    diff interfaces with the object store would be ideal), so I created a
    -    new one. Let me know if there's a better place to put it.
     
      diff --git a/diff.c b/diff.c
      --- a/diff.c
    @@ -28,38 +22,45 @@
      #ifdef NO_FAST_WORKING_DIRECTORY
      #define FAST_WORKING_DIRECTORY 0
     @@
    - 	if (o->color_moved)
    - 		o->emitted_symbols = &esm;
    + 	QSORT(q->queue, q->nr, diffnamecmp);
    + }
      
    ++static void add_if_missing(struct oid_array *to_fetch,
    ++			   const struct diff_filespec *filespec)
    ++{
    ++	if (filespec && filespec->oid_valid &&
    ++	    oid_object_info_extended(the_repository, &filespec->oid, NULL,
    ++				     OBJECT_INFO_FOR_PREFETCH))
    ++		oid_array_append(to_fetch, &filespec->oid);
    ++}
    ++
    + void diffcore_std(struct diff_options *options)
    + {
     +	if (repository_format_partial_clone) {
     +		/*
     +		 * Prefetch the diff pairs that are about to be flushed.
     +		 */
    ++		int i;
    ++		struct diff_queue_struct *q = &diff_queued_diff;
     +		struct oid_array to_fetch = OID_ARRAY_INIT;
    -+		int fetch_if_missing_store = fetch_if_missing;
     +
    -+		fetch_if_missing = 0;
     +		for (i = 0; i < q->nr; i++) {
     +			struct diff_filepair *p = q->queue[i];
    -+			if (!check_pair_status(p))
    -+				continue;
    -+			if (p->one && p->one->oid_valid &&
    -+			    !has_object_file(&p->one->oid))
    -+				oid_array_append(&to_fetch, &p->one->oid);
    -+			if (p->two && p->two->oid_valid &&
    -+			    !has_object_file(&p->two->oid))
    -+				oid_array_append(&to_fetch, &p->two->oid);
    ++			add_if_missing(&to_fetch, p->one);
    ++			add_if_missing(&to_fetch, p->two);
     +		}
     +		if (to_fetch.nr)
    ++			/*
    ++			 * NEEDSWORK: Consider deduplicating the OIDs sent.
    ++			 */
     +			fetch_objects(repository_format_partial_clone,
     +				      to_fetch.oid, to_fetch.nr);
    -+		fetch_if_missing = fetch_if_missing_store;
     +		oid_array_clear(&to_fetch);
     +	}
     +
    - 	for (i = 0; i < q->nr; i++) {
    - 		struct diff_filepair *p = q->queue[i];
    - 		if (check_pair_status(p))
    + 	/* NOTE please keep the following in sync with diff_tree_combined() */
    + 	if (options->skip_stat_unmatch)
    + 		diffcore_skip_stat_unmatch(options);
     
      diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
      new file mode 100755
    @@ -73,6 +74,8 @@
     +. ./test-lib.sh
     +
     +test_expect_success 'git show batches blobs' '
    ++	test_when_finished "rm -rf server client trace" &&
    ++	
     +	test_create_repo server &&
     +	echo a >server/a &&
     +	echo b >server/b &&
    @@ -91,7 +94,7 @@
     +'
     +
     +test_expect_success 'diff batches blobs' '
    -+	rm -rf server client trace &&
    ++	test_when_finished "rm -rf server client trace" &&
     +
     +	test_create_repo server &&
     +	echo a >server/a &&
    @@ -115,7 +118,7 @@
     +'
     +
     +test_expect_success 'diff skips same-OID blobs' '
    -+	rm -rf server client trace &&
    ++	test_when_finished "rm -rf server client trace" &&
     +
     +	test_create_repo server &&
     +	echo a >server/a &&
    @@ -141,4 +144,29 @@
     +	! grep "want $(cat hash-b)" trace
     +'
     +
    ++test_expect_success 'diff with rename detection batches blobs' '
    ++	test_when_finished "rm -rf server client trace" &&
    ++
    ++	test_create_repo server &&
    ++	echo a >server/a &&
    ++	printf "b\nb\nb\nb\nb\n" >server/b &&
    ++	git -C server add a b &&
    ++	git -C server commit -m x &&
    ++	rm server/b &&
    ++	printf "b\nb\nb\nb\nbX\n" >server/c &&
    ++	git -C server add c &&
    ++	git -C server commit -a -m x &&
    ++
    ++	test_config -C server uploadpack.allowfilter 1 &&
    ++	test_config -C server uploadpack.allowanysha1inwant 1 &&
    ++	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
    ++
    ++	# Ensure that there is exactly 1 negotiation by checking that there is
    ++	# only 1 "done" line sent. ("done" marks the end of negotiation.)
    ++	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
    ++	grep "similarity index" out &&
    ++	grep "git> done" trace >done_lines &&
    ++	test_line_count = 1 done_lines
    ++'
    ++
     +test_done
-- 
2.21.0.197.gd478713db0


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH
  2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
@ 2019-03-29 21:39   ` Jonathan Tan
  2019-04-05 14:13     ` Johannes Schindelin
  2019-04-05 22:00     ` Jeff King
  2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
  2019-04-05 22:12   ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jeff King
  2 siblings, 2 replies; 29+ messages in thread
From: Jonathan Tan @ 2019-03-29 21:39 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, peff, Johannes.Schindelin, szeder.dev

Teach oid_object_info_extended() to support a new flag that inhibits
fetching of missing objects. This is equivalent to setting
fetch_is_missing to 0, calling oid_object_info_extended(), then setting
fetch_if_missing to whatever it was before. Update unpack-trees.c to use
this new flag instead of repeatedly setting fetch_if_missing.

This new flag complicates things slightly in that there are now 2 ways
to do the same thing. But this eliminates the need to repeatedly set a
global variable, and more importantly, allows prefetching to be done in
parallel (in the future); hence, this patch.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 object-store.h |  6 ++++++
 sha1-file.c    |  3 ++-
 unpack-trees.c | 17 +++++++++--------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/object-store.h b/object-store.h
index 14fc935bd1..dd3f9b75f0 100644
--- a/object-store.h
+++ b/object-store.h
@@ -280,6 +280,12 @@ struct object_info {
 #define OBJECT_INFO_QUICK 8
 /* Do not check loose object */
 #define OBJECT_INFO_IGNORE_LOOSE 16
+/*
+ * Do not attempt to fetch the object if missing (even if fetch_is_missing is
+ * nonzero). This is meant for bulk prefetching of missing blobs in a partial
+ * clone. Implies OBJECT_INFO_QUICK.
+ */
+#define OBJECT_INFO_FOR_PREFETCH (32 + OBJECT_INFO_QUICK)
 
 int oid_object_info_extended(struct repository *r,
 			     const struct object_id *,
diff --git a/sha1-file.c b/sha1-file.c
index 494606f771..ad02649124 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1370,7 +1370,8 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid,
 
 		/* Check if it is a missing object */
 		if (fetch_if_missing && repository_format_partial_clone &&
-		    !already_retried && r == the_repository) {
+		    !already_retried && r == the_repository &&
+		    !(flags & OBJECT_INFO_FOR_PREFETCH)) {
 			/*
 			 * TODO Investigate having fetch_object() return
 			 * TODO error/success and stopping the music here.
diff --git a/unpack-trees.c b/unpack-trees.c
index 22c41a3ba8..381b0cd65e 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -404,20 +404,21 @@ static int check_updates(struct unpack_trees_options *o)
 		 * below.
 		 */
 		struct oid_array to_fetch = OID_ARRAY_INIT;
-		int fetch_if_missing_store = fetch_if_missing;
-		fetch_if_missing = 0;
 		for (i = 0; i < index->cache_nr; i++) {
 			struct cache_entry *ce = index->cache[i];
-			if ((ce->ce_flags & CE_UPDATE) &&
-			    !S_ISGITLINK(ce->ce_mode)) {
-				if (!has_object_file(&ce->oid))
-					oid_array_append(&to_fetch, &ce->oid);
-			}
+
+			if (!(ce->ce_flags & CE_UPDATE) ||
+			    S_ISGITLINK(ce->ce_mode))
+				continue;
+			if (!oid_object_info_extended(the_repository, &ce->oid,
+						      NULL,
+						      OBJECT_INFO_FOR_PREFETCH))
+				continue;
+			oid_array_append(&to_fetch, &ce->oid);
 		}
 		if (to_fetch.nr)
 			fetch_objects(repository_format_partial_clone,
 				      to_fetch.oid, to_fetch.nr);
-		fetch_if_missing = fetch_if_missing_store;
 		oid_array_clear(&to_fetch);
 	}
 	for (i = 0; i < index->cache_nr; i++) {
-- 
2.21.0.197.gd478713db0


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
  2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
@ 2019-03-29 21:39   ` Jonathan Tan
  2019-04-04  2:47     ` SZEDER Gábor
                       ` (2 more replies)
  2019-04-05 22:12   ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jeff King
  2 siblings, 3 replies; 29+ messages in thread
From: Jonathan Tan @ 2019-03-29 21:39 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, peff, Johannes.Schindelin, szeder.dev

When running a command like "git show" or "git diff" in a partial clone,
batch all missing blobs to be fetched as one request.

This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
blobs", 2017-12-08), but for another command.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 diff.c                        |  32 +++++++++++
 t/t4067-diff-partial-clone.sh | 103 ++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100755 t/t4067-diff-partial-clone.sh

diff --git a/diff.c b/diff.c
index ec5c095199..1eccefb4ef 100644
--- a/diff.c
+++ b/diff.c
@@ -25,6 +25,7 @@
 #include "packfile.h"
 #include "parse-options.h"
 #include "help.h"
+#include "fetch-object.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6366,8 +6367,39 @@ void diffcore_fix_diff_index(void)
 	QSORT(q->queue, q->nr, diffnamecmp);
 }
 
+static void add_if_missing(struct oid_array *to_fetch,
+			   const struct diff_filespec *filespec)
+{
+	if (filespec && filespec->oid_valid &&
+	    oid_object_info_extended(the_repository, &filespec->oid, NULL,
+				     OBJECT_INFO_FOR_PREFETCH))
+		oid_array_append(to_fetch, &filespec->oid);
+}
+
 void diffcore_std(struct diff_options *options)
 {
+	if (repository_format_partial_clone) {
+		/*
+		 * Prefetch the diff pairs that are about to be flushed.
+		 */
+		int i;
+		struct diff_queue_struct *q = &diff_queued_diff;
+		struct oid_array to_fetch = OID_ARRAY_INIT;
+
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+			add_if_missing(&to_fetch, p->one);
+			add_if_missing(&to_fetch, p->two);
+		}
+		if (to_fetch.nr)
+			/*
+			 * NEEDSWORK: Consider deduplicating the OIDs sent.
+			 */
+			fetch_objects(repository_format_partial_clone,
+				      to_fetch.oid, to_fetch.nr);
+		oid_array_clear(&to_fetch);
+	}
+
 	/* NOTE please keep the following in sync with diff_tree_combined() */
 	if (options->skip_stat_unmatch)
 		diffcore_skip_stat_unmatch(options);
diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
new file mode 100755
index 0000000000..349851be7d
--- /dev/null
+++ b/t/t4067-diff-partial-clone.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+
+test_description='behavior of diff when reading objects in a partial clone'
+
+. ./test-lib.sh
+
+test_expect_success 'git show batches blobs' '
+	test_when_finished "rm -rf server client trace" &&
+	
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff batches blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo c >server/c &&
+	echo d >server/d &&
+	git -C server add c d &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff skips same-OID blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo another-a >server/a &&
+	git -C server add a &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	echo a | git hash-object --stdin >hash-old-a &&
+	echo another-a | git hash-object --stdin >hash-new-a &&
+	echo b | git hash-object --stdin >hash-b &&
+
+	# Ensure that only a and another-a are fetched.
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "want $(cat hash-old-a)" trace &&
+	grep "want $(cat hash-new-a)" trace &&
+	! grep "want $(cat hash-b)" trace
+'
+
+test_expect_success 'diff with rename detection batches blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	printf "b\nb\nb\nb\nb\n" >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	rm server/b &&
+	printf "b\nb\nb\nb\nbX\n" >server/c &&
+	git -C server add c &&
+	git -C server commit -a -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
+	grep "similarity index" out &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_done
-- 
2.21.0.197.gd478713db0


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
@ 2019-04-04  2:47     ` SZEDER Gábor
  2019-04-05 13:38       ` Johannes Schindelin
  2019-04-05  9:39     ` Duy Nguyen
  2019-04-05 14:17     ` [PATCH v2 2/2] " Johannes Schindelin
  2 siblings, 1 reply; 29+ messages in thread
From: SZEDER Gábor @ 2019-04-04  2:47 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Christian Couder, git, peff, Johannes.Schindelin

On Fri, Mar 29, 2019 at 02:39:28PM -0700, Jonathan Tan wrote:
> diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> new file mode 100755
> index 0000000000..349851be7d
> --- /dev/null
> +++ b/t/t4067-diff-partial-clone.sh
> @@ -0,0 +1,103 @@
> +#!/bin/sh
> +
> +test_description='behavior of diff when reading objects in a partial clone'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'git show batches blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +	
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines

These patches and 'cc/multi-promisor' don't seem to work well
together, and all tests checking 'test_line_count = 1 done_lines' in
this test script fail in current 'pu', because there are two
"git> done" lines.

> +'
> +
> +test_expect_success 'diff batches blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo c >server/c &&
> +	echo d >server/d &&
> +	git -C server add c d &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff skips same-OID blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo another-a >server/a &&
> +	git -C server add a &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	echo a | git hash-object --stdin >hash-old-a &&
> +	echo another-a | git hash-object --stdin >hash-new-a &&
> +	echo b | git hash-object --stdin >hash-b &&
> +
> +	# Ensure that only a and another-a are fetched.
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "want $(cat hash-old-a)" trace &&
> +	grep "want $(cat hash-new-a)" trace &&
> +	! grep "want $(cat hash-b)" trace
> +'
> +
> +test_expect_success 'diff with rename detection batches blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	printf "b\nb\nb\nb\nb\n" >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	rm server/b &&
> +	printf "b\nb\nb\nb\nbX\n" >server/c &&
> +	git -C server add c &&
> +	git -C server commit -a -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
> +	grep "similarity index" out &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_done
> -- 
> 2.21.0.197.gd478713db0
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
  2019-04-04  2:47     ` SZEDER Gábor
@ 2019-04-05  9:39     ` Duy Nguyen
  2019-04-05 17:09       ` [PATCH] fixup! " Jonathan Tan
  2019-04-05 14:17     ` [PATCH v2 2/2] " Johannes Schindelin
  2 siblings, 1 reply; 29+ messages in thread
From: Duy Nguyen @ 2019-04-05  9:39 UTC (permalink / raw)
  To: Jonathan Tan
  Cc: Git Mailing List, Jeff King, Johannes Schindelin, SZEDER Gábor

On Sat, Mar 30, 2019 at 4:40 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> When running a command like "git show" or "git diff" in a partial clone,
> batch all missing blobs to be fetched as one request.
>
> This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
> blobs", 2017-12-08), but for another command.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  diff.c                        |  32 +++++++++++
>  t/t4067-diff-partial-clone.sh | 103 ++++++++++++++++++++++++++++++++++
>  2 files changed, 135 insertions(+)
>  create mode 100755 t/t4067-diff-partial-clone.sh
>
> diff --git a/diff.c b/diff.c
> index ec5c095199..1eccefb4ef 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -25,6 +25,7 @@
>  #include "packfile.h"
>  #include "parse-options.h"
>  #include "help.h"
> +#include "fetch-object.h"
>
>  #ifdef NO_FAST_WORKING_DIRECTORY
>  #define FAST_WORKING_DIRECTORY 0
> @@ -6366,8 +6367,39 @@ void diffcore_fix_diff_index(void)
>         QSORT(q->queue, q->nr, diffnamecmp);
>  }
>
> +static void add_if_missing(struct oid_array *to_fetch,
> +                          const struct diff_filespec *filespec)
> +{
> +       if (filespec && filespec->oid_valid &&
> +           oid_object_info_extended(the_repository, &filespec->oid, NULL,

I'm quite sure we can pass 'struct repository *' around in diff code
now. I think it's the "repo" field in "struct diff_options". Please
use it and avoid more references to the_repository.

> +                                    OBJECT_INFO_FOR_PREFETCH))
> +               oid_array_append(to_fetch, &filespec->oid);
> +}
> +
>  void diffcore_std(struct diff_options *options)
>  {
> +       if (repository_format_partial_clone) {
> +               /*
> +                * Prefetch the diff pairs that are about to be flushed.
> +                */
> +               int i;
> +               struct diff_queue_struct *q = &diff_queued_diff;
> +               struct oid_array to_fetch = OID_ARRAY_INIT;
> +
> +               for (i = 0; i < q->nr; i++) {
> +                       struct diff_filepair *p = q->queue[i];
> +                       add_if_missing(&to_fetch, p->one);
> +                       add_if_missing(&to_fetch, p->two);
> +               }
> +               if (to_fetch.nr)
> +                       /*
> +                        * NEEDSWORK: Consider deduplicating the OIDs sent.
> +                        */
> +                       fetch_objects(repository_format_partial_clone,
> +                                     to_fetch.oid, to_fetch.nr);
> +               oid_array_clear(&to_fetch);
> +       }
> +
>         /* NOTE please keep the following in sync with diff_tree_combined() */
>         if (options->skip_stat_unmatch)
>                 diffcore_skip_stat_unmatch(options);
> diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> new file mode 100755
> index 0000000000..349851be7d
> --- /dev/null
> +++ b/t/t4067-diff-partial-clone.sh
> @@ -0,0 +1,103 @@
> +#!/bin/sh
> +
> +test_description='behavior of diff when reading objects in a partial clone'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'git show batches blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       echo b >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       # Ensure that there is exactly 1 negotiation by checking that there is
> +       # only 1 "done" line sent. ("done" marks the end of negotiation.)
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> +       grep "git> done" trace >done_lines &&
> +       test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff batches blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       echo b >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +       echo c >server/c &&
> +       echo d >server/d &&
> +       git -C server add c d &&
> +       git -C server commit -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       # Ensure that there is exactly 1 negotiation by checking that there is
> +       # only 1 "done" line sent. ("done" marks the end of negotiation.)
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +       grep "git> done" trace >done_lines &&
> +       test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff skips same-OID blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       echo b >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +       echo another-a >server/a &&
> +       git -C server add a &&
> +       git -C server commit -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       echo a | git hash-object --stdin >hash-old-a &&
> +       echo another-a | git hash-object --stdin >hash-new-a &&
> +       echo b | git hash-object --stdin >hash-b &&
> +
> +       # Ensure that only a and another-a are fetched.
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +       grep "want $(cat hash-old-a)" trace &&
> +       grep "want $(cat hash-new-a)" trace &&
> +       ! grep "want $(cat hash-b)" trace
> +'
> +
> +test_expect_success 'diff with rename detection batches blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       printf "b\nb\nb\nb\nb\n" >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +       rm server/b &&
> +       printf "b\nb\nb\nb\nbX\n" >server/c &&
> +       git -C server add c &&
> +       git -C server commit -a -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       # Ensure that there is exactly 1 negotiation by checking that there is
> +       # only 1 "done" line sent. ("done" marks the end of negotiation.)
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
> +       grep "similarity index" out &&
> +       grep "git> done" trace >done_lines &&
> +       test_line_count = 1 done_lines
> +'
> +
> +test_done
> --
> 2.21.0.197.gd478713db0
>


-- 
Duy

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-04  2:47     ` SZEDER Gábor
@ 2019-04-05 13:38       ` Johannes Schindelin
  2019-04-07  6:00         ` Christian Couder
  0 siblings, 1 reply; 29+ messages in thread
From: Johannes Schindelin @ 2019-04-05 13:38 UTC (permalink / raw)
  To: SZEDER Gábor; +Cc: Jonathan Tan, gitster, Christian Couder, git, peff

[-- Attachment #1: Type: text/plain, Size: 3611 bytes --]

Hi,

On Thu, 4 Apr 2019, SZEDER Gábor wrote:

> On Fri, Mar 29, 2019 at 02:39:28PM -0700, Jonathan Tan wrote:
> > diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> > new file mode 100755
> > index 0000000000..349851be7d
> > --- /dev/null
> > +++ b/t/t4067-diff-partial-clone.sh
> > @@ -0,0 +1,103 @@
> > +#!/bin/sh
> > +
> > +test_description='behavior of diff when reading objects in a partial clone'
> > +
> > +. ./test-lib.sh
> > +
> > +test_expect_success 'git show batches blobs' '
> > +	test_when_finished "rm -rf server client trace" &&
> > +
> > +	test_create_repo server &&
> > +	echo a >server/a &&
> > +	echo b >server/b &&
> > +	git -C server add a b &&
> > +	git -C server commit -m x &&
> > +
> > +	test_config -C server uploadpack.allowfilter 1 &&
> > +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> > +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> > +
> > +	# Ensure that there is exactly 1 negotiation by checking that there is
> > +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> > +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> > +	grep "git> done" trace >done_lines &&
> > +	test_line_count = 1 done_lines
>
> These patches and 'cc/multi-promisor' don't seem to work well
> together, and all tests checking 'test_line_count = 1 done_lines' in
> this test script fail in current 'pu', because there are two
> "git> done" lines.

I investigated a little further, and it would seem that it is neither this
patch nor the cc/multi-promisor patches that introduce the problem, but
the merge between the two... The latter tries to get away from using the
global variable `repository_format_partial_clone` while this patch
introduces another user.

So that merge between these two branches needs to become an "evil merge"
(or should I say "blessed merge", or even a "merge with a blessing"?). I
have this patch, tentatively, for the `shears/pu` branch [*1*], which
seems to fix the test here:

-- snip --
Subject: [PATCH] fixup??? Merge branch 'cc/multi-promisor' into pu

The `cc/multi-promisor` patch series and the
`jt/batch-fetch-blobs-in-diff` patch series have a semantic conflict:
the former replaces checks for `repository_format_partial_clone` with
checks for `has_promisor_remote()`, while the latter introduces such a
check.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 diff.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index fa12b5d04a..86278ce676 100644
--- a/diff.c
+++ b/diff.c
@@ -26,6 +26,7 @@
 #include "parse-options.h"
 #include "help.h"
 #include "fetch-object.h"
+#include "promisor-remote.h"

 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6493,7 +6494,7 @@ static void add_if_missing(struct oid_array *to_fetch,

 void diffcore_std(struct diff_options *options)
 {
-	if (repository_format_partial_clone) {
+	if (has_promisor_remote()) {
 		/*
 		 * Prefetch the diff pairs that are about to be flushed.
 		 */
-- snap --

Junio, would you terribly mind adopting this?

Ciao,
Dscho

Footnote *1*: I started again to maintain `shears/*` branches in
https://github.com/git-for-windows/git which essentially are Git for
Windows' patch thicket rebased to all four integration branches of
upstream (or "core") Git. I try my best at keeping the CI green on those,
meaning that I liberally add commits on top that are neither in Git for
Windows' `master` nor in core Git's branches.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH
  2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
@ 2019-04-05 14:13     ` Johannes Schindelin
  2019-04-05 22:00     ` Jeff King
  1 sibling, 0 replies; 29+ messages in thread
From: Johannes Schindelin @ 2019-04-05 14:13 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, peff, szeder.dev

Hi Jonathan,

On Fri, 29 Mar 2019, Jonathan Tan wrote:

> Teach oid_object_info_extended() to support a new flag that inhibits
> fetching of missing objects. This is equivalent to setting
> fetch_is_missing to 0, calling oid_object_info_extended(), then setting
> fetch_if_missing to whatever it was before. Update unpack-trees.c to use
> this new flag instead of repeatedly setting fetch_if_missing.
>
> This new flag complicates things slightly in that there are now 2 ways
> to do the same thing.

Just a note that I disagree with the latter part of the sentence: those
are not 2 ways of doing the same thing, but they are two switches that
essentially both have to be flipped to "on". They're just multiple gates.

I do not ask you to rephrase it, merely registering a different opinion.

The patch looks good, I especially like the post-image of
`check_updates()`, which looks much nicer (from my perspective, of
course).

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
  2019-04-04  2:47     ` SZEDER Gábor
  2019-04-05  9:39     ` Duy Nguyen
@ 2019-04-05 14:17     ` Johannes Schindelin
  2 siblings, 0 replies; 29+ messages in thread
From: Johannes Schindelin @ 2019-04-05 14:17 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, peff, szeder.dev

Hi Jonathan,

On Fri, 29 Mar 2019, Jonathan Tan wrote:

> When running a command like "git show" or "git diff" in a partial clone,
> batch all missing blobs to be fetched as one request.
>
> This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
> blobs", 2017-12-08), but for another command.

Still makes sense ;-)

> diff --git a/diff.c b/diff.c
> index ec5c095199..1eccefb4ef 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -25,6 +25,7 @@
>  #include "packfile.h"
>  #include "parse-options.h"
>  #include "help.h"
> +#include "fetch-object.h"
>
>  #ifdef NO_FAST_WORKING_DIRECTORY
>  #define FAST_WORKING_DIRECTORY 0
> @@ -6366,8 +6367,39 @@ void diffcore_fix_diff_index(void)
>  	QSORT(q->queue, q->nr, diffnamecmp);
>  }
>
> +static void add_if_missing(struct oid_array *to_fetch,
> +			   const struct diff_filespec *filespec)
> +{
> +	if (filespec && filespec->oid_valid &&
> +	    oid_object_info_extended(the_repository, &filespec->oid, NULL,
> +				     OBJECT_INFO_FOR_PREFETCH))
> +		oid_array_append(to_fetch, &filespec->oid);
> +}

Thank you for introducing this, in looks more elegant to my eyes than the
previous iteration.

> +
>  void diffcore_std(struct diff_options *options)
>  {
> +	if (repository_format_partial_clone) {
> +		/*
> +		 * Prefetch the diff pairs that are about to be flushed.
> +		 */
> +		int i;
> +		struct diff_queue_struct *q = &diff_queued_diff;
> +		struct oid_array to_fetch = OID_ARRAY_INIT;
> +
> +		for (i = 0; i < q->nr; i++) {
> +			struct diff_filepair *p = q->queue[i];
> +			add_if_missing(&to_fetch, p->one);
> +			add_if_missing(&to_fetch, p->two);
> +		}
> +		if (to_fetch.nr)
> +			/*
> +			 * NEEDSWORK: Consider deduplicating the OIDs sent.
> +			 */
> +			fetch_objects(repository_format_partial_clone,
> +				      to_fetch.oid, to_fetch.nr);
> +		oid_array_clear(&to_fetch);
> +	}
> +
>  	/* NOTE please keep the following in sync with diff_tree_combined() */
>  	if (options->skip_stat_unmatch)
>  		diffcore_skip_stat_unmatch(options);
> diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> new file mode 100755

Also: thank you very much for introducing this test script, to make sure
that things work as expected. Without it, we would not have detected any
regression wrt multi-promisor.

This iteration looks good to me, thank you so much!
Dscho

> index 0000000000..349851be7d
> --- /dev/null
> +++ b/t/t4067-diff-partial-clone.sh
> @@ -0,0 +1,103 @@
> +#!/bin/sh
> +
> +test_description='behavior of diff when reading objects in a partial clone'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'git show batches blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff batches blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo c >server/c &&
> +	echo d >server/d &&
> +	git -C server add c d &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff skips same-OID blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	echo b >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	echo another-a >server/a &&
> +	git -C server add a &&
> +	git -C server commit -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	echo a | git hash-object --stdin >hash-old-a &&
> +	echo another-a | git hash-object --stdin >hash-new-a &&
> +	echo b | git hash-object --stdin >hash-b &&
> +
> +	# Ensure that only a and another-a are fetched.
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +	grep "want $(cat hash-old-a)" trace &&
> +	grep "want $(cat hash-new-a)" trace &&
> +	! grep "want $(cat hash-b)" trace
> +'
> +
> +test_expect_success 'diff with rename detection batches blobs' '
> +	test_when_finished "rm -rf server client trace" &&
> +
> +	test_create_repo server &&
> +	echo a >server/a &&
> +	printf "b\nb\nb\nb\nb\n" >server/b &&
> +	git -C server add a b &&
> +	git -C server commit -m x &&
> +	rm server/b &&
> +	printf "b\nb\nb\nb\nbX\n" >server/c &&
> +	git -C server add c &&
> +	git -C server commit -a -m x &&
> +
> +	test_config -C server uploadpack.allowfilter 1 &&
> +	test_config -C server uploadpack.allowanysha1inwant 1 &&
> +	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +	# Ensure that there is exactly 1 negotiation by checking that there is
> +	# only 1 "done" line sent. ("done" marks the end of negotiation.)
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
> +	grep "similarity index" out &&
> +	grep "git> done" trace >done_lines &&
> +	test_line_count = 1 done_lines
> +'
> +
> +test_done
> --
> 2.21.0.197.gd478713db0
>
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-05  9:39     ` Duy Nguyen
@ 2019-04-05 17:09       ` Jonathan Tan
  2019-04-05 20:16         ` Johannes Schindelin
  2019-04-06  4:17         ` Duy Nguyen
  0 siblings, 2 replies; 29+ messages in thread
From: Jonathan Tan @ 2019-04-05 17:09 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, pclouds

This is a fixup on the tip of jt/batch-fetch-blobs-in-diff (571debe1d9).

I don't know if Junio has already merged this branch to next (he marked
this as "Will merge to 'next'" in the "What's Cooking" email, but when I
fetched, it hasn't been merged yet). If he has, we can use this commit
message:

diff: propagate options->repo to add_if_missing

Avoid a usage of the_repository by propagating the configured repository
to add_if_missing(). Also, prefetch only if the repository being diffed
is the_repository (because we do not support lazy fetching for any other
repository anyway).

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
Thanks, Duy, for noticing this.
---
 diff.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 1eccefb4ef..811afbdfb1 100644
--- a/diff.c
+++ b/diff.c
@@ -6367,18 +6367,19 @@ void diffcore_fix_diff_index(void)
 	QSORT(q->queue, q->nr, diffnamecmp);
 }
 
-static void add_if_missing(struct oid_array *to_fetch,
+static void add_if_missing(struct oid_array *to_fetch, struct repository *r,
 			   const struct diff_filespec *filespec)
 {
 	if (filespec && filespec->oid_valid &&
-	    oid_object_info_extended(the_repository, &filespec->oid, NULL,
+	    oid_object_info_extended(r, &filespec->oid, NULL,
 				     OBJECT_INFO_FOR_PREFETCH))
 		oid_array_append(to_fetch, &filespec->oid);
 }
 
 void diffcore_std(struct diff_options *options)
 {
-	if (repository_format_partial_clone) {
+	if (options->repo == the_repository &&
+	    repository_format_partial_clone) {
 		/*
 		 * Prefetch the diff pairs that are about to be flushed.
 		 */
@@ -6388,8 +6389,8 @@ void diffcore_std(struct diff_options *options)
 
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			add_if_missing(&to_fetch, p->one);
-			add_if_missing(&to_fetch, p->two);
+			add_if_missing(&to_fetch, options->repo, p->one);
+			add_if_missing(&to_fetch, options->repo, p->two);
 		}
 		if (to_fetch.nr)
 			/*
-- 
2.21.0.392.gf8f6787159e-goog


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-05 17:09       ` [PATCH] fixup! " Jonathan Tan
@ 2019-04-05 20:16         ` Johannes Schindelin
  2019-04-06  4:17         ` Duy Nguyen
  1 sibling, 0 replies; 29+ messages in thread
From: Johannes Schindelin @ 2019-04-05 20:16 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, pclouds

Hi Jonathan,

On Fri, 5 Apr 2019, Jonathan Tan wrote:

> This is a fixup on the tip of jt/batch-fetch-blobs-in-diff (571debe1d9).
>
> I don't know if Junio has already merged this branch to next (he marked
> this as "Will merge to 'next'" in the "What's Cooking" email, but when I
> fetched, it hasn't been merged yet). If he has, we can use this commit
> message:
>
> diff: propagate options->repo to add_if_missing
>
> Avoid a usage of the_repository by propagating the configured repository
> to add_if_missing(). Also, prefetch only if the repository being diffed
> is the_repository (because we do not support lazy fetching for any other
> repository anyway).

True, and the introduction of `has_promisor_remotes()` should probably
help that, by introducing a parameter of type `struct repository *r`.

> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>

The patch and the commit message look good!

Thanks,
Dscho

> diff --git a/diff.c b/diff.c
> index 1eccefb4ef..811afbdfb1 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -6367,18 +6367,19 @@ void diffcore_fix_diff_index(void)
>  	QSORT(q->queue, q->nr, diffnamecmp);
>  }
>
> -static void add_if_missing(struct oid_array *to_fetch,
> +static void add_if_missing(struct oid_array *to_fetch, struct repository *r,
>  			   const struct diff_filespec *filespec)
>  {
>  	if (filespec && filespec->oid_valid &&
> -	    oid_object_info_extended(the_repository, &filespec->oid, NULL,
> +	    oid_object_info_extended(r, &filespec->oid, NULL,
>  				     OBJECT_INFO_FOR_PREFETCH))
>  		oid_array_append(to_fetch, &filespec->oid);
>  }
>
>  void diffcore_std(struct diff_options *options)
>  {
> -	if (repository_format_partial_clone) {
> +	if (options->repo == the_repository &&
> +	    repository_format_partial_clone) {
>  		/*
>  		 * Prefetch the diff pairs that are about to be flushed.
>  		 */
> @@ -6388,8 +6389,8 @@ void diffcore_std(struct diff_options *options)
>
>  		for (i = 0; i < q->nr; i++) {
>  			struct diff_filepair *p = q->queue[i];
> -			add_if_missing(&to_fetch, p->one);
> -			add_if_missing(&to_fetch, p->two);
> +			add_if_missing(&to_fetch, options->repo, p->one);
> +			add_if_missing(&to_fetch, options->repo, p->two);
>  		}
>  		if (to_fetch.nr)
>  			/*
> --
> 2.21.0.392.gf8f6787159e-goog
>
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH
  2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
  2019-04-05 14:13     ` Johannes Schindelin
@ 2019-04-05 22:00     ` Jeff King
  1 sibling, 0 replies; 29+ messages in thread
From: Jeff King @ 2019-04-05 22:00 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, Johannes.Schindelin, szeder.dev

On Fri, Mar 29, 2019 at 02:39:27PM -0700, Jonathan Tan wrote:

> Teach oid_object_info_extended() to support a new flag that inhibits
> fetching of missing objects. This is equivalent to setting
> fetch_is_missing to 0, calling oid_object_info_extended(), then setting
> fetch_if_missing to whatever it was before. Update unpack-trees.c to use
> this new flag instead of repeatedly setting fetch_if_missing.
> 
> This new flag complicates things slightly in that there are now 2 ways
> to do the same thing. But this eliminates the need to repeatedly set a
> global variable, and more importantly, allows prefetching to be done in
> parallel (in the future); hence, this patch.

Sorry I'm a little late to review this. I don't have any critical
comments, so if this gets ignored, I'll live with it.

> +/*
> + * Do not attempt to fetch the object if missing (even if fetch_is_missing is
> + * nonzero). This is meant for bulk prefetching of missing blobs in a partial
> + * clone. Implies OBJECT_INFO_QUICK.
> + */
> +#define OBJECT_INFO_FOR_PREFETCH (32 + OBJECT_INFO_QUICK)

Mostly I found the name and semantics of this flag to be a little
confusing. Really what we want is to tell oid_object_info() not do any
on-demand fetching for us. That seems like a thing that we might
eventually want for other purposes (e.g., a diff operation that could
produce a real blob diff but would be happy outputting a less-detailed
tree diff).

If it were just OBJECT_INFO_NO_FETCH or similar, that tells more clearly
what it does, and would make sense in more contexts.

I suspect that QUICK would be the norm when used with it, though I
probably would have kept the two orthogonal for the sake of simplicity
and clarity.

> diff --git a/unpack-trees.c b/unpack-trees.c
> index 22c41a3ba8..381b0cd65e 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -404,20 +404,21 @@ static int check_updates(struct unpack_trees_options *o)
>  		 * below.
>  		 */
>  		struct oid_array to_fetch = OID_ARRAY_INIT;
> -		int fetch_if_missing_store = fetch_if_missing;
> -		fetch_if_missing = 0;
>  		for (i = 0; i < index->cache_nr; i++) {
>  			struct cache_entry *ce = index->cache[i];
> -			if ((ce->ce_flags & CE_UPDATE) &&
> -			    !S_ISGITLINK(ce->ce_mode)) {
> -				if (!has_object_file(&ce->oid))
> -					oid_array_append(&to_fetch, &ce->oid);
> -			}
> +
> +			if (!(ce->ce_flags & CE_UPDATE) ||
> +			    S_ISGITLINK(ce->ce_mode))
> +				continue;
> +			if (!oid_object_info_extended(the_repository, &ce->oid,
> +						      NULL,
> +						      OBJECT_INFO_FOR_PREFETCH))
> +				continue;
> +			oid_array_append(&to_fetch, &ce->oid);

Here we get rid of the global set/restore dance, which is nice. But
there's also a behavior change, as we've picked up QUICK. I think that's
probably the right thing to do, but I was a bit surprised not to see any
discussion in the commit message.

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 0/2] Batch fetching of missing blobs in diff and show
  2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
  2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
  2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
@ 2019-04-05 22:12   ` Jeff King
  2 siblings, 0 replies; 29+ messages in thread
From: Jeff King @ 2019-04-05 22:12 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, Johannes.Schindelin, szeder.dev

On Fri, Mar 29, 2019 at 02:39:26PM -0700, Jonathan Tan wrote:

> Thanks, everyone for the review.
> 
> Changes from v1:
>  - used test_when_finished (Szeder)
>  - used flag to inhibit fetching of missing objects (Dscho)
>  - moved the prefetch so that it also works if we request rename
>    detection, and included a test demonstrating that (not sure if that
>    was what Peff meant)

Yes, putting it in diffcore_std() is probably OK. There may be one or
two code paths that don't use that, but arguably they should, and this
will flush them out.

You could possibly get away with fetching less at that stage, since
renames would only need to see deleted/added files. But getting the
minimal set gets ugly quickly, as you have to take into account copy
settings, or even which ones we found exact matches for (where we might
not even show a content diff at all).

I think it's OK for this to be an approximation, and if anybody feels
like trying to narrow it down later, they can.

> Peff also suggested that I use an oidset instead of an oidarray in order
> to eliminate duplicates, but that makes it difficult to interface with
> fetch_objects(), which takes a pointer and a length (which is
> convenient, because if we want to fetch a single object, we can just
> point to it and use a length of 1). Maybe the best idea is to have
> oidset maintain its own OID array, and have each hash entry store an
> index instead of an OID. For now I added a NEEDSWORK.

This is probably OK. There are two reasons to deduplicate:

  1. If we expect a _lot_ of duplicates, it can be faster to de-dup as
     we go, rather than building up a giant list full of redundant
     entries (even if we later sort and de-dup it).

     But I don't think that's the case here. We'd expect relatively few
     duplicates in a single diff.

  2. If the thing we feed the result to does not handle duplicates well.
     I'm not sure how fetch_objects() does with duplicates.

If we just care about (2), then an easy fix is to just ask oid-array to
dedup. It's only mechanism right now is a for_each_unique() method,
which is not used by fetch_objects(). But it would be easy to teach it
to de-dup in place, like (completely untested):

diff --git a/sha1-array.c b/sha1-array.c
index d922e94e3f..aaea02e51e 100644
--- a/sha1-array.c
+++ b/sha1-array.c
@@ -78,6 +78,23 @@ int oid_array_for_each_unique(struct oid_array *array,
 	return 0;
 }
 
+int oid_array_dedup(struct oid_array *array)
+{
+	int i, j;
+
+	if (!array->sorted)
+		oid_array_sort(array);
+
+	for (i = 0; i < array->nr; i++) {
+		if (i > 0 && oideq(array->oid + i, array->oid + i - 1))
+			continue;
+		if (i == j)
+			j++;
+		else
+			array->oid[j++] = i;
+	}
+}
+
 void oid_array_filter(struct oid_array *array,
 		      for_each_oid_fn want,
 		      void *cb_data)

That could also be built on oid_array_for_each_unique(), but dealing
with the callbacks is probably worse than just reimplementing the few
lines of logic.

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-05 17:09       ` [PATCH] fixup! " Jonathan Tan
  2019-04-05 20:16         ` Johannes Schindelin
@ 2019-04-06  4:17         ` Duy Nguyen
  2019-04-08  3:46           ` Junio C Hamano
  2019-04-08  4:06           ` Junio C Hamano
  1 sibling, 2 replies; 29+ messages in thread
From: Duy Nguyen @ 2019-04-06  4:17 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Git Mailing List

On Sat, Apr 6, 2019 at 12:09 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> This is a fixup on the tip of jt/batch-fetch-blobs-in-diff (571debe1d9).
>
> I don't know if Junio has already merged this branch to next (he marked
> this as "Will merge to 'next'" in the "What's Cooking" email, but when I
> fetched, it hasn't been merged yet). If he has, we can use this commit
> message:
>
> diff: propagate options->repo to add_if_missing
>
> Avoid a usage of the_repository by propagating the configured repository
> to add_if_missing(). Also, prefetch only if the repository being diffed
> is the_repository (because we do not support lazy fetching for any other
> repository anyway).
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> Thanks, Duy, for noticing this.
> ---
>  diff.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/diff.c b/diff.c
> index 1eccefb4ef..811afbdfb1 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -6367,18 +6367,19 @@ void diffcore_fix_diff_index(void)
>         QSORT(q->queue, q->nr, diffnamecmp);
>  }
>
> -static void add_if_missing(struct oid_array *to_fetch,
> +static void add_if_missing(struct oid_array *to_fetch, struct repository *r,

One last nit. "struct repository *r" is often the first argument. See
https://public-inbox.org/git/xmqqsh2p6l43.fsf@gitster-ct.c.googlers.com/

>                            const struct diff_filespec *filespec)
>  {
>         if (filespec && filespec->oid_valid &&
> -           oid_object_info_extended(the_repository, &filespec->oid, NULL,
> +           oid_object_info_extended(r, &filespec->oid, NULL,
>                                      OBJECT_INFO_FOR_PREFETCH))
>                 oid_array_append(to_fetch, &filespec->oid);
>  }
>
>  void diffcore_std(struct diff_options *options)
>  {
> -       if (repository_format_partial_clone) {
> +       if (options->repo == the_repository &&
> +           repository_format_partial_clone) {
>                 /*
>                  * Prefetch the diff pairs that are about to be flushed.
>                  */
> @@ -6388,8 +6389,8 @@ void diffcore_std(struct diff_options *options)
>
>                 for (i = 0; i < q->nr; i++) {
>                         struct diff_filepair *p = q->queue[i];
> -                       add_if_missing(&to_fetch, p->one);
> -                       add_if_missing(&to_fetch, p->two);
> +                       add_if_missing(&to_fetch, options->repo, p->one);
> +                       add_if_missing(&to_fetch, options->repo, p->two);
>                 }
>                 if (to_fetch.nr)
>                         /*
> --
> 2.21.0.392.gf8f6787159e-goog
>


-- 
Duy

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-05 13:38       ` Johannes Schindelin
@ 2019-04-07  6:00         ` Christian Couder
  2019-04-08  2:36           ` Junio C Hamano
  0 siblings, 1 reply; 29+ messages in thread
From: Christian Couder @ 2019-04-07  6:00 UTC (permalink / raw)
  To: Johannes Schindelin, Junio C Hamano
  Cc: SZEDER Gábor, Jonathan Tan, git, Jeff King

Hi,

On Fri, Apr 5, 2019 at 3:38 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:

> On Thu, 4 Apr 2019, SZEDER Gábor wrote:
>
> > On Fri, Mar 29, 2019 at 02:39:28PM -0700, Jonathan Tan wrote:
> > > diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> > > new file mode 100755
> > > index 0000000000..349851be7d
> > > --- /dev/null
> > > +++ b/t/t4067-diff-partial-clone.sh
> > > @@ -0,0 +1,103 @@
> > > +#!/bin/sh
> > > +
> > > +test_description='behavior of diff when reading objects in a partial clone'
> > > +
> > > +. ./test-lib.sh
> > > +
> > > +test_expect_success 'git show batches blobs' '
> > > +   test_when_finished "rm -rf server client trace" &&
> > > +
> > > +   test_create_repo server &&
> > > +   echo a >server/a &&
> > > +   echo b >server/b &&
> > > +   git -C server add a b &&
> > > +   git -C server commit -m x &&
> > > +
> > > +   test_config -C server uploadpack.allowfilter 1 &&
> > > +   test_config -C server uploadpack.allowanysha1inwant 1 &&
> > > +   git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> > > +
> > > +   # Ensure that there is exactly 1 negotiation by checking that there is
> > > +   # only 1 "done" line sent. ("done" marks the end of negotiation.)
> > > +   GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> > > +   grep "git> done" trace >done_lines &&
> > > +   test_line_count = 1 done_lines
> >
> > These patches and 'cc/multi-promisor' don't seem to work well
> > together, and all tests checking 'test_line_count = 1 done_lines' in
> > this test script fail in current 'pu', because there are two
> > "git> done" lines.
>
> I investigated a little further, and it would seem that it is neither this
> patch nor the cc/multi-promisor patches that introduce the problem, but
> the merge between the two... The latter tries to get away from using the
> global variable `repository_format_partial_clone` while this patch
> introduces another user.

Thanks for investigating! Yeah, that's part of the problem.

The fix I would suggest is:

diff --git a/diff.c b/diff.c
index f685ab10b5..a2b1241f83 100644
--- a/diff.c
+++ b/diff.c
@@ -26,6 +26,7 @@
 #include "parse-options.h"
 #include "help.h"
 #include "fetch-object.h"
+#include "promisor-remote.h"

 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6489,7 +6490,7 @@ static void add_if_missing(struct oid_array *to_fetch,

 void diffcore_std(struct diff_options *options)
 {
-    if (repository_format_partial_clone) {
+    if (has_promisor_remote()) {
         /*
          * Prefetch the diff pairs that are about to be flushed.
          */
@@ -6506,8 +6507,7 @@ void diffcore_std(struct diff_options *options)
             /*
              * NEEDSWORK: Consider deduplicating the OIDs sent.
              */
-            fetch_objects(repository_format_partial_clone,
-                      to_fetch.oid, to_fetch.nr);
+            promisor_remote_get_direct(to_fetch.oid, to_fetch.nr);
         oid_array_clear(&to_fetch);
     }

I will send a new version with the above soon based on top of
jt/batch-fetch-blobs-in-diff in pu.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-07  6:00         ` Christian Couder
@ 2019-04-08  2:36           ` Junio C Hamano
  2019-04-08  5:51             ` Junio C Hamano
  2019-04-08  6:40             ` Christian Couder
  0 siblings, 2 replies; 29+ messages in thread
From: Junio C Hamano @ 2019-04-08  2:36 UTC (permalink / raw)
  To: Christian Couder
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

Christian Couder <christian.couder@gmail.com> writes:

> Thanks for investigating! Yeah, that's part of the problem.
>
> The fix I would suggest is:
>
> diff --git a/diff.c b/diff.c
> index f685ab10b5..a2b1241f83 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -26,6 +26,7 @@
>  #include "parse-options.h"
>  #include "help.h"
>  #include "fetch-object.h"
> +#include "promisor-remote.h"

Thanks.

>
>  #ifdef NO_FAST_WORKING_DIRECTORY
>  #define FAST_WORKING_DIRECTORY 0
> @@ -6489,7 +6490,7 @@ static void add_if_missing(struct oid_array *to_fetch,
>
>  void diffcore_std(struct diff_options *options)
>  {
> -    if (repository_format_partial_clone) {
> +    if (has_promisor_remote()) {

Hmph, I see quite a few references to the variable disappears
between next and pu.  Is it that in the new world order, nobody
outside the low-level object-access layer should look at the
variable directly, but instead ask the has_promisor_remote()
function?  If so, can we at least document that?  Making it static
(or at least renaming it) would have helped the compiler to notice
this semantic merge conflict better.

> @@ -6506,8 +6507,7 @@ void diffcore_std(struct diff_options *options)
>              /*
>               * NEEDSWORK: Consider deduplicating the OIDs sent.
>               */
> -            fetch_objects(repository_format_partial_clone,
> -                      to_fetch.oid, to_fetch.nr);
> +            promisor_remote_get_direct(to_fetch.oid, to_fetch.nr);

Likewise between fetch_objects() and promisor_remote_get_direct().
Shouldn't the underlying fetch_objects be hidden from general
callers?

>          oid_array_clear(&to_fetch);
>      }
>
> I will send a new version with the above soon based on top of
> jt/batch-fetch-blobs-in-diff in pu.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-06  4:17         ` Duy Nguyen
@ 2019-04-08  3:46           ` Junio C Hamano
  2019-04-08  4:06           ` Junio C Hamano
  1 sibling, 0 replies; 29+ messages in thread
From: Junio C Hamano @ 2019-04-08  3:46 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Jonathan Tan, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

>> -static void add_if_missing(struct oid_array *to_fetch,
>> +static void add_if_missing(struct oid_array *to_fetch, struct repository *r,
>
> One last nit. "struct repository *r" is often the first argument. See

Yes, and that's easy enough to tweak locally ;-)

Thanks.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-06  4:17         ` Duy Nguyen
  2019-04-08  3:46           ` Junio C Hamano
@ 2019-04-08  4:06           ` Junio C Hamano
  2019-04-08  9:58             ` Duy Nguyen
  1 sibling, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2019-04-08  4:06 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Jonathan Tan, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

>> Avoid a usage of the_repository by propagating the configured repository
>> to add_if_missing(). Also, prefetch only if the repository being diffed
>> is the_repository (because we do not support lazy fetching for any other
>> repository anyway).

If we are willing to stay limited to the default repository anyway,
allowing add_if_missing() to take an arbitrary repository does not
really matter, but before the caller of add_if_missing() befcomes
ready to work on an arbitrary repository, this change has to happen.

To update the caller, it seems to me that fetch_objects() must learn
to take an arbitrary repository, but is that the only thing needed?
After that, the function that the caller resides in and callchain
upwards can learn to take a repository instance if we want to be
able to diff inside an arbitrary repository.

But.  Such a change still would not allow us to compare a tree in
one repository against a tree in another repository.  It is likely
that a caller with such a need would simply make sure that objects
in both repositories are available by using the in-core alternate
object store mechanism, making it a more-or-less moot point to be
able to pass a repository instance through the callchain X-<.  We
probably should make it, and spell it out somewhere in a long term
vision shared among the developers, an explicit goal to get rid of
the internal (ab)use of the alternate object store mechanism.

With squashing the fix-up commit in, the 2/2 patch has become like
so.

Thanks, both.

-- >8 --
From: Jonathan Tan <jonathantanmy@google.com>
Date: Fri, 5 Apr 2019 10:09:34 -0700
Subject: [PATCH] diff: batch fetching of missing blobs

When running a command like "git show" or "git diff" in a partial clone,
batch all missing blobs to be fetched as one request.

This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
blobs", 2017-12-08), but for another command.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c                        |  33 +++++++++++
 t/t4067-diff-partial-clone.sh | 103 ++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100755 t/t4067-diff-partial-clone.sh

diff --git a/diff.c b/diff.c
index ec5c095199..811afbdfb1 100644
--- a/diff.c
+++ b/diff.c
@@ -25,6 +25,7 @@
 #include "packfile.h"
 #include "parse-options.h"
 #include "help.h"
+#include "fetch-object.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -6366,8 +6367,40 @@ void diffcore_fix_diff_index(void)
 	QSORT(q->queue, q->nr, diffnamecmp);
 }
 
+static void add_if_missing(struct oid_array *to_fetch, struct repository *r,
+			   const struct diff_filespec *filespec)
+{
+	if (filespec && filespec->oid_valid &&
+	    oid_object_info_extended(r, &filespec->oid, NULL,
+				     OBJECT_INFO_FOR_PREFETCH))
+		oid_array_append(to_fetch, &filespec->oid);
+}
+
 void diffcore_std(struct diff_options *options)
 {
+	if (options->repo == the_repository &&
+	    repository_format_partial_clone) {
+		/*
+		 * Prefetch the diff pairs that are about to be flushed.
+		 */
+		int i;
+		struct diff_queue_struct *q = &diff_queued_diff;
+		struct oid_array to_fetch = OID_ARRAY_INIT;
+
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+			add_if_missing(&to_fetch, options->repo, p->one);
+			add_if_missing(&to_fetch, options->repo, p->two);
+		}
+		if (to_fetch.nr)
+			/*
+			 * NEEDSWORK: Consider deduplicating the OIDs sent.
+			 */
+			fetch_objects(repository_format_partial_clone,
+				      to_fetch.oid, to_fetch.nr);
+		oid_array_clear(&to_fetch);
+	}
+
 	/* NOTE please keep the following in sync with diff_tree_combined() */
 	if (options->skip_stat_unmatch)
 		diffcore_skip_stat_unmatch(options);
diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
new file mode 100755
index 0000000000..90c8fb2901
--- /dev/null
+++ b/t/t4067-diff-partial-clone.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+
+test_description='behavior of diff when reading objects in a partial clone'
+
+. ./test-lib.sh
+
+test_expect_success 'git show batches blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff batches blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo c >server/c &&
+	echo d >server/d &&
+	git -C server add c d &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_expect_success 'diff skips same-OID blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	echo b >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	echo another-a >server/a &&
+	git -C server add a &&
+	git -C server commit -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	echo a | git hash-object --stdin >hash-old-a &&
+	echo another-a | git hash-object --stdin >hash-new-a &&
+	echo b | git hash-object --stdin >hash-b &&
+
+	# Ensure that only a and another-a are fetched.
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
+	grep "want $(cat hash-old-a)" trace &&
+	grep "want $(cat hash-new-a)" trace &&
+	! grep "want $(cat hash-b)" trace
+'
+
+test_expect_success 'diff with rename detection batches blobs' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	echo a >server/a &&
+	printf "b\nb\nb\nb\nb\n" >server/b &&
+	git -C server add a b &&
+	git -C server commit -m x &&
+	rm server/b &&
+	printf "b\nb\nb\nb\nbX\n" >server/c &&
+	git -C server add c &&
+	git -C server commit -a -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	# Ensure that there is exactly 1 negotiation by checking that there is
+	# only 1 "done" line sent. ("done" marks the end of negotiation.)
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
+	grep "similarity index" out &&
+	grep "git> done" trace >done_lines &&
+	test_line_count = 1 done_lines
+'
+
+test_done
-- 
2.21.0-196-g041f5ea1cf


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-08  2:36           ` Junio C Hamano
@ 2019-04-08  5:51             ` Junio C Hamano
  2019-04-08  6:03               ` Junio C Hamano
  2019-04-08  6:40             ` Christian Couder
  1 sibling, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2019-04-08  5:51 UTC (permalink / raw)
  To: Christian Couder
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

Junio C Hamano <gitster@pobox.com> writes:

> Christian Couder <christian.couder@gmail.com> writes:
>
>> Thanks for investigating! Yeah, that's part of the problem.
>>
>> The fix I would suggest is:
>>
>> diff --git a/diff.c b/diff.c
>> index f685ab10b5..a2b1241f83 100644
>> --- a/diff.c
>> +++ b/diff.c
>> @@ -26,6 +26,7 @@
>>  #include "parse-options.h"
>>  #include "help.h"
>>  #include "fetch-object.h"
>> +#include "promisor-remote.h"
>
> Thanks.

Together with "if we are forbidding the direct access to the
repository_format_partial_clone variable and the fetch_objects()
funciton, make it clear that is what is being done in the
multi-promisor topic", I think a patch that adds this header should
also remove inclusion of "fetch-object.h".


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-08  5:51             ` Junio C Hamano
@ 2019-04-08  6:03               ` Junio C Hamano
  2019-04-08  6:45                 ` Christian Couder
  0 siblings, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2019-04-08  6:03 UTC (permalink / raw)
  To: Christian Couder
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

Junio C Hamano <gitster@pobox.com> writes:

>>>  #include "fetch-object.h"
>>> +#include "promisor-remote.h"
>>
>> Thanks.
>
> Together with "if we are forbidding the direct access to the
> repository_format_partial_clone variable and the fetch_objects()
> funciton, make it clear that is what is being done in the
> multi-promisor topic", I think a patch that adds this header should
> also remove inclusion of "fetch-object.h".

In fact, your topic itself has the same issue.  I'll queue the
following at the tip of the topic tentatively before merging it to
'pu' with the fix we have been discussing around this thread.

Thanks.

-- >8 --
In multi-promisor world, fetch_objects() should not be used
directly; promisor_remote_get_direct() call replaces the helper.

 sha1-file.c    | 1 -
 unpack-trees.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/sha1-file.c b/sha1-file.c
index 715a2b882a..87ea8606f4 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -30,7 +30,6 @@
 #include "mergesort.h"
 #include "quote.h"
 #include "packfile.h"
-#include "fetch-object.h"
 #include "object-store.h"
 #include "promisor-remote.h"
 
diff --git a/unpack-trees.c b/unpack-trees.c
index 47353d85c3..ca0ab68c32 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -16,7 +16,6 @@
 #include "submodule-config.h"
 #include "fsmonitor.h"
 #include "object-store.h"
-#include "fetch-object.h"
 #include "promisor-remote.h"
 
 /*
-- 
2.21.0-196-g041f5ea1cf


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-08  2:36           ` Junio C Hamano
  2019-04-08  5:51             ` Junio C Hamano
@ 2019-04-08  6:40             ` Christian Couder
  2019-04-08  7:59               ` Junio C Hamano
  1 sibling, 1 reply; 29+ messages in thread
From: Christian Couder @ 2019-04-08  6:40 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

On Mon, Apr 8, 2019 at 4:36 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> >  #ifdef NO_FAST_WORKING_DIRECTORY
> >  #define FAST_WORKING_DIRECTORY 0
> > @@ -6489,7 +6490,7 @@ static void add_if_missing(struct oid_array *to_fetch,
> >
> >  void diffcore_std(struct diff_options *options)
> >  {
> > -    if (repository_format_partial_clone) {
> > +    if (has_promisor_remote()) {
>
> Hmph, I see quite a few references to the variable disappears
> between next and pu.  Is it that in the new world order, nobody
> outside the low-level object-access layer should look at the
> variable directly, but instead ask the has_promisor_remote()
> function?

Yeah, repository_format_partial_clone contains the remote configured
in extensions.partialclone, while in the new world order there can be
other promisor remotes than this one, and most of the code is only
interested in asking if we use any promisor remote.

> If so, can we at least document that?  Making it static
> (or at least renaming it) would have helped the compiler to notice
> this semantic merge conflict better.

Ok, I will see if I can make it static (or maybe rename it).

Patch 3f82acbca2 (Use promisor_remote_get_direct() and
has_promisor_remote(), 2019-04-01) in cc/multi-promisor starts with:

    Use promisor_remote_get_direct() and has_promisor_remote()

    Instead of using the repository_format_partial_clone global
    and fetch_objects() directly, let's use has_promisor_remote()
    and promisor_remote_get_direct().

    This way all the configured promisor remotes will be taken
    into account, not only the one specified by
    extensions.partialClone.

I will at least add something telling that in most cases
"repository_format_partial_clone" and fetch_objects() shouldn't be
used directly anymore.

> > @@ -6506,8 +6507,7 @@ void diffcore_std(struct diff_options *options)
> >              /*
> >               * NEEDSWORK: Consider deduplicating the OIDs sent.
> >               */
> > -            fetch_objects(repository_format_partial_clone,
> > -                      to_fetch.oid, to_fetch.nr);
> > +            promisor_remote_get_direct(to_fetch.oid, to_fetch.nr);
>
> Likewise between fetch_objects() and promisor_remote_get_direct().
> Shouldn't the underlying fetch_objects be hidden from general
> callers?

I will see if I can do that, though it has to be used in promisor-remote.c.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-08  6:03               ` Junio C Hamano
@ 2019-04-08  6:45                 ` Christian Couder
  0 siblings, 0 replies; 29+ messages in thread
From: Christian Couder @ 2019-04-08  6:45 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

On Mon, Apr 8, 2019 at 8:03 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> >>>  #include "fetch-object.h"
> >>> +#include "promisor-remote.h"
> >>
> >> Thanks.
> >
> > Together with "if we are forbidding the direct access to the
> > repository_format_partial_clone variable and the fetch_objects()
> > funciton, make it clear that is what is being done in the
> > multi-promisor topic", I think a patch that adds this header should
> > also remove inclusion of "fetch-object.h".
>
> In fact, your topic itself has the same issue.

Yeah sorry, this is the kind of things I easily forget.

> I'll queue the
> following at the tip of the topic tentatively before merging it to
> 'pu' with the fix we have been discussing around this thread.

Thanks!

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-08  6:40             ` Christian Couder
@ 2019-04-08  7:59               ` Junio C Hamano
  2019-04-08  9:56                 ` Christian Couder
  0 siblings, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2019-04-08  7:59 UTC (permalink / raw)
  To: Christian Couder
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

Christian Couder <christian.couder@gmail.com> writes:

> Patch 3f82acbca2 (Use promisor_remote_get_direct() and
> has_promisor_remote(), 2019-04-01) in cc/multi-promisor starts with:
>
>     Use promisor_remote_get_direct() and has_promisor_remote()
>
>     Instead of using the repository_format_partial_clone global
>     and fetch_objects() directly, let's use has_promisor_remote()
>     and promisor_remote_get_direct().
>
>     This way all the configured promisor remotes will be taken
>     into account, not only the one specified by
>     extensions.partialClone.
>
> I will at least add something telling that in most cases
> "repository_format_partial_clone" and fetch_objects() shouldn't be
> used directly anymore.

Yes, that would be necessary not in the log, but in-code comments
next to now "by-exception-only" API functions and variables.

>> > @@ -6506,8 +6507,7 @@ void diffcore_std(struct diff_options *options)
>> >              /*
>> >               * NEEDSWORK: Consider deduplicating the OIDs sent.
>> >               */
>> > -            fetch_objects(repository_format_partial_clone,
>> > -                      to_fetch.oid, to_fetch.nr);
>> > +            promisor_remote_get_direct(to_fetch.oid, to_fetch.nr);
>>
>> Likewise between fetch_objects() and promisor_remote_get_direct().
>> Shouldn't the underlying fetch_objects be hidden from general
>> callers?
>
> I will see if I can do that, though it has to be used in promisor-remote.c.

Does fetch-objects.[ch] even need to exist after multi-promisor
world as an external interface?  

Wouldn't it become an implementation detail of promisor-remote.[ch]
API?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 2/2] diff: batch fetching of missing blobs
  2019-04-08  7:59               ` Junio C Hamano
@ 2019-04-08  9:56                 ` Christian Couder
  0 siblings, 0 replies; 29+ messages in thread
From: Christian Couder @ 2019-04-08  9:56 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Johannes Schindelin, SZEDER Gábor, Jonathan Tan, git, Jeff King

On Mon, Apr 8, 2019 at 9:59 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> > I will at least add something telling that in most cases
> > "repository_format_partial_clone" and fetch_objects() shouldn't be
> > used directly anymore.
>
> Yes, that would be necessary not in the log, but in-code comments
> next to now "by-exception-only" API functions and variables.

Ok, I will add in-code comments then, or ...

> >> Likewise between fetch_objects() and promisor_remote_get_direct().
> >> Shouldn't the underlying fetch_objects be hidden from general
> >> callers?
> >
> > I will see if I can do that, though it has to be used in promisor-remote.c.
>
> Does fetch-objects.[ch] even need to exist after multi-promisor
> world as an external interface?
>
> Wouldn't it become an implementation detail of promisor-remote.[ch]
> API?

... yeah, I will try this.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-08  4:06           ` Junio C Hamano
@ 2019-04-08  9:58             ` Duy Nguyen
  2019-04-09  6:36               ` Junio C Hamano
  0 siblings, 1 reply; 29+ messages in thread
From: Duy Nguyen @ 2019-04-08  9:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Tan, Git Mailing List

On Mon, Apr 8, 2019 at 11:06 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Duy Nguyen <pclouds@gmail.com> writes:
>
> >> Avoid a usage of the_repository by propagating the configured repository
> >> to add_if_missing(). Also, prefetch only if the repository being diffed
> >> is the_repository (because we do not support lazy fetching for any other
> >> repository anyway).
>
> If we are willing to stay limited to the default repository anyway,
> allowing add_if_missing() to take an arbitrary repository does not
> really matter, but before the caller of add_if_missing() befcomes
> ready to work on an arbitrary repository, this change has to happen.
>
> To update the caller, it seems to me that fetch_objects() must learn
> to take an arbitrary repository, but is that the only thing needed?
> After that, the function that the caller resides in and callchain
> upwards can learn to take a repository instance if we want to be
> able to diff inside an arbitrary repository.
>
> But.  Such a change still would not allow us to compare a tree in
> one repository against a tree in another repository.

I feel lost (and the answer "go read partial clone code!" is perfectly
acceptable) but why would we need to diff trees of two different
repositories?

> It is likely
> that a caller with such a need would simply make sure that objects
> in both repositories are available by using the in-core alternate
> object store mechanism, making it a more-or-less moot point to be
> able to pass a repository instance through the callchain X-<.  We
> probably should make it, and spell it out somewhere in a long term
> vision shared among the developers, an explicit goal to get rid of
> the internal (ab)use of the alternate object store mechanism.

I think submodule code so far is doing this way. Though I don't see
any reason we need it for submodule code. Objects are not supposed to
be shared between the super- and the sub-repo.

>
> With squashing the fix-up commit in, the 2/2 patch has become like
> so.
>
> Thanks, both.
>
> -- >8 --
> From: Jonathan Tan <jonathantanmy@google.com>
> Date: Fri, 5 Apr 2019 10:09:34 -0700
> Subject: [PATCH] diff: batch fetching of missing blobs
>
> When running a command like "git show" or "git diff" in a partial clone,
> batch all missing blobs to be fetched as one request.
>
> This is similar to c0c578b33c ("unpack-trees: batch fetching of missing
> blobs", 2017-12-08), but for another command.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
>  diff.c                        |  33 +++++++++++
>  t/t4067-diff-partial-clone.sh | 103 ++++++++++++++++++++++++++++++++++
>  2 files changed, 136 insertions(+)
>  create mode 100755 t/t4067-diff-partial-clone.sh
>
> diff --git a/diff.c b/diff.c
> index ec5c095199..811afbdfb1 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -25,6 +25,7 @@
>  #include "packfile.h"
>  #include "parse-options.h"
>  #include "help.h"
> +#include "fetch-object.h"
>
>  #ifdef NO_FAST_WORKING_DIRECTORY
>  #define FAST_WORKING_DIRECTORY 0
> @@ -6366,8 +6367,40 @@ void diffcore_fix_diff_index(void)
>         QSORT(q->queue, q->nr, diffnamecmp);
>  }
>
> +static void add_if_missing(struct oid_array *to_fetch, struct repository *r,
> +                          const struct diff_filespec *filespec)
> +{
> +       if (filespec && filespec->oid_valid &&
> +           oid_object_info_extended(r, &filespec->oid, NULL,
> +                                    OBJECT_INFO_FOR_PREFETCH))
> +               oid_array_append(to_fetch, &filespec->oid);
> +}
> +
>  void diffcore_std(struct diff_options *options)
>  {
> +       if (options->repo == the_repository &&
> +           repository_format_partial_clone) {
> +               /*
> +                * Prefetch the diff pairs that are about to be flushed.
> +                */
> +               int i;
> +               struct diff_queue_struct *q = &diff_queued_diff;
> +               struct oid_array to_fetch = OID_ARRAY_INIT;
> +
> +               for (i = 0; i < q->nr; i++) {
> +                       struct diff_filepair *p = q->queue[i];
> +                       add_if_missing(&to_fetch, options->repo, p->one);
> +                       add_if_missing(&to_fetch, options->repo, p->two);
> +               }
> +               if (to_fetch.nr)
> +                       /*
> +                        * NEEDSWORK: Consider deduplicating the OIDs sent.
> +                        */
> +                       fetch_objects(repository_format_partial_clone,
> +                                     to_fetch.oid, to_fetch.nr);
> +               oid_array_clear(&to_fetch);
> +       }
> +
>         /* NOTE please keep the following in sync with diff_tree_combined() */
>         if (options->skip_stat_unmatch)
>                 diffcore_skip_stat_unmatch(options);
> diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh
> new file mode 100755
> index 0000000000..90c8fb2901
> --- /dev/null
> +++ b/t/t4067-diff-partial-clone.sh
> @@ -0,0 +1,103 @@
> +#!/bin/sh
> +
> +test_description='behavior of diff when reading objects in a partial clone'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'git show batches blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       echo b >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       # Ensure that there is exactly 1 negotiation by checking that there is
> +       # only 1 "done" line sent. ("done" marks the end of negotiation.)
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client show HEAD &&
> +       grep "git> done" trace >done_lines &&
> +       test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff batches blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       echo b >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +       echo c >server/c &&
> +       echo d >server/d &&
> +       git -C server add c d &&
> +       git -C server commit -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       # Ensure that there is exactly 1 negotiation by checking that there is
> +       # only 1 "done" line sent. ("done" marks the end of negotiation.)
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +       grep "git> done" trace >done_lines &&
> +       test_line_count = 1 done_lines
> +'
> +
> +test_expect_success 'diff skips same-OID blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       echo b >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +       echo another-a >server/a &&
> +       git -C server add a &&
> +       git -C server commit -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       echo a | git hash-object --stdin >hash-old-a &&
> +       echo another-a | git hash-object --stdin >hash-new-a &&
> +       echo b | git hash-object --stdin >hash-b &&
> +
> +       # Ensure that only a and another-a are fetched.
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff HEAD^ HEAD &&
> +       grep "want $(cat hash-old-a)" trace &&
> +       grep "want $(cat hash-new-a)" trace &&
> +       ! grep "want $(cat hash-b)" trace
> +'
> +
> +test_expect_success 'diff with rename detection batches blobs' '
> +       test_when_finished "rm -rf server client trace" &&
> +
> +       test_create_repo server &&
> +       echo a >server/a &&
> +       printf "b\nb\nb\nb\nb\n" >server/b &&
> +       git -C server add a b &&
> +       git -C server commit -m x &&
> +       rm server/b &&
> +       printf "b\nb\nb\nb\nbX\n" >server/c &&
> +       git -C server add c &&
> +       git -C server commit -a -m x &&
> +
> +       test_config -C server uploadpack.allowfilter 1 &&
> +       test_config -C server uploadpack.allowanysha1inwant 1 &&
> +       git clone --bare --filter=blob:limit=0 "file://$(pwd)/server" client &&
> +
> +       # Ensure that there is exactly 1 negotiation by checking that there is
> +       # only 1 "done" line sent. ("done" marks the end of negotiation.)
> +       GIT_TRACE_PACKET="$(pwd)/trace" git -C client diff -M HEAD^ HEAD >out &&
> +       grep "similarity index" out &&
> +       grep "git> done" trace >done_lines &&
> +       test_line_count = 1 done_lines
> +'
> +
> +test_done
> --
> 2.21.0-196-g041f5ea1cf
>


-- 
Duy

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] fixup! diff: batch fetching of missing blobs
  2019-04-08  9:58             ` Duy Nguyen
@ 2019-04-09  6:36               ` Junio C Hamano
  0 siblings, 0 replies; 29+ messages in thread
From: Junio C Hamano @ 2019-04-09  6:36 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Jonathan Tan, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

> I feel lost (and the answer "go read partial clone code!" is perfectly
> acceptable) but why would we need to diff trees of two different
> repositories?

It's just a speculation into far future to open possibilities of
what we cannot do right now.  There is no *NEED*, just like there is
no need for a single process of git to be working in multiple
repositories at the same time (you can just fork another one into
the other repository).


>> It is likely
>> that a caller with such a need would simply make sure that objects
>> in both repositories are available by using the in-core alternate
>> object store mechanism, making it a more-or-less moot point to be
>> able to pass a repository instance through the callchain X-<.  We
>> probably should make it, and spell it out somewhere in a long term
>> vision shared among the developers, an explicit goal to get rid of
>> the internal (ab)use of the alternate object store mechanism.
>
> I think submodule code so far is doing this way. Though I don't see
> any reason we need it for submodule code. Objects are not supposed to
> be shared between the super- and the sub-repo.

Yup, that is why I made that comment---if we were to pass repository
pointer throughout all the codepaths, one of the goal (eh, rather, a
useful yardstick to measure how close we are to the goal) should be
that we use less and less of that pattern to access objects from
random repositories using the in-core alternate mechanism.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2019-04-09  6:36 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-26 22:09 [PATCH] diff: batch fetching of missing blobs Jonathan Tan
2019-03-27 10:10 ` SZEDER Gábor
2019-03-27 22:02 ` Johannes Schindelin
2019-03-28  6:52 ` Jeff King
2019-03-29 21:39 ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jonathan Tan
2019-03-29 21:39   ` [PATCH v2 1/2] sha1-file: support OBJECT_INFO_FOR_PREFETCH Jonathan Tan
2019-04-05 14:13     ` Johannes Schindelin
2019-04-05 22:00     ` Jeff King
2019-03-29 21:39   ` [PATCH v2 2/2] diff: batch fetching of missing blobs Jonathan Tan
2019-04-04  2:47     ` SZEDER Gábor
2019-04-05 13:38       ` Johannes Schindelin
2019-04-07  6:00         ` Christian Couder
2019-04-08  2:36           ` Junio C Hamano
2019-04-08  5:51             ` Junio C Hamano
2019-04-08  6:03               ` Junio C Hamano
2019-04-08  6:45                 ` Christian Couder
2019-04-08  6:40             ` Christian Couder
2019-04-08  7:59               ` Junio C Hamano
2019-04-08  9:56                 ` Christian Couder
2019-04-05  9:39     ` Duy Nguyen
2019-04-05 17:09       ` [PATCH] fixup! " Jonathan Tan
2019-04-05 20:16         ` Johannes Schindelin
2019-04-06  4:17         ` Duy Nguyen
2019-04-08  3:46           ` Junio C Hamano
2019-04-08  4:06           ` Junio C Hamano
2019-04-08  9:58             ` Duy Nguyen
2019-04-09  6:36               ` Junio C Hamano
2019-04-05 14:17     ` [PATCH v2 2/2] " Johannes Schindelin
2019-04-05 22:12   ` [PATCH v2 0/2] Batch fetching of missing blobs in diff and show Jeff King

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.version-control.git
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.version-control.git
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git