[PATCH] tests: replace mingw_test_cmp with a helper in C

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* [PATCH] tests: replace mingw_test_cmp with a helper in C
@ 2022-07-29 14:53 Johannes Schindelin via GitGitGadget
  2022-07-29 14:54 ` Johannes Schindelin
                   ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-07-29 14:53 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

This helper is more performant than running the `mingw_test_cmp` code
with with MSYS2's Bash. And a lot more readable.

To accommodate t1050, which wants to compare files weighing in with 3MB
(falling outside of t1050's malloc limit of 1.5MB), we simply lift the
allocation limit by setting the environment variable GIT_ALLOC_LIMIT to
zero when calling the helper.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
    tests: replace mingw_test_cmp with a helper in C
    
    On the heels of sending a patch to fix a performance regression due to a
    mis-use of test_cmp
    [https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/],
    I was curious to see whether Git for Windows had the same issue. And it
    does not
    [https://github.com/git-for-windows/git/runs/7556381815?check_suite_focus=true#step:5:127]:
    it passes t5351 in 22 seconds, even while using test_cmp to compare pack
    files
    [https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90].
    
    The answer is of course that a test helper written in C is much faster
    than writing the same in Bash, especially on Windows. This is especially
    sad when said Bash code is only used on Windows. So I pulled out this
    helper from the years-long effort to let Git for Windows use BusyBox'
    ash to run the test suite. The result is this patch, which has been in
    Git for Windows since June 2018.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/1309

 Makefile                |  1 +
 t/helper/test-cmp.c     | 73 +++++++++++++++++++++++++++++++++++++++++
 t/helper/test-tool.c    |  1 +
 t/helper/test-tool.h    |  1 +
 t/test-lib-functions.sh | 68 +-------------------------------------
 t/test-lib.sh           |  2 +-
 6 files changed, 78 insertions(+), 68 deletions(-)
 create mode 100644 t/helper/test-cmp.c

diff --git a/Makefile b/Makefile
index 1624471badc..45f108e43a1 100644
--- a/Makefile
+++ b/Makefile
@@ -723,6 +723,7 @@ TEST_BUILTINS_OBJS += test-advise.o
 TEST_BUILTINS_OBJS += test-bitmap.o
 TEST_BUILTINS_OBJS += test-bloom.o
 TEST_BUILTINS_OBJS += test-chmtime.o
+TEST_BUILTINS_OBJS += test-cmp.o
 TEST_BUILTINS_OBJS += test-config.o
 TEST_BUILTINS_OBJS += test-crontab.o
 TEST_BUILTINS_OBJS += test-csprng.o
diff --git a/t/helper/test-cmp.c b/t/helper/test-cmp.c
new file mode 100644
index 00000000000..1c646a54bf6
--- /dev/null
+++ b/t/helper/test-cmp.c
@@ -0,0 +1,73 @@
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "gettext.h"
+#include "parse-options.h"
+#include "run-command.h"
+
+#ifdef WIN32
+#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid"
+#else
+#define NO_SUCH_DIR "/dev/null"
+#endif
+
+static int run_diff(const char *path1, const char *path2)
+{
+	const char *argv[] = {
+		"diff", "--no-index", NULL, NULL, NULL
+	};
+	const char *env[] = {
+		"GIT_PAGER=cat",
+		"GIT_DIR=" NO_SUCH_DIR,
+		"HOME=" NO_SUCH_DIR,
+		NULL
+	};
+
+	argv[2] = path1;
+	argv[3] = path2;
+	return run_command_v_opt_cd_env(argv,
+					RUN_COMMAND_NO_STDIN | RUN_GIT_CMD,
+					NULL, env);
+}
+
+int cmd__cmp(int argc, const char **argv)
+{
+	FILE *f0, *f1;
+	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
+
+	if (argc != 3)
+		die("Require exactly 2 arguments, got %d", argc);
+
+	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
+		return error_errno("could not open '%s'", argv[1]);
+	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
+		fclose(f0);
+		return error_errno("could not open '%s'", argv[2]);
+	}
+
+	for (;;) {
+		int r0 = strbuf_getline(&b0, f0);
+		int r1 = strbuf_getline(&b1, f1);
+
+		if (r0 == EOF) {
+			fclose(f0);
+			fclose(f1);
+			strbuf_release(&b0);
+			strbuf_release(&b1);
+			if (r1 == EOF)
+				return 0;
+cmp_failed:
+			if (!run_diff(argv[1], argv[2]))
+				die("Huh? 'diff --no-index %s %s' succeeded",
+				    argv[1], argv[2]);
+			return 1;
+		}
+		if (r1 == EOF || strbuf_cmp(&b0, &b1)) {
+			fclose(f0);
+			fclose(f1);
+			strbuf_release(&b0);
+			strbuf_release(&b1);
+			goto cmp_failed;
+		}
+	}
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 318fdbab0c3..3334de248a1 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -18,6 +18,7 @@ static struct test_cmd cmds[] = {
 	{ "bitmap", cmd__bitmap },
 	{ "bloom", cmd__bloom },
 	{ "chmtime", cmd__chmtime },
+	{ "cmp", cmd__cmp },
 	{ "config", cmd__config },
 	{ "crontab", cmd__crontab },
 	{ "csprng", cmd__csprng },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index bb799271631..e1104898cc3 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -8,6 +8,7 @@ int cmd__advise_if_enabled(int argc, const char **argv);
 int cmd__bitmap(int argc, const char **argv);
 int cmd__bloom(int argc, const char **argv);
 int cmd__chmtime(int argc, const char **argv);
+int cmd__cmp(int argc, const char **argv);
 int cmd__config(int argc, const char **argv);
 int cmd__crontab(int argc, const char **argv);
 int cmd__csprng(int argc, const char **argv);
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8c44856eaec..28eddbc8e36 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1240,7 +1240,7 @@ test_expect_code () {
 
 test_cmp () {
 	test "$#" -ne 2 && BUG "2 param"
-	eval "$GIT_TEST_CMP" '"$@"'
+	GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"'
 }
 
 # Check that the given config key has the expected value.
@@ -1541,72 +1541,6 @@ test_skip_or_die () {
 	error "$2"
 }
 
-# The following mingw_* functions obey POSIX shell syntax, but are actually
-# bash scripts, and are meant to be used only with bash on Windows.
-
-# A test_cmp function that treats LF and CRLF equal and avoids to fork
-# diff when possible.
-mingw_test_cmp () {
-	# Read text into shell variables and compare them. If the results
-	# are different, use regular diff to report the difference.
-	local test_cmp_a= test_cmp_b=
-
-	# When text came from stdin (one argument is '-') we must feed it
-	# to diff.
-	local stdin_for_diff=
-
-	# Since it is difficult to detect the difference between an
-	# empty input file and a failure to read the files, we go straight
-	# to diff if one of the inputs is empty.
-	if test -s "$1" && test -s "$2"
-	then
-		# regular case: both files non-empty
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-	elif test -s "$1" && test "$2" = -
-	then
-		# read 2nd file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b
-		stdin_for_diff='<<<"$test_cmp_b"'
-	elif test "$1" = - && test -s "$2"
-	then
-		# read 1st file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-		stdin_for_diff='<<<"$test_cmp_a"'
-	fi
-	test -n "$test_cmp_a" &&
-	test -n "$test_cmp_b" &&
-	test "$test_cmp_a" = "$test_cmp_b" ||
-	eval "diff -u \"\$@\" $stdin_for_diff"
-}
-
-# $1 is the name of the shell variable to fill in
-mingw_read_file_strip_cr_ () {
-	# Read line-wise using LF as the line separator
-	# and use IFS to strip CR.
-	local line
-	while :
-	do
-		if IFS=$'\r' read -r -d $'\n' line
-		then
-			# good
-			line=$line$'\n'
-		else
-			# we get here at EOF, but also if the last line
-			# was not terminated by LF; in the latter case,
-			# some text was read
-			if test -z "$line"
-			then
-				# EOF, really
-				break
-			fi
-		fi
-		eval "$1=\$$1\$line"
-	done
-}
-
 # Like "env FOO=BAR some-program", but run inside a subshell, which means
 # it also works for shell functions (though those functions cannot impact
 # the environment outside of the test_env invocation).
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 7726d1da88a..220c259e796 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1546,7 +1546,7 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
+	GIT_TEST_CMP="test-tool cmp"
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM

base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-07-29 14:53 [PATCH] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
@ 2022-07-29 14:54 ` Johannes Schindelin
  2022-07-29 16:44 ` Junio C Hamano
  2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
  2 siblings, 0 replies; 58+ messages in thread
From: Johannes Schindelin @ 2022-07-29 14:54 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget; +Cc: git

Hi,

On Fri, 29 Jul 2022, Johannes Schindelin via GitGitGadget wrote:

> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> This helper is more performant than running the `mingw_test_cmp` code
> with with MSYS2's Bash. And a lot more readable.

Ooops... s/with with/with/

Ciao,
Dscho

>
> To accommodate t1050, which wants to compare files weighing in with 3MB
> (falling outside of t1050's malloc limit of 1.5MB), we simply lift the
> allocation limit by setting the environment variable GIT_ALLOC_LIMIT to
> zero when calling the helper.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>     tests: replace mingw_test_cmp with a helper in C
>
>     On the heels of sending a patch to fix a performance regression due to a
>     mis-use of test_cmp
>     [https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/],
>     I was curious to see whether Git for Windows had the same issue. And it
>     does not
>     [https://github.com/git-for-windows/git/runs/7556381815?check_suite_focus=true#step:5:127]:
>     it passes t5351 in 22 seconds, even while using test_cmp to compare pack
>     files
>     [https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90].
>
>     The answer is of course that a test helper written in C is much faster
>     than writing the same in Bash, especially on Windows. This is especially
>     sad when said Bash code is only used on Windows. So I pulled out this
>     helper from the years-long effort to let Git for Windows use BusyBox'
>     ash to run the test suite. The result is this patch, which has been in
>     Git for Windows since June 2018.
>
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v1
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v1
> Pull-Request: https://github.com/gitgitgadget/git/pull/1309
>
>  Makefile                |  1 +
>  t/helper/test-cmp.c     | 73 +++++++++++++++++++++++++++++++++++++++++
>  t/helper/test-tool.c    |  1 +
>  t/helper/test-tool.h    |  1 +
>  t/test-lib-functions.sh | 68 +-------------------------------------
>  t/test-lib.sh           |  2 +-
>  6 files changed, 78 insertions(+), 68 deletions(-)
>  create mode 100644 t/helper/test-cmp.c
>
> diff --git a/Makefile b/Makefile
> index 1624471badc..45f108e43a1 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -723,6 +723,7 @@ TEST_BUILTINS_OBJS += test-advise.o
>  TEST_BUILTINS_OBJS += test-bitmap.o
>  TEST_BUILTINS_OBJS += test-bloom.o
>  TEST_BUILTINS_OBJS += test-chmtime.o
> +TEST_BUILTINS_OBJS += test-cmp.o
>  TEST_BUILTINS_OBJS += test-config.o
>  TEST_BUILTINS_OBJS += test-crontab.o
>  TEST_BUILTINS_OBJS += test-csprng.o
> diff --git a/t/helper/test-cmp.c b/t/helper/test-cmp.c
> new file mode 100644
> index 00000000000..1c646a54bf6
> --- /dev/null
> +++ b/t/helper/test-cmp.c
> @@ -0,0 +1,73 @@
> +#include "test-tool.h"
> +#include "git-compat-util.h"
> +#include "strbuf.h"
> +#include "gettext.h"
> +#include "parse-options.h"
> +#include "run-command.h"
> +
> +#ifdef WIN32
> +#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid"
> +#else
> +#define NO_SUCH_DIR "/dev/null"
> +#endif
> +
> +static int run_diff(const char *path1, const char *path2)
> +{
> +	const char *argv[] = {
> +		"diff", "--no-index", NULL, NULL, NULL
> +	};
> +	const char *env[] = {
> +		"GIT_PAGER=cat",
> +		"GIT_DIR=" NO_SUCH_DIR,
> +		"HOME=" NO_SUCH_DIR,
> +		NULL
> +	};
> +
> +	argv[2] = path1;
> +	argv[3] = path2;
> +	return run_command_v_opt_cd_env(argv,
> +					RUN_COMMAND_NO_STDIN | RUN_GIT_CMD,
> +					NULL, env);
> +}
> +
> +int cmd__cmp(int argc, const char **argv)
> +{
> +	FILE *f0, *f1;
> +	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
> +
> +	if (argc != 3)
> +		die("Require exactly 2 arguments, got %d", argc);
> +
> +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
> +		return error_errno("could not open '%s'", argv[1]);
> +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
> +		fclose(f0);
> +		return error_errno("could not open '%s'", argv[2]);
> +	}
> +
> +	for (;;) {
> +		int r0 = strbuf_getline(&b0, f0);
> +		int r1 = strbuf_getline(&b1, f1);
> +
> +		if (r0 == EOF) {
> +			fclose(f0);
> +			fclose(f1);
> +			strbuf_release(&b0);
> +			strbuf_release(&b1);
> +			if (r1 == EOF)
> +				return 0;
> +cmp_failed:
> +			if (!run_diff(argv[1], argv[2]))
> +				die("Huh? 'diff --no-index %s %s' succeeded",
> +				    argv[1], argv[2]);
> +			return 1;
> +		}
> +		if (r1 == EOF || strbuf_cmp(&b0, &b1)) {
> +			fclose(f0);
> +			fclose(f1);
> +			strbuf_release(&b0);
> +			strbuf_release(&b1);
> +			goto cmp_failed;
> +		}
> +	}
> +}
> diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
> index 318fdbab0c3..3334de248a1 100644
> --- a/t/helper/test-tool.c
> +++ b/t/helper/test-tool.c
> @@ -18,6 +18,7 @@ static struct test_cmd cmds[] = {
>  	{ "bitmap", cmd__bitmap },
>  	{ "bloom", cmd__bloom },
>  	{ "chmtime", cmd__chmtime },
> +	{ "cmp", cmd__cmp },
>  	{ "config", cmd__config },
>  	{ "crontab", cmd__crontab },
>  	{ "csprng", cmd__csprng },
> diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
> index bb799271631..e1104898cc3 100644
> --- a/t/helper/test-tool.h
> +++ b/t/helper/test-tool.h
> @@ -8,6 +8,7 @@ int cmd__advise_if_enabled(int argc, const char **argv);
>  int cmd__bitmap(int argc, const char **argv);
>  int cmd__bloom(int argc, const char **argv);
>  int cmd__chmtime(int argc, const char **argv);
> +int cmd__cmp(int argc, const char **argv);
>  int cmd__config(int argc, const char **argv);
>  int cmd__crontab(int argc, const char **argv);
>  int cmd__csprng(int argc, const char **argv);
> diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
> index 8c44856eaec..28eddbc8e36 100644
> --- a/t/test-lib-functions.sh
> +++ b/t/test-lib-functions.sh
> @@ -1240,7 +1240,7 @@ test_expect_code () {
>
>  test_cmp () {
>  	test "$#" -ne 2 && BUG "2 param"
> -	eval "$GIT_TEST_CMP" '"$@"'
> +	GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"'
>  }
>
>  # Check that the given config key has the expected value.
> @@ -1541,72 +1541,6 @@ test_skip_or_die () {
>  	error "$2"
>  }
>
> -# The following mingw_* functions obey POSIX shell syntax, but are actually
> -# bash scripts, and are meant to be used only with bash on Windows.
> -
> -# A test_cmp function that treats LF and CRLF equal and avoids to fork
> -# diff when possible.
> -mingw_test_cmp () {
> -	# Read text into shell variables and compare them. If the results
> -	# are different, use regular diff to report the difference.
> -	local test_cmp_a= test_cmp_b=
> -
> -	# When text came from stdin (one argument is '-') we must feed it
> -	# to diff.
> -	local stdin_for_diff=
> -
> -	# Since it is difficult to detect the difference between an
> -	# empty input file and a failure to read the files, we go straight
> -	# to diff if one of the inputs is empty.
> -	if test -s "$1" && test -s "$2"
> -	then
> -		# regular case: both files non-empty
> -		mingw_read_file_strip_cr_ test_cmp_a <"$1"
> -		mingw_read_file_strip_cr_ test_cmp_b <"$2"
> -	elif test -s "$1" && test "$2" = -
> -	then
> -		# read 2nd file from stdin
> -		mingw_read_file_strip_cr_ test_cmp_a <"$1"
> -		mingw_read_file_strip_cr_ test_cmp_b
> -		stdin_for_diff='<<<"$test_cmp_b"'
> -	elif test "$1" = - && test -s "$2"
> -	then
> -		# read 1st file from stdin
> -		mingw_read_file_strip_cr_ test_cmp_a
> -		mingw_read_file_strip_cr_ test_cmp_b <"$2"
> -		stdin_for_diff='<<<"$test_cmp_a"'
> -	fi
> -	test -n "$test_cmp_a" &&
> -	test -n "$test_cmp_b" &&
> -	test "$test_cmp_a" = "$test_cmp_b" ||
> -	eval "diff -u \"\$@\" $stdin_for_diff"
> -}
> -
> -# $1 is the name of the shell variable to fill in
> -mingw_read_file_strip_cr_ () {
> -	# Read line-wise using LF as the line separator
> -	# and use IFS to strip CR.
> -	local line
> -	while :
> -	do
> -		if IFS=$'\r' read -r -d $'\n' line
> -		then
> -			# good
> -			line=$line$'\n'
> -		else
> -			# we get here at EOF, but also if the last line
> -			# was not terminated by LF; in the latter case,
> -			# some text was read
> -			if test -z "$line"
> -			then
> -				# EOF, really
> -				break
> -			fi
> -		fi
> -		eval "$1=\$$1\$line"
> -	done
> -}
> -
>  # Like "env FOO=BAR some-program", but run inside a subshell, which means
>  # it also works for shell functions (though those functions cannot impact
>  # the environment outside of the test_env invocation).
> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 7726d1da88a..220c259e796 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1546,7 +1546,7 @@ case $uname_s in
>  	test_set_prereq SED_STRIPS_CR
>  	test_set_prereq GREP_STRIPS_CR
>  	test_set_prereq WINDOWS
> -	GIT_TEST_CMP=mingw_test_cmp
> +	GIT_TEST_CMP="test-tool cmp"
>  	;;
>  *CYGWIN*)
>  	test_set_prereq POSIXPERM
>
> base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
> --
> gitgitgadget
>
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-07-29 14:53 [PATCH] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
  2022-07-29 14:54 ` Johannes Schindelin
@ 2022-07-29 16:44 ` Junio C Hamano
  2022-09-06 13:10   ` Johannes Schindelin
  2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
  2 siblings, 1 reply; 58+ messages in thread
From: Junio C Hamano @ 2022-07-29 16:44 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget; +Cc: git, Johannes Schindelin

"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
writes:

> +	const char *argv[] = {
> +		"diff", "--no-index", NULL, NULL, NULL
> +	};

Don't we want to have "--" before the two paths?

> +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
> +		return error_errno("could not open '%s'", argv[1]);
> +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
> +		fclose(f0);
> +		return error_errno("could not open '%s'", argv[2]);
> +	}

It is tricky that you need to take "-" and treat it as the standard
input stream in either argv[1] or argv[2] (but not both).  If would
be a different story in an end-user facing program, but because this
is a test helper, feeding wrong input is developer's fault, and I do
not mind lack of attention to detail of error checking to make sure
we avoid comparing alternating lines of the standard input.

> +	for (;;) {
> +		int r0 = strbuf_getline(&b0, f0);
> +		int r1 = strbuf_getline(&b1, f1);
> +
> +		if (r0 == EOF) {
> +			fclose(f0);
> +			fclose(f1);
> +			strbuf_release(&b0);
> +			strbuf_release(&b1);
> +			if (r1 == EOF)
> +				return 0;

If both hit the EOF at the same time, we know they are the same, OK.

> +cmp_failed:
> +			if (!run_diff(argv[1], argv[2]))

If one of argv[] was "-", then this wouldn't work correctly, as the
other file is read from the beginning but the "-" side have consumed
the initial part of the input and we cannot unseek it.  This bug
needs to be fixed only if we expect a useful and reliable output
from the helper.

But otherwise the idea is sound.  We compare them line by line,
using strbuf_getline() to ignore differences in CRLF and LF that
originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
random LF <> CRLF conversions, 2013-10-26).  Only when we find the
input different, we use "git diff --no-index" to make the difference
(and unfortunately more, as it does not ignore CRLF <> LF
differences) visible.

> +				die("Huh? 'diff --no-index %s %s' succeeded",
> +				    argv[1], argv[2]);

Nice attention to (possibly irrelevant) detail here.  I would have
ignored the return value and reported "they are different" at this
point, though.  The line-by-line comparison we did was the
authoritative one, and "git diff --no-index" is merely used for
human readable output.

In any case, "test-tool mingwcmp" would be a better name that
highlights the spirit of 4d715ac0 to ignore CRLF <> LF issues.  IOW,
it does a lot more than "cmp" replacement, and we shouldn't mislead
users/developers into thinking it is a plain "cmp" replacement.

Thanks.

> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 7726d1da88a..220c259e796 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1546,7 +1546,7 @@ case $uname_s in
>  	test_set_prereq SED_STRIPS_CR
>  	test_set_prereq GREP_STRIPS_CR
>  	test_set_prereq WINDOWS
> -	GIT_TEST_CMP=mingw_test_cmp
> +	GIT_TEST_CMP="test-tool cmp"
>  	;;
>  *CYGWIN*)
>  	test_set_prereq POSIXPERM
>
> base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-07-29 16:44 ` Junio C Hamano
@ 2022-09-06 13:10   ` Johannes Schindelin
  2022-09-07 12:09     ` René Scharfe
  0 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin @ 2022-09-06 13:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Schindelin via GitGitGadget, git

Hi Junio,

On Fri, 29 Jul 2022, Junio C Hamano wrote:

> "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
> writes:
>
> > +	const char *argv[] = {
> > +		"diff", "--no-index", NULL, NULL, NULL
> > +	};
>
> Don't we want to have "--" before the two paths?

Yes!

> > +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
> > +		return error_errno("could not open '%s'", argv[1]);
> > +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
> > +		fclose(f0);
> > +		return error_errno("could not open '%s'", argv[2]);
> > +	}
>
> It is tricky that you need to take "-" and treat it as the standard
> input stream in either argv[1] or argv[2] (but not both).  If would
> be a different story in an end-user facing program, but because this
> is a test helper, feeding wrong input is developer's fault, and I do
> not mind lack of attention to detail of error checking to make sure
> we avoid comparing alternating lines of the standard input.

No, you're right, I've added a guard that prevents `test-tool cmp - -`
from failing in obscure ways.

> > +	for (;;) {
> > +		int r0 = strbuf_getline(&b0, f0);
> > +		int r1 = strbuf_getline(&b1, f1);
> > +
> > +		if (r0 == EOF) {
> > +			fclose(f0);
> > +			fclose(f1);
> > +			strbuf_release(&b0);
> > +			strbuf_release(&b1);
> > +			if (r1 == EOF)
> > +				return 0;
>
> If both hit the EOF at the same time, we know they are the same, OK.
>
> > +cmp_failed:
> > +			if (!run_diff(argv[1], argv[2]))
>
> If one of argv[] was "-", then this wouldn't work correctly, as the
> other file is read from the beginning but the "-" side have consumed
> the initial part of the input and we cannot unseek it.  This bug
> needs to be fixed only if we expect a useful and reliable output
> from the helper.

Right. I've added a clause that says that we cannot show the diff because
`stdin` has been consumed already.

> But otherwise the idea is sound.  We compare them line by line,
> using strbuf_getline() to ignore differences in CRLF and LF that
> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
> random LF <> CRLF conversions, 2013-10-26).  Only when we find the
> input different, we use "git diff --no-index" to make the difference
> (and unfortunately more, as it does not ignore CRLF <> LF
> differences) visible.
>
> > +				die("Huh? 'diff --no-index %s %s' succeeded",
> > +				    argv[1], argv[2]);
>
> Nice attention to (possibly irrelevant) detail here.  I would have
> ignored the return value and reported "they are different" at this
> point, though.  The line-by-line comparison we did was the
> authoritative one, and "git diff --no-index" is merely used for
> human readable output.
>
> In any case, "test-tool mingwcmp" would be a better name that
> highlights the spirit of 4d715ac0 to ignore CRLF <> LF issues.  IOW,
> it does a lot more than "cmp" replacement, and we shouldn't mislead
> users/developers into thinking it is a plain "cmp" replacement.

Fair point. The Unix tool `cmp` does not care about line endings at all,
so when you come from a Unix background you will expect the same to be
true for `test-tool cmp`.

On the other hand, you will expect the same to be true for `test_cmp`,
too, which is not the case, and the root cause of why I had to come up
with 32ed3314c10 (t5351: avoid using `test_cmp` for binary data,
2022-07-29).

Having said that, I agree that the test tool name should reflect better
what the subcommand does.

I do dislike the proposed name `mingwcmp`. Not only because it is
misleading, as the purpose is not to compare MINGW-specific files but
instead the purpose is to compare text files (and, in fact, the tool works
just fine on Linux and macOS, too). But also because it would contribute
to just how much of a second-class citizen the MINGW-based build is in Git
land: From choosing to implement large parts, including the entire test
suite as well as the performance benchmarks, in POSIX scripts (which plays
to Windows' weaknesses in a big way) to massively favoring spawned
processes over multi-threading (which plays to Linux' strengths and to
Windows' weaknesses), to a still-inherent assumption that the underlying
filesystem is case-sensitive (think: branch names), to an implicit
agreement in the core Git community that patch contributions need not take
care of working well on Windows (but that that's the job "of Windows folk"
instead). This is kind of at odds with the fact that we must assume that
half of Git's users are Windows-based (we can only assume, based on
surveys, because we successfully avoid any kind of even opt-in telemetry
that would give us hard data). I definitely want to stay away from making
that second-citizenry even worse.

So I am going with the name `test-tool text-cmp` instead.

Thank you for your review,
Dscho

>
> Thanks.
>
> > diff --git a/t/test-lib.sh b/t/test-lib.sh
> > index 7726d1da88a..220c259e796 100644
> > --- a/t/test-lib.sh
> > +++ b/t/test-lib.sh
> > @@ -1546,7 +1546,7 @@ case $uname_s in
> >  	test_set_prereq SED_STRIPS_CR
> >  	test_set_prereq GREP_STRIPS_CR
> >  	test_set_prereq WINDOWS
> > -	GIT_TEST_CMP=mingw_test_cmp
> > +	GIT_TEST_CMP="test-tool cmp"
> >  	;;
> >  *CYGWIN*)
> >  	test_set_prereq POSIXPERM
> >
> > base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v2 0/2] tests: replace mingw_test_cmp with a helper in C
  2022-07-29 14:53 [PATCH] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
  2022-07-29 14:54 ` Johannes Schindelin
  2022-07-29 16:44 ` Junio C Hamano
@ 2022-09-06 13:10 ` Johannes Schindelin via GitGitGadget
  2022-09-06 13:10   ` [PATCH v2 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
                     ` (3 more replies)
  2 siblings, 4 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-09-06 13:10 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin

On the heels of sending a patch to fix a performance regression due to a
mis-use of test_cmp
[https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/],
I was curious to see whether Git for Windows had the same issue. And it does
not
[https://github.com/git-for-windows/git/runs/7556381815?check_suite_focus=true#step:5:127]:
it passes t5351 in 22 seconds, even while using test_cmp to compare pack
files
[https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90].

The answer is of course that a test helper written in C is much faster than
writing the same in Bash, especially on Windows. This is especially sad when
said Bash code is only used on Windows. So I pulled out this helper from the
years-long effort to let Git for Windows use BusyBox' ash to run the test
suite. The result is this patch, which has been in Git for Windows since
June 2018.

Changes since v1:

 * Fixed double "with" in the commit message.

Johannes Schindelin (2):
  t0021: use Windows-friendly `pwd`
  tests: replace mingw_test_cmp with a helper in C

 Makefile                 |  1 +
 t/helper/test-text-cmp.c | 78 ++++++++++++++++++++++++++++++++++++++++
 t/helper/test-tool.c     |  1 +
 t/helper/test-tool.h     |  1 +
 t/t0021-conversion.sh    |  4 +--
 t/test-lib-functions.sh  | 68 +----------------------------------
 t/test-lib.sh            |  2 +-
 7 files changed, 85 insertions(+), 70 deletions(-)
 create mode 100644 t/helper/test-text-cmp.c


base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/1309

Range-diff vs v1:

 -:  ----------- > 1:  ad7c41401ee t0021: use Windows-friendly `pwd`
 1:  eacbbc53700 ! 2:  1f5366f1379 tests: replace mingw_test_cmp with a helper in C
     @@ Commit message
          tests: replace mingw_test_cmp with a helper in C
      
          This helper is more performant than running the `mingw_test_cmp` code
     -    with with MSYS2's Bash. And a lot more readable.
     +    with MSYS2's Bash. And a lot more readable.
      
          To accommodate t1050, which wants to compare files weighing in with 3MB
          (falling outside of t1050's malloc limit of 1.5MB), we simply lift the
     @@ Commit message
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
       ## Makefile ##
     -@@ Makefile: TEST_BUILTINS_OBJS += test-advise.o
     - TEST_BUILTINS_OBJS += test-bitmap.o
     - TEST_BUILTINS_OBJS += test-bloom.o
     - TEST_BUILTINS_OBJS += test-chmtime.o
     -+TEST_BUILTINS_OBJS += test-cmp.o
     - TEST_BUILTINS_OBJS += test-config.o
     - TEST_BUILTINS_OBJS += test-crontab.o
     - TEST_BUILTINS_OBJS += test-csprng.o
     +@@ Makefile: TEST_BUILTINS_OBJS += test-string-list.o
     + TEST_BUILTINS_OBJS += test-submodule-config.o
     + TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
     + TEST_BUILTINS_OBJS += test-subprocess.o
     ++TEST_BUILTINS_OBJS += test-text-cmp.o
     + TEST_BUILTINS_OBJS += test-trace2.o
     + TEST_BUILTINS_OBJS += test-urlmatch-normalization.o
     + TEST_BUILTINS_OBJS += test-userdiff.o
      
     - ## t/helper/test-cmp.c (new) ##
     + ## t/helper/test-text-cmp.c (new) ##
      @@
      +#include "test-tool.h"
      +#include "git-compat-util.h"
     @@ t/helper/test-cmp.c (new)
      +static int run_diff(const char *path1, const char *path2)
      +{
      +	const char *argv[] = {
     -+		"diff", "--no-index", NULL, NULL, NULL
     ++		"diff", "--no-index", "--", NULL, NULL, NULL
      +	};
      +	const char *env[] = {
      +		"GIT_PAGER=cat",
     @@ t/helper/test-cmp.c (new)
      +		NULL
      +	};
      +
     -+	argv[2] = path1;
     -+	argv[3] = path2;
     ++	argv[3] = path1;
     ++	argv[4] = path2;
      +	return run_command_v_opt_cd_env(argv,
      +					RUN_COMMAND_NO_STDIN | RUN_GIT_CMD,
      +					NULL, env);
      +}
      +
     -+int cmd__cmp(int argc, const char **argv)
     ++int cmd__text_cmp(int argc, const char **argv)
      +{
      +	FILE *f0, *f1;
      +	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
     @@ t/helper/test-cmp.c (new)
      +	if (argc != 3)
      +		die("Require exactly 2 arguments, got %d", argc);
      +
     ++	if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-"))
     ++		die("only one parameter can refer to `stdin` but not both");
     ++
      +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
      +		return error_errno("could not open '%s'", argv[1]);
      +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
     @@ t/helper/test-cmp.c (new)
      +			if (r1 == EOF)
      +				return 0;
      +cmp_failed:
     -+			if (!run_diff(argv[1], argv[2]))
     ++			if (!strcmp(argv[1], "-") || !strcmp(argv[2], "-"))
     ++				warning("cannot show diff because `stdin` was already consumed");
     ++			else if (!run_diff(argv[1], argv[2]))
      +				die("Huh? 'diff --no-index %s %s' succeeded",
      +				    argv[1], argv[2]);
      +			return 1;
     @@ t/helper/test-cmp.c (new)
      
       ## t/helper/test-tool.c ##
      @@ t/helper/test-tool.c: static struct test_cmd cmds[] = {
     - 	{ "bitmap", cmd__bitmap },
     - 	{ "bloom", cmd__bloom },
     - 	{ "chmtime", cmd__chmtime },
     -+	{ "cmp", cmd__cmp },
     - 	{ "config", cmd__config },
     - 	{ "crontab", cmd__crontab },
     - 	{ "csprng", cmd__csprng },
     + 	{ "submodule-config", cmd__submodule_config },
     + 	{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
     + 	{ "subprocess", cmd__subprocess },
     ++	{ "text-cmp", cmd__text_cmp },
     + 	{ "trace2", cmd__trace2 },
     + 	{ "userdiff", cmd__userdiff },
     + 	{ "urlmatch-normalization", cmd__urlmatch_normalization },
      
       ## t/helper/test-tool.h ##
     -@@ t/helper/test-tool.h: int cmd__advise_if_enabled(int argc, const char **argv);
     - int cmd__bitmap(int argc, const char **argv);
     - int cmd__bloom(int argc, const char **argv);
     - int cmd__chmtime(int argc, const char **argv);
     -+int cmd__cmp(int argc, const char **argv);
     - int cmd__config(int argc, const char **argv);
     - int cmd__crontab(int argc, const char **argv);
     - int cmd__csprng(int argc, const char **argv);
     +@@ t/helper/test-tool.h: int cmd__string_list(int argc, const char **argv);
     + int cmd__submodule_config(int argc, const char **argv);
     + int cmd__submodule_nested_repo_config(int argc, const char **argv);
     + int cmd__subprocess(int argc, const char **argv);
     ++int cmd__text_cmp(int argc, const char **argv);
     + int cmd__trace2(int argc, const char **argv);
     + int cmd__userdiff(int argc, const char **argv);
     + int cmd__urlmatch_normalization(int argc, const char **argv);
      
       ## t/test-lib-functions.sh ##
      @@ t/test-lib-functions.sh: test_expect_code () {
     @@ t/test-lib.sh: case $uname_s in
       	test_set_prereq GREP_STRIPS_CR
       	test_set_prereq WINDOWS
      -	GIT_TEST_CMP=mingw_test_cmp
     -+	GIT_TEST_CMP="test-tool cmp"
     ++	GIT_TEST_CMP="test-tool text-cmp"
       	;;
       *CYGWIN*)
       	test_set_prereq POSIXPERM

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v2 1/2] t0021: use Windows-friendly `pwd`
  2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
@ 2022-09-06 13:10   ` Johannes Schindelin via GitGitGadget
  2022-09-06 13:10   ` [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-09-06 13:10 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

In Git for Windows, when passing paths from shell scripts to regular
Win32 executables, thanks to the MSYS2 runtime a somewhat magic path
conversion happens that lets the shell script think that there is a file
at `/git/Makefile` and the Win32 process it spawned thinks that the
shell script said `C:/git-sdk-64/git/Makefile` instead.

This conversion is documented in detail over here:
https://www.msys2.org/docs/filesystem-paths/#automatic-unix-windows-path-conversion

As all automatic conversions, there are gaps. For example, to avoid
mistaking command-line options like `/LOG=log.txt` (which are quite
common in the Windows world) from being mistaken for a Unix-style
absolute path, the MSYS2 runtime specifically exempts arguments
containing a `=` character from that conversion.

We are about to change `test_cmp` to use a test helper, which is such a
Win32 process.

In combination, this would cause a failure in `t0021-conversion.sh`
where we pass an absolute path containing an equal character to the
`test_cmp` function.

Seeing as the Unix tools like `cp` and `diff` that are used by Git's
test suite in the Git for Windows SDK (thanks to the MSYS2 project)
understand both Unix-style as well as Windows-style paths, we can stave
off this problem by simply switching to Windows-style paths and
side-stepping the need for any automatic path conversion.

Note: The `PATH` variable is obviously special, as it is colon-separated
in the MSYS2 Bash used by Git for Windows, and therefore _cannot_
contain absolute Windows-style paths, lest the colon after the drive
letter is mistaken for a path separator. Therefore, we need to be
careful to keep the Unix-style when modifying the `PATH` variable.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/t0021-conversion.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index bad37abad2c..15482fa78e3 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -8,8 +8,8 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-terminal.sh

-TEST_ROOT="$PWD"
-PATH=$TEST_ROOT:$PATH
+PATH=$PWD:$PATH
+TEST_ROOT="$(pwd)"

 write_script <<\EOF "$TEST_ROOT/rot13.sh"
 tr \
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C
  2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
  2022-09-06 13:10   ` [PATCH v2 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
@ 2022-09-06 13:10   ` Johannes Schindelin via GitGitGadget
  2022-09-07 11:57     ` Ævar Arnfjörð Bjarmason
  2022-09-07  9:04   ` [PATCH v2 0/2] " Johannes Schindelin
  2022-11-12 22:07   ` [PATCH v3 " Johannes Schindelin via GitGitGadget
  3 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-09-06 13:10 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

This helper is more performant than running the `mingw_test_cmp` code
with MSYS2's Bash. And a lot more readable.

To accommodate t1050, which wants to compare files weighing in with 3MB
(falling outside of t1050's malloc limit of 1.5MB), we simply lift the
allocation limit by setting the environment variable GIT_ALLOC_LIMIT to
zero when calling the helper.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Makefile                 |  1 +
 t/helper/test-text-cmp.c | 78 ++++++++++++++++++++++++++++++++++++++++
 t/helper/test-tool.c     |  1 +
 t/helper/test-tool.h     |  1 +
 t/test-lib-functions.sh  | 68 +----------------------------------
 t/test-lib.sh            |  2 +-
 6 files changed, 83 insertions(+), 68 deletions(-)
 create mode 100644 t/helper/test-text-cmp.c

diff --git a/Makefile b/Makefile
index 1624471badc..73db55bba0f 100644
--- a/Makefile
+++ b/Makefile
@@ -786,6 +786,7 @@ TEST_BUILTINS_OBJS += test-string-list.o
 TEST_BUILTINS_OBJS += test-submodule-config.o
 TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
 TEST_BUILTINS_OBJS += test-subprocess.o
+TEST_BUILTINS_OBJS += test-text-cmp.o
 TEST_BUILTINS_OBJS += test-trace2.o
 TEST_BUILTINS_OBJS += test-urlmatch-normalization.o
 TEST_BUILTINS_OBJS += test-userdiff.o
diff --git a/t/helper/test-text-cmp.c b/t/helper/test-text-cmp.c
new file mode 100644
index 00000000000..7c26d925086
--- /dev/null
+++ b/t/helper/test-text-cmp.c
@@ -0,0 +1,78 @@
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "gettext.h"
+#include "parse-options.h"
+#include "run-command.h"
+
+#ifdef WIN32
+#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid"
+#else
+#define NO_SUCH_DIR "/dev/null"
+#endif
+
+static int run_diff(const char *path1, const char *path2)
+{
+	const char *argv[] = {
+		"diff", "--no-index", "--", NULL, NULL, NULL
+	};
+	const char *env[] = {
+		"GIT_PAGER=cat",
+		"GIT_DIR=" NO_SUCH_DIR,
+		"HOME=" NO_SUCH_DIR,
+		NULL
+	};
+
+	argv[3] = path1;
+	argv[4] = path2;
+	return run_command_v_opt_cd_env(argv,
+					RUN_COMMAND_NO_STDIN | RUN_GIT_CMD,
+					NULL, env);
+}
+
+int cmd__text_cmp(int argc, const char **argv)
+{
+	FILE *f0, *f1;
+	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
+
+	if (argc != 3)
+		die("Require exactly 2 arguments, got %d", argc);
+
+	if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-"))
+		die("only one parameter can refer to `stdin` but not both");
+
+	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
+		return error_errno("could not open '%s'", argv[1]);
+	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
+		fclose(f0);
+		return error_errno("could not open '%s'", argv[2]);
+	}
+
+	for (;;) {
+		int r0 = strbuf_getline(&b0, f0);
+		int r1 = strbuf_getline(&b1, f1);
+
+		if (r0 == EOF) {
+			fclose(f0);
+			fclose(f1);
+			strbuf_release(&b0);
+			strbuf_release(&b1);
+			if (r1 == EOF)
+				return 0;
+cmp_failed:
+			if (!strcmp(argv[1], "-") || !strcmp(argv[2], "-"))
+				warning("cannot show diff because `stdin` was already consumed");
+			else if (!run_diff(argv[1], argv[2]))
+				die("Huh? 'diff --no-index %s %s' succeeded",
+				    argv[1], argv[2]);
+			return 1;
+		}
+		if (r1 == EOF || strbuf_cmp(&b0, &b1)) {
+			fclose(f0);
+			fclose(f1);
+			strbuf_release(&b0);
+			strbuf_release(&b1);
+			goto cmp_failed;
+		}
+	}
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 318fdbab0c3..c6654ebc48b 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -81,6 +81,7 @@ static struct test_cmd cmds[] = {
 	{ "submodule-config", cmd__submodule_config },
 	{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
 	{ "subprocess", cmd__subprocess },
+	{ "text-cmp", cmd__text_cmp },
 	{ "trace2", cmd__trace2 },
 	{ "userdiff", cmd__userdiff },
 	{ "urlmatch-normalization", cmd__urlmatch_normalization },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index bb799271631..2acfd2bcabc 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -71,6 +71,7 @@ int cmd__string_list(int argc, const char **argv);
 int cmd__submodule_config(int argc, const char **argv);
 int cmd__submodule_nested_repo_config(int argc, const char **argv);
 int cmd__subprocess(int argc, const char **argv);
+int cmd__text_cmp(int argc, const char **argv);
 int cmd__trace2(int argc, const char **argv);
 int cmd__userdiff(int argc, const char **argv);
 int cmd__urlmatch_normalization(int argc, const char **argv);
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8c44856eaec..28eddbc8e36 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1240,7 +1240,7 @@ test_expect_code () {
 
 test_cmp () {
 	test "$#" -ne 2 && BUG "2 param"
-	eval "$GIT_TEST_CMP" '"$@"'
+	GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"'
 }
 
 # Check that the given config key has the expected value.
@@ -1541,72 +1541,6 @@ test_skip_or_die () {
 	error "$2"
 }
 
-# The following mingw_* functions obey POSIX shell syntax, but are actually
-# bash scripts, and are meant to be used only with bash on Windows.
-
-# A test_cmp function that treats LF and CRLF equal and avoids to fork
-# diff when possible.
-mingw_test_cmp () {
-	# Read text into shell variables and compare them. If the results
-	# are different, use regular diff to report the difference.
-	local test_cmp_a= test_cmp_b=
-
-	# When text came from stdin (one argument is '-') we must feed it
-	# to diff.
-	local stdin_for_diff=
-
-	# Since it is difficult to detect the difference between an
-	# empty input file and a failure to read the files, we go straight
-	# to diff if one of the inputs is empty.
-	if test -s "$1" && test -s "$2"
-	then
-		# regular case: both files non-empty
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-	elif test -s "$1" && test "$2" = -
-	then
-		# read 2nd file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b
-		stdin_for_diff='<<<"$test_cmp_b"'
-	elif test "$1" = - && test -s "$2"
-	then
-		# read 1st file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-		stdin_for_diff='<<<"$test_cmp_a"'
-	fi
-	test -n "$test_cmp_a" &&
-	test -n "$test_cmp_b" &&
-	test "$test_cmp_a" = "$test_cmp_b" ||
-	eval "diff -u \"\$@\" $stdin_for_diff"
-}
-
-# $1 is the name of the shell variable to fill in
-mingw_read_file_strip_cr_ () {
-	# Read line-wise using LF as the line separator
-	# and use IFS to strip CR.
-	local line
-	while :
-	do
-		if IFS=$'\r' read -r -d $'\n' line
-		then
-			# good
-			line=$line$'\n'
-		else
-			# we get here at EOF, but also if the last line
-			# was not terminated by LF; in the latter case,
-			# some text was read
-			if test -z "$line"
-			then
-				# EOF, really
-				break
-			fi
-		fi
-		eval "$1=\$$1\$line"
-	done
-}
-
 # Like "env FOO=BAR some-program", but run inside a subshell, which means
 # it also works for shell functions (though those functions cannot impact
 # the environment outside of the test_env invocation).
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 7726d1da88a..0be25ecbd59 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1546,7 +1546,7 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
+	GIT_TEST_CMP="test-tool text-cmp"
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 0/2] tests: replace mingw_test_cmp with a helper in C
  2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
  2022-09-06 13:10   ` [PATCH v2 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
  2022-09-06 13:10   ` [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
@ 2022-09-07  9:04   ` Johannes Schindelin
  2022-11-12 22:07   ` [PATCH v3 " Johannes Schindelin via GitGitGadget
  3 siblings, 0 replies; 58+ messages in thread
From: Johannes Schindelin @ 2022-09-07  9:04 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget; +Cc: git

Hi,

On Tue, 6 Sep 2022, Johannes Schindelin via GitGitGadget wrote:

> Changes since v1:
>
>  * Fixed double "with" in the commit message.

Whoops, I totally forgot to mention these two things:


- Renamed the test helper to `text-cmp`.
- Made the `diff --no-index` call more robust by using a double-dash
  separator.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C
  2022-09-06 13:10   ` [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
@ 2022-09-07 11:57     ` Ævar Arnfjörð Bjarmason
  2022-09-07 12:24       ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-09-07 11:57 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget; +Cc: git, Johannes Schindelin

On Tue, Sep 06 2022, Johannes Schindelin via GitGitGadget wrote:

> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> [...]
> +++ b/t/helper/test-text-cmp.c
> @@ -0,0 +1,78 @@
> +#include "test-tool.h"
> +#include "git-compat-util.h"
> +#include "strbuf.h"
> +#include "gettext.h"

Superflous header? Compiles without gettext.h for me (and we shouldn't
use i18n in test helpers).

> [...]
> +int cmd__text_cmp(int argc, const char **argv)
> +{
> +	FILE *f0, *f1;
> +	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
> +
> +	if (argc != 3)
> +		die("Require exactly 2 arguments, got %d", argc);

Here you conflate the argc v.s. arguments minus the "text-cmp",
resulting in:

	helper/test-tool text-cmp 2
        fatal: Require exactly 2 arguments, got 2

An argc-- argv++ at the beginning seems like the easiest way out of
this. Also s/Require/require/ per CodingGuidelines.

> +	if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-"))
> +		die("only one parameter can refer to `stdin` but not both");
> +
> +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
> +		return error_errno("could not open '%s'", argv[1]);
> +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
> +		fclose(f0);
> +		return error_errno("could not open '%s'", argv[2]);
> +	}

Faithfully emulating the old version. I do wonder if we couldn't simply
adjust the handful of tests that actually make use of the "-" diff(1)
feature. AFAICT there's around 10 of those at most, and they all seem
like cases where it would be easy to change:

	(echo foo) | test_cmp - actual

Or whatever, to:

	echo foo >expected &&
	test_cmp expected actual

...

> +			if (!strcmp(argv[1], "-") || !strcmp(argv[2], "-"))
> +				warning("cannot show diff because `stdin` was already consumed");

...

Which means we wouldn't need to punt on this.

> +			else if (!run_diff(argv[1], argv[2]))
> +				die("Huh? 'diff --no-index %s %s' succeeded",
> +				    argv[1], argv[2]);

I tried manually testing this with:

	GIT_TRACE=1 GIT_TEST_CMP="/home/avar/g/git/git diff --no-index --" ./t0021-conversion.sh  -vixd

v.s.:

	GIT_TRACE=1 GIT_TEST_CMP="$PWD/helper/test-tool text-cmp" ./t0021-conversion.sh  -vixd

Your version doesn't get confused by the same, but AFAICT this is by
fragile accident.

I.e. you run your own equivalent of "cmp", so because the files are the
same in that case we don't run the "diff --no-index".

But the "diff --no-index" in that t0021*.sh case *would* report
differences, even though the files are byte-for-byte identical.

So the "cmp"-a-like here isn't just an optimization to avoid forking the
"git diff" process, it's an entirely different comparison method in
cases where we have a "filter".

It just so happens that our test suite doesn't currently combine them in
a way that causes a current failure.

>  test_cmp () {
>  	test "$#" -ne 2 && BUG "2 param"
> -	eval "$GIT_TEST_CMP" '"$@"'
> +	GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"'
>  }

Further, we have a clear boundary in the test suite between "git" and
"test-tool" things we invoke, and third party tools. The former we put
in "test_must_fail_acceptable".

When using this new helper we'd hide potential segfaults and BUGs in any
"! test_cmp" invocation..

To avoid the introduction of such a blindspot we'd need to change
"test_cmp" to take an optional "!" as the 1st argument, and convert the
existing "! test_cmp" to "test_cmp !", then carry some flag to indicate
that our "GIT_TEST_CMP" is a git or test-tool invocation, and check it
appropriately.

> [...]
> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 7726d1da88a..0be25ecbd59 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1546,7 +1546,7 @@ case $uname_s in
>  	test_set_prereq SED_STRIPS_CR
>  	test_set_prereq GREP_STRIPS_CR
>  	test_set_prereq WINDOWS
> -	GIT_TEST_CMP=mingw_test_cmp
> +	GIT_TEST_CMP="test-tool text-cmp"
>  	;;
>  *CYGWIN*)
>  	test_set_prereq POSIXPERM

Not a new problem, but this is incompatible with
GIT_TEST_CMP_USE_COPIED_CONTEXT.

What is new though is that with this series there's no longer a good
reason AFAICT to carry GIT_TEST_CMP_USE_COPIED_CONTEXT at all. I.e. we
have it for a "diff" that doesn't understand "-u".

If (after getting past tho caveats noted above) we could simply invoke
our own test-tool we could drop that special-casing & just always invoke
our own test_cmp helper.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-06 13:10   ` Johannes Schindelin
@ 2022-09-07 12:09     ` René Scharfe
  2022-09-07 16:25       ` Junio C Hamano
  0 siblings, 1 reply; 58+ messages in thread
From: René Scharfe @ 2022-09-07 12:09 UTC (permalink / raw)
  To: Johannes Schindelin, Junio C Hamano
  Cc: Johannes Schindelin via GitGitGadget, git

Am 06.09.22 um 15:10 schrieb Johannes Schindelin:
> Hi Junio,
>
> On Fri, 29 Jul 2022, Junio C Hamano wrote:
>
>> "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
>> writes:
>>
>>> +	const char *argv[] = {
>>> +		"diff", "--no-index", NULL, NULL, NULL
>>> +	};
>>
>> Don't we want to have "--" before the two paths?
>
> Yes!
>
>>> +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
>>> +		return error_errno("could not open '%s'", argv[1]);
>>> +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
>>> +		fclose(f0);
>>> +		return error_errno("could not open '%s'", argv[2]);
>>> +	}
>>
>> It is tricky that you need to take "-" and treat it as the standard
>> input stream in either argv[1] or argv[2] (but not both).  If would
>> be a different story in an end-user facing program, but because this
>> is a test helper, feeding wrong input is developer's fault, and I do
>> not mind lack of attention to detail of error checking to make sure
>> we avoid comparing alternating lines of the standard input.

"git diff --no-index - -" also doesn't complain, by the way.

> No, you're right, I've added a guard that prevents `test-tool cmp - -`
> from failing in obscure ways.
>
>>> +	for (;;) {
>>> +		int r0 = strbuf_getline(&b0, f0);
>>> +		int r1 = strbuf_getline(&b1, f1);
>>> +
>>> +		if (r0 == EOF) {
>>> +			fclose(f0);
>>> +			fclose(f1);
>>> +			strbuf_release(&b0);
>>> +			strbuf_release(&b1);
>>> +			if (r1 == EOF)
>>> +				return 0;
>>
>> If both hit the EOF at the same time, we know they are the same, OK.
>>
>>> +cmp_failed:
>>> +			if (!run_diff(argv[1], argv[2]))
>>
>> If one of argv[] was "-", then this wouldn't work correctly, as the
>> other file is read from the beginning but the "-" side have consumed
>> the initial part of the input and we cannot unseek it.  This bug
>> needs to be fixed only if we expect a useful and reliable output
>> from the helper.
>
> Right. I've added a clause that says that we cannot show the diff because
> `stdin` has been consumed already.
>
>> But otherwise the idea is sound.  We compare them line by line,
>> using strbuf_getline() to ignore differences in CRLF and LF that
>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
>> random LF <> CRLF conversions, 2013-10-26).  Only when we find the
>> input different, we use "git diff --no-index" to make the difference
>> (and unfortunately more, as it does not ignore CRLF <> LF
>> differences) visible.

Why not use "git diff --no-index --ignore-cr-at-eol"?  Do you even need
to wrap it?

René


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 11:57     ` Ævar Arnfjörð Bjarmason
@ 2022-09-07 12:24       ` Ævar Arnfjörð Bjarmason
  2022-09-07 19:45         ` Junio C Hamano
  0 siblings, 1 reply; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-09-07 12:24 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget; +Cc: git, Johannes Schindelin


On Wed, Sep 07 2022, Ævar Arnfjörð Bjarmason wrote:

> On Tue, Sep 06 2022, Johannes Schindelin via GitGitGadget wrote:
>
>> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>> [...]
>> +++ b/t/helper/test-text-cmp.c
>> @@ -0,0 +1,78 @@
>> +#include "test-tool.h"
>> +#include "git-compat-util.h"
>> +#include "strbuf.h"
>> +#include "gettext.h"
>
> Superflous header? Compiles without gettext.h for me (and we shouldn't
> use i18n in test helpers).
>
>> [...]
>> +int cmd__text_cmp(int argc, const char **argv)
>> +{
>> +	FILE *f0, *f1;
>> +	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
>> +
>> +	if (argc != 3)
>> +		die("Require exactly 2 arguments, got %d", argc);
>
> Here you conflate the argc v.s. arguments minus the "text-cmp",
> resulting in:
>
> 	helper/test-tool text-cmp 2
>         fatal: Require exactly 2 arguments, got 2
>
> An argc-- argv++ at the beginning seems like the easiest way out of
> this. Also s/Require/require/ per CodingGuidelines.
>
>> +	if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-"))
>> +		die("only one parameter can refer to `stdin` but not both");
>> +
>> +	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
>> +		return error_errno("could not open '%s'", argv[1]);
>> +	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
>> +		fclose(f0);
>> +		return error_errno("could not open '%s'", argv[2]);
>> +	}
>
> Faithfully emulating the old version. I do wonder if we couldn't simply
> adjust the handful of tests that actually make use of the "-" diff(1)
> feature. AFAICT there's around 10 of those at most, and they all seem
> like cases where it would be easy to change:
>
> 	(echo foo) | test_cmp - actual
>
> Or whatever, to:
>
> 	echo foo >expected &&
> 	test_cmp expected actual
>
> ...
>
>> +			if (!strcmp(argv[1], "-") || !strcmp(argv[2], "-"))
>> +				warning("cannot show diff because `stdin` was already consumed");
>
> ...
>
> Which means we wouldn't need to punt on this.
>
>> +			else if (!run_diff(argv[1], argv[2]))
>> +				die("Huh? 'diff --no-index %s %s' succeeded",
>> +				    argv[1], argv[2]);
>
> I tried manually testing this with:
>
> 	GIT_TRACE=1 GIT_TEST_CMP="/home/avar/g/git/git diff --no-index --" ./t0021-conversion.sh  -vixd
>
> v.s.:
>
> 	GIT_TRACE=1 GIT_TEST_CMP="$PWD/helper/test-tool text-cmp" ./t0021-conversion.sh  -vixd
>
> Your version doesn't get confused by the same, but AFAICT this is by
> fragile accident.
>
> I.e. you run your own equivalent of "cmp", so because the files are the
> same in that case we don't run the "diff --no-index".
>
> But the "diff --no-index" in that t0021*.sh case *would* report
> differences, even though the files are byte-for-byte identical.
>
> So the "cmp"-a-like here isn't just an optimization to avoid forking the
> "git diff" process, it's an entirely different comparison method in
> cases where we have a "filter".
>
> It just so happens that our test suite doesn't currently combine them in
> a way that causes a current failure.

Ah (partially?) I spoke too soon on this part. I.e. the
GIT_DIR=/dev/null precludes reading the filter/repo in this case. So I
*think* we're out of the woods as far as this is concerned.

Still, it would be nice to document in a code comment or commit message
that the "not read the local repo's filter" is absolutely critical here.

But I think that re-raises the point René had in:
https://lore.kernel.org/git/b21d2b60-428f-58ec-28b6-3c617b9f2e45@web.de/

I ran the full test suite with:

	GIT_TEST_CMP='GIT_DIR=/dev/null HOME=/dev/null /usr/bin/git diff --no-index --ignore-cr-at-eol --'

And all of it passes, except for a test in t0001-init.sh which we could
fix up as:
	
	diff --git a/t/t0001-init.sh b/t/t0001-init.sh
	index d479303efa0..d65afe7cceb 100755
	--- a/t/t0001-init.sh
	+++ b/t/t0001-init.sh
	@@ -426,7 +426,7 @@ test_expect_success SYMLINKS 're-init to move gitdir symlink' '
	 	git init --separate-git-dir ../realgitdir
	 	) &&
	 	echo "gitdir: $(pwd)/realgitdir" >expected &&
	-	test_cmp expected newdir/.git &&
	+	test "$(test_readlink newdir/.git)" = here &&
	 	test_cmp expected newdir/here &&
	 	test_path_is_dir realgitdir/refs
	 '

Which without this series is more correct, as all we're re-testing there
is whether the symlink is pointing to what we expect. A hypothetical
"--dereference" to "git diff" would also take care of it (the equivalent
of "--no-dereference" being the default).

But with that all tests pass for me, so I'm puzzled as to the need for
the new helper, as opposed to just constructing the command above and
sticking it in GIT_TEST_CMP ...
	

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 12:09     ` René Scharfe
@ 2022-09-07 16:25       ` Junio C Hamano
  2022-09-07 21:45         ` Re* " Junio C Hamano
                           ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-09-07 16:25 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git

René Scharfe <l.s.r@web.de> writes:

> "git diff --no-index - -" also doesn't complain, by the way.

True, but in this case hopefully it is worth to call it out, as both
this code that uses "diff --no-index" and "diff --no-index" itself
came from the same author ;-)

I think "git diff --no-index - -" should just exit 0 after slurping
all its input (i.e. allow it to be placed downstream of a pipe
without blocking the upstream), but it is also fine to exit with 0
without reading a single byte from the standard input.  Of course
the latter is easier to implement ;-)

>>> But otherwise the idea is sound.  We compare them line by line,
>>> using strbuf_getline() to ignore differences in CRLF and LF that
>>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
>>> random LF <> CRLF conversions, 2013-10-26).  Only when we find the
>>> input different, we use "git diff --no-index" to make the difference
>>> (and unfortunately more, as it does not ignore CRLF <> LF
>>> differences) visible.
>
> Why not use "git diff --no-index --ignore-cr-at-eol"?  Do you even need
> to wrap it?

Hmph.  That surely sounds sensible if it works, and I offhand do not
see why it shouldn't work.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 12:24       ` Ævar Arnfjörð Bjarmason
@ 2022-09-07 19:45         ` Junio C Hamano
  0 siblings, 0 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-09-07 19:45 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Johannes Schindelin via GitGitGadget, git, Johannes Schindelin

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> But I think that re-raises the point René had in:
> https://lore.kernel.org/git/b21d2b60-428f-58ec-28b6-3c617b9f2e45@web.de/

As the primary point of no-index mode was to expose fancy options
"git diff" has to comparisons of files outside version control,
without having to go through the trouble of upstreaming changes to
GNU diff, I do think "--ignore-cr-at-eol" should work fine with it,
and René's idea sounds like the best implementation for the
test-text-cmp helper command.

Thanks.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re* [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 16:25       ` Junio C Hamano
@ 2022-09-07 21:45         ` Junio C Hamano
  2022-09-07 22:39           ` René Scharfe
  2022-09-08  8:59         ` René Scharfe
  2022-09-08 20:54         ` Johannes Schindelin
  2 siblings, 1 reply; 58+ messages in thread
From: Junio C Hamano @ 2022-09-07 21:45 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git

Junio C Hamano <gitster@pobox.com> writes:

> René Scharfe <l.s.r@web.de> writes:
>
>> "git diff --no-index - -" also doesn't complain, by the way.
>
> True, but in this case hopefully it is worth to call it out, as both
> this code that uses "diff --no-index" and "diff --no-index" itself
> came from the same author ;-)
>
> I think "git diff --no-index - -" should just exit 0 after slurping
> all its input (i.e. allow it to be placed downstream of a pipe
> without blocking the upstream), but it is also fine to exit with 0
> without reading a single byte from the standard input.  Of course
> the latter is easier to implement ;-)


----- >8 --------- >8 --------- >8 --------- >8 --------- >8 -----
Subject: [PATCH] diff: "--no-index - -" compares the same standard input

Conceptually, "git diff --no-index - A" slurps the standard input to
the EOF and compares it with the contents of A, so it is a natural
extension for "git diff --no-index - -" to slurp the standard input
to the end, and compares it with itself, which should always yield
"true".

There can be two plausible implementations.  We could read and
discard the input to the end and exit 0.  It would allow us to avoid
sigpipe on the upstream command

    $ dd if=/dev/zero bs=1m count=1 | git diff --no-index - -

As we can tell the outcome without even consuming a single byte from
the standard input, we can just notice the two input files specified
are "-" and immediately exit.  It would allow us to give a correct
answer to

    $ git diff --no-index - - </dev/full

but can kill the command on the upstream side of a pipe that feeds
us.

We pick the latter (i.e. not touching the input at all), simply
because it is more efficient.  Data producer that can be placed on
the upstream side of a pipe should always be prepared for a consumer
that does not consume all its output (e.g. "head" and pager) anyway.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff-no-index.c          | 11 +++++++++++
 t/t4053-diff-no-index.sh | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/diff-no-index.c b/diff-no-index.c
index 18edbdf4b5..940b66b4c8 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -278,6 +278,17 @@ int diff_no_index(struct rev_info *revs,
 			p = to_free[i] = prefix_filename(prefix, p);
 		paths[i] = p;
 	}
+	if (paths[0] == file_from_standard_input &&
+	    paths[1] == file_from_standard_input) {
+		/*
+		 * "git diff --no-index - -".  We are asked to compare
+		 * what came from the standard input with itself; we
+		 * know they are the same without even reading a
+		 * single byte.
+		 */
+		ret = 0;
+		goto out;
+	}
 
 	fixup_paths(paths, &replacement);
 
diff --git a/t/t4053-diff-no-index.sh b/t/t4053-diff-no-index.sh
index 3feadf0e35..d089d9c2b1 100755
--- a/t/t4053-diff-no-index.sh
+++ b/t/t4053-diff-no-index.sh
@@ -156,6 +156,16 @@ test_expect_success 'diff --no-index normalizes mode: no changes' '
 	test_must_be_empty out
 '
 
+test_expect_success 'diff --no-index with standard input' '
+	echo foo >x &&
+	echo bar >z &&
+	git diff --no-index - x <x &&
+	git diff --no-index x - <x &&
+	test_must_fail git diff --no-index - x <z &&
+	test_must_fail git diff --no-index x - <z &&
+	git diff --no-index - - <x
+'
+
 test_expect_success POSIXPERM 'diff --no-index normalizes mode: chmod +x' '
 	chmod +x y &&
 	cat >expected <<-\EOF &&
@@ -180,6 +190,7 @@ test_expect_success POSIXPERM 'diff --no-index normalizes: mode not like git mod
 '
 
 test_expect_success POSIXPERM,SYMLINKS 'diff --no-index normalizes: mode not like git mode (symlink)' '
+	rm -f z &&
 	ln -s y z &&
 	X_OID=$(git hash-object --stdin <x) &&
 	Z_OID=$(printf y | git hash-object --stdin) &&
-- 
2.37.3-732-ge93f299430


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: Re* [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 21:45         ` Re* " Junio C Hamano
@ 2022-09-07 22:39           ` René Scharfe
  2022-09-08  0:03             ` Junio C Hamano
  0 siblings, 1 reply; 58+ messages in thread
From: René Scharfe @ 2022-09-07 22:39 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git

Am 07.09.2022 um 23:45 schrieb Junio C Hamano:
> Junio C Hamano <gitster@pobox.com> writes:
>
>> René Scharfe <l.s.r@web.de> writes:
>>
>>> "git diff --no-index - -" also doesn't complain, by the way.
>>
>> True, but in this case hopefully it is worth to call it out, as both
>> this code that uses "diff --no-index" and "diff --no-index" itself
>> came from the same author ;-)
>>
>> I think "git diff --no-index - -" should just exit 0 after slurping
>> all its input (i.e. allow it to be placed downstream of a pipe
>> without blocking the upstream), but it is also fine to exit with 0
>> without reading a single byte from the standard input.  Of course
>> the latter is easier to implement ;-)
>
>
> ----- >8 --------- >8 --------- >8 --------- >8 --------- >8 -----
> Subject: [PATCH] diff: "--no-index - -" compares the same standard input
>
> Conceptually, "git diff --no-index - A" slurps the standard input to
> the EOF and compares it with the contents of A, so it is a natural
> extension for "git diff --no-index - -" to slurp the standard input
> to the end, and compares it with itself, which should always yield
> "true".
>
> There can be two plausible implementations.  We could read and
> discard the input to the end and exit 0.  It would allow us to avoid
> sigpipe on the upstream command
>
>     $ dd if=/dev/zero bs=1m count=1 | git diff --no-index - -
>
> As we can tell the outcome without even consuming a single byte from
> the standard input, we can just notice the two input files specified
> are "-" and immediately exit.  It would allow us to give a correct
> answer to
>
>     $ git diff --no-index - - </dev/full
>
> but can kill the command on the upstream side of a pipe that feeds
> us.
>
> We pick the latter (i.e. not touching the input at all), simply
> because it is more efficient.  Data producer that can be placed on
> the upstream side of a pipe should always be prepared for a consumer
> that does not consume all its output (e.g. "head" and pager) anyway.
>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
>  diff-no-index.c          | 11 +++++++++++
>  t/t4053-diff-no-index.sh | 11 +++++++++++
>  2 files changed, 22 insertions(+)
>
> diff --git a/diff-no-index.c b/diff-no-index.c
> index 18edbdf4b5..940b66b4c8 100644
> --- a/diff-no-index.c
> +++ b/diff-no-index.c
> @@ -278,6 +278,17 @@ int diff_no_index(struct rev_info *revs,
>  			p = to_free[i] = prefix_filename(prefix, p);
>  		paths[i] = p;
>  	}
> +	if (paths[0] == file_from_standard_input &&
> +	    paths[1] == file_from_standard_input) {
> +		/*
> +		 * "git diff --no-index - -".  We are asked to compare
> +		 * what came from the standard input with itself; we
> +		 * know they are the same without even reading a
> +		 * single byte.
> +		 */
> +		ret = 0;
> +		goto out;
> +	}
>
>  	fixup_paths(paths, &replacement);
>
> diff --git a/t/t4053-diff-no-index.sh b/t/t4053-diff-no-index.sh
> index 3feadf0e35..d089d9c2b1 100755
> --- a/t/t4053-diff-no-index.sh
> +++ b/t/t4053-diff-no-index.sh
> @@ -156,6 +156,16 @@ test_expect_success 'diff --no-index normalizes mode: no changes' '
>  	test_must_be_empty out
>  '
>
> +test_expect_success 'diff --no-index with standard input' '
> +	echo foo >x &&
> +	echo bar >z &&
> +	git diff --no-index - x <x &&
> +	git diff --no-index x - <x &&
> +	test_must_fail git diff --no-index - x <z &&
> +	test_must_fail git diff --no-index x - <z &&
> +	git diff --no-index - - <x
> +'

This test passes even without the changes to diff-no-index.c above.
Because diff.c::diff_fill_oid_info() sets an all-zero object ID for
stdin on both sides, I guess, so the comparison is skipped even though
the data is read already for the first dash and the data size of the
second one is thus zero?

> +
>  test_expect_success POSIXPERM 'diff --no-index normalizes mode: chmod +x' '
>  	chmod +x y &&
>  	cat >expected <<-\EOF &&
> @@ -180,6 +190,7 @@ test_expect_success POSIXPERM 'diff --no-index normalizes: mode not like git mod
>  '
>
>  test_expect_success POSIXPERM,SYMLINKS 'diff --no-index normalizes: mode not like git mode (symlink)' '
> +	rm -f z &&
>  	ln -s y z &&
>  	X_OID=$(git hash-object --stdin <x) &&
>  	Z_OID=$(printf y | git hash-object --stdin) &&

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: Re* [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 22:39           ` René Scharfe
@ 2022-09-08  0:03             ` Junio C Hamano
  0 siblings, 0 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-09-08  0:03 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git

René Scharfe <l.s.r@web.de> writes:

> This test passes even without the changes to diff-no-index.c above.
> Because diff.c::diff_fill_oid_info() sets an all-zero object ID for
> stdin on both sides, I guess, so the comparison is skipped even though
> the data is read already for the first dash and the data size of the
> second one is thus zero?

OK, that sounds like a happy accident ;-)

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 16:25       ` Junio C Hamano
  2022-09-07 21:45         ` Re* " Junio C Hamano
@ 2022-09-08  8:59         ` René Scharfe
  2022-09-08 15:26           ` Ævar Arnfjörð Bjarmason
  2022-09-08 20:54         ` Johannes Schindelin
  2 siblings, 1 reply; 58+ messages in thread
From: René Scharfe @ 2022-09-08  8:59 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git

Am 07.09.22 um 18:25 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
>
>>>> But otherwise the idea is sound.  We compare them line by line,
>>>> using strbuf_getline() to ignore differences in CRLF and LF that
>>>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
>>>> random LF <> CRLF conversions, 2013-10-26).  Only when we find the
>>>> input different, we use "git diff --no-index" to make the difference
>>>> (and unfortunately more, as it does not ignore CRLF <> LF
>>>> differences) visible.
>>
>> Why not use "git diff --no-index --ignore-cr-at-eol"?  Do you even need
>> to wrap it?
>
> Hmph.  That surely sounds sensible if it works, and I offhand do not
> see why it shouldn't work.

Using git diff in test_cmp and using test_cmp to check if git diff works
would become a cyclical dependency.  Only doing that on one platform
limits the potential blind spot to platform-specific bugs, though.
Enough to go wrapper-less?  Not sure.

René

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-08  8:59         ` René Scharfe
@ 2022-09-08 15:26           ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-09-08 15:26 UTC (permalink / raw)
  To: René Scharfe
  Cc: Junio C Hamano, Johannes Schindelin,
	Johannes Schindelin via GitGitGadget, git

On Thu, Sep 08 2022, René Scharfe wrote:

> Am 07.09.22 um 18:25 schrieb Junio C Hamano:
>> René Scharfe <l.s.r@web.de> writes:
>>
>>>>> But otherwise the idea is sound.  We compare them line by line,
>>>>> using strbuf_getline() to ignore differences in CRLF and LF that
>>>>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
>>>>> random LF <> CRLF conversions, 2013-10-26).  Only when we find the
>>>>> input different, we use "git diff --no-index" to make the difference
>>>>> (and unfortunately more, as it does not ignore CRLF <> LF
>>>>> differences) visible.
>>>
>>> Why not use "git diff --no-index --ignore-cr-at-eol"?  Do you even need
>>> to wrap it?
>>
>> Hmph.  That surely sounds sensible if it works, and I offhand do not
>> see why it shouldn't work.
>
> Using git diff in test_cmp and using test_cmp to check if git diff works
> would become a cyclical dependency.  Only doing that on one platform
> limits the potential blind spot to platform-specific bugs, though.
> Enough to go wrapper-less?  Not sure.

I don't see how being wrapper-less is less of a "cyclical dependency"
than using "git diff" directly.

If we are to postulate some bug where "git diff" thwarts us for the use
of "test_cmp" it's going to be *very* broken.

I don't see how such a "git diff" would pass the rest of the test suite
(some of which involves comparing its exact output), but still be
functional enough to work as a GIT_TEST_CMP.

Even one where it just returns 0 unconditionally wouldn't pass, as we
rely on "! test_cmp" in some cases.

And any such breakage we imagine might just as well affect a wrapper for
it, and I'd think that would be the more likely of two unlikely
possibilities, as that wrapper would only be used for the test suite,
whereas "git diff" is more widely tested.

In any case, as long as we preserve the ability to set a
GIT_TEST_CMP=cmp we can sanity check any such wrapper or dogfodding of
"git diff" with an external program.

Needing to deal just with "git diff" and "cmp" would be a step forward,
as we'd be able to drop the current shellscript "mingw" wrapper, as well
as the special support for a "diff" that doesn't understand "-u".

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-07 16:25       ` Junio C Hamano
  2022-09-07 21:45         ` Re* " Junio C Hamano
  2022-09-08  8:59         ` René Scharfe
@ 2022-09-08 20:54         ` Johannes Schindelin
  2022-09-08 21:09           ` Junio C Hamano
  2 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin @ 2022-09-08 20:54 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: René Scharfe, Johannes Schindelin via GitGitGadget, git

[-- Attachment #1: Type: text/plain, Size: 2589 bytes --]

Hi Junio,

On Wed, 7 Sep 2022, Junio C Hamano wrote:

> René Scharfe <l.s.r@web.de> writes:
>
> > "git diff --no-index - -" also doesn't complain, by the way.
>
> True, but in this case hopefully it is worth to call it out, as both
> this code that uses "diff --no-index" and "diff --no-index" itself
> came from the same author ;-)
>
> I think "git diff --no-index - -" should just exit 0 after slurping
> all its input (i.e. allow it to be placed downstream of a pipe
> without blocking the upstream), but it is also fine to exit with 0
> without reading a single byte from the standard input.  Of course
> the latter is easier to implement ;-)
>
> >>> But otherwise the idea is sound.  We compare them line by line,
> >>> using strbuf_getline() to ignore differences in CRLF and LF that
> >>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to
> >>> random LF <> CRLF conversions, 2013-10-26).  Only when we find the
> >>> input different, we use "git diff --no-index" to make the difference
> >>> (and unfortunately more, as it does not ignore CRLF <> LF
> >>> differences) visible.
> >
> > Why not use "git diff --no-index --ignore-cr-at-eol"?  Do you even need
> > to wrap it?
>
> Hmph.  That surely sounds sensible if it works, and I offhand do not
> see why it shouldn't work.

Is this a reversal of the stance you took in your reply in
https://lore.kernel.org/git/7vps7xrfxa.fsf@assigned-by-dhcp.cox.net/ to my
suggestion to replace `cmp` by `diff --no-index` (in that mail referred to
as patch [7/8])?

If I recall correctly, you clarified outside of that thread that "I do not
think it is a good enough reason to make the tests slower" was you being
concerned about employing the entire diff machinery instead of doing a
simple byte-for-byte comparison.

And while it is no longer _that_ simple a comparison (it now
special-handles Carriage Returns), the speed and simplicity concern is
still valid: `test-tool text-cmp` is vastly simpler (and provably faster)
than `diff --no-index`.

Just because it is easier to review a one-liner to switch from essentially
`cmp` to `git diff --no-index --ignore-cr-at-eol` does not mean that it is
reasonable: it would cause us to blast out that much more CO2, just for
our one-time convenience.

Or for that matter, it would willfully slow down the `windows-test` jobs
that already is mired by sloooow performance (mostly due to running a
shell script-based test suite).

Can you please let me make the Windows situation better rather than worse?

Thank you,
Dscho

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] tests: replace mingw_test_cmp with a helper in C
  2022-09-08 20:54         ` Johannes Schindelin
@ 2022-09-08 21:09           ` Junio C Hamano
  0 siblings, 0 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-09-08 21:09 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: René Scharfe, Johannes Schindelin via GitGitGadget, git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> > Why not use "git diff --no-index --ignore-cr-at-eol"?  Do you even need
>> > to wrap it?
>>
>> Hmph.  That surely sounds sensible if it works, and I offhand do not
>> see why it shouldn't work.
>
> Is this a reversal of the stance you took in your reply in
> https://lore.kernel.org/git/7vps7xrfxa.fsf@assigned-by-dhcp.cox.net/ to my
> suggestion to replace `cmp` by `diff --no-index` (in that mail referred to
> as patch [7/8])?

cox.net?  It is a lifetime ago and the world has changed.  Hopefully
that "diff --no-index" has matured a lot to earn more confidence by
us than it had back then.

> If I recall correctly, you clarified outside of that thread that "I do not
> think it is a good enough reason to make the tests slower" was you being
> concerned about employing the entire diff machinery instead of doing a
> simple byte-for-byte comparison.

Is it still relevant, now that we are talking about text-cmp that
ignores cr-at-eol, to bring a random remark about byte-for-byte
comparison from more than 10 years ago?

> Just because it is easier to review a one-liner to switch from essentially
> `cmp` to `git diff --no-index --ignore-cr-at-eol` does not mean that it is
> reasonable: it would cause us to blast out that much more CO2, just for
> our one-time convenience.

Measurement, in tons per year, is needed, or please stop talking
about CO2.  It is not that funny.

Developer cycle time is easier to measure and more meaningful.  

It would be much faster to run a byte-for-byte-with-ignore-cr-at-eol
comparison than running "diff --no-index --ignore-cr-at-eol" on
files with meaningful sizes.  But comparison between "expect" and
"actual", which are typically at most several lines long?  Wouldn't
the overhead to spawn a process dwarf everything else, especially on
Windows, whether it is your byte-for-byte-with-ignore-cr-at-eol
program or "git diff"?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v3 0/2] tests: replace mingw_test_cmp with a helper in C
  2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
                     ` (2 preceding siblings ...)
  2022-09-07  9:04   ` [PATCH v2 0/2] " Johannes Schindelin
@ 2022-11-12 22:07   ` Johannes Schindelin via GitGitGadget
  2022-11-12 22:07     ` [PATCH v3 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
                       ` (2 more replies)
  3 siblings, 3 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-11-12 22:07 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

A few months ago, directly after sending a patch to fix a performance
regression due to a mis-use of test_cmp
[https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/],
I got curious to see whether Git for Windows had the same issue. And it did
not: it passes t5351 in 22 seconds, even while using test_cmp to compare
pack files
[https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90].

The explanation is of course that Git for Windows uses a test helper for
test_cmp that is written in C, instead of the Bash function. And C is much
faster than a Bash function, especially on Windows. This is especially sad
when said Bash code is only used on Windows. So I originally had pulled out
this helper from the years-long effort to let Git for Windows use BusyBox'
ash to run the test suite. The result is this patch, which has been in Git
for Windows since June 2018.

Unfortunately, this tried-and-tested code was rejected by the Git
maintainer.

Let's fall back to the next-best solution: git diff --no-index, which the
Git maintainer seems to like. The downside is that the diff machinery does a
lot more than a simple cmp clone, and therefore a lot more things can go
wrong that might make it look like a test case is failing when the fault is
somewhere else entirely. There is one way to find out whether this is a
valid concern.

Changes since v2:

 * Dropped the test helper, using diff --no-index instead.

Changes since v1:

 * Fixed double "with" in the commit message.
 * Renamed the test helper to text-cmp.
 * Made the diff --no-index call more robust by using a double-dash
   separator.

Johannes Schindelin (2):
  t0021: use Windows-friendly `pwd`
  tests(mingw): avoid very slow `mingw_test_cmp`

 t/t0021-conversion.sh   |  4 +--
 t/test-lib-functions.sh | 66 -----------------------------------------
 t/test-lib.sh           |  2 +-
 3 files changed, 3 insertions(+), 69 deletions(-)


base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1309

Range-diff vs v2:

 1:  ad7c41401ee ! 1:  b38b8fb5a85 t0021: use Windows-friendly `pwd`
     @@ Commit message
          absolute path, the MSYS2 runtime specifically exempts arguments
          containing a `=` character from that conversion.
      
     -    We are about to change `test_cmp` to use a test helper, which is such a
     -    Win32 process.
     +    We are about to change `test_cmp` to use `git diff --no-index`, which
     +    involves spawning precisely such a Win32 process.
      
          In combination, this would cause a failure in `t0021-conversion.sh`
          where we pass an absolute path containing an equal character to the
 2:  1f5366f1379 ! 2:  a7f4265ceb2 tests: replace mingw_test_cmp with a helper in C
     @@ Metadata
      Author: Johannes Schindelin <Johannes.Schindelin@gmx.de>
      
       ## Commit message ##
     -    tests: replace mingw_test_cmp with a helper in C
     +    tests(mingw): avoid very slow `mingw_test_cmp`
      
     -    This helper is more performant than running the `mingw_test_cmp` code
     -    with MSYS2's Bash. And a lot more readable.
     +    It is more performant to run `git diff --no-index` than running the
     +    `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
     +    Windows uses. And a lot more readable.
      
     -    To accommodate t1050, which wants to compare files weighing in with 3MB
     -    (falling outside of t1050's malloc limit of 1.5MB), we simply lift the
     -    allocation limit by setting the environment variable GIT_ALLOC_LIMIT to
     -    zero when calling the helper.
     +    Note: Earlier attempts at fixing this involved a test helper that avoids
     +    the overhead of the diff machinery, in favor of implementing a behavior
     +    that is more in line with what `mingw_test_cmp` does now, but that
     +    attempt saw a lot of backlash and distractions during review and was
     +    therefore abandoned.
      
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
     - ## Makefile ##
     -@@ Makefile: TEST_BUILTINS_OBJS += test-string-list.o
     - TEST_BUILTINS_OBJS += test-submodule-config.o
     - TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
     - TEST_BUILTINS_OBJS += test-subprocess.o
     -+TEST_BUILTINS_OBJS += test-text-cmp.o
     - TEST_BUILTINS_OBJS += test-trace2.o
     - TEST_BUILTINS_OBJS += test-urlmatch-normalization.o
     - TEST_BUILTINS_OBJS += test-userdiff.o
     -
     - ## t/helper/test-text-cmp.c (new) ##
     -@@
     -+#include "test-tool.h"
     -+#include "git-compat-util.h"
     -+#include "strbuf.h"
     -+#include "gettext.h"
     -+#include "parse-options.h"
     -+#include "run-command.h"
     -+
     -+#ifdef WIN32
     -+#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid"
     -+#else
     -+#define NO_SUCH_DIR "/dev/null"
     -+#endif
     -+
     -+static int run_diff(const char *path1, const char *path2)
     -+{
     -+	const char *argv[] = {
     -+		"diff", "--no-index", "--", NULL, NULL, NULL
     -+	};
     -+	const char *env[] = {
     -+		"GIT_PAGER=cat",
     -+		"GIT_DIR=" NO_SUCH_DIR,
     -+		"HOME=" NO_SUCH_DIR,
     -+		NULL
     -+	};
     -+
     -+	argv[3] = path1;
     -+	argv[4] = path2;
     -+	return run_command_v_opt_cd_env(argv,
     -+					RUN_COMMAND_NO_STDIN | RUN_GIT_CMD,
     -+					NULL, env);
     -+}
     -+
     -+int cmd__text_cmp(int argc, const char **argv)
     -+{
     -+	FILE *f0, *f1;
     -+	struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT;
     -+
     -+	if (argc != 3)
     -+		die("Require exactly 2 arguments, got %d", argc);
     -+
     -+	if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-"))
     -+		die("only one parameter can refer to `stdin` but not both");
     -+
     -+	if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")))
     -+		return error_errno("could not open '%s'", argv[1]);
     -+	if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) {
     -+		fclose(f0);
     -+		return error_errno("could not open '%s'", argv[2]);
     -+	}
     -+
     -+	for (;;) {
     -+		int r0 = strbuf_getline(&b0, f0);
     -+		int r1 = strbuf_getline(&b1, f1);
     -+
     -+		if (r0 == EOF) {
     -+			fclose(f0);
     -+			fclose(f1);
     -+			strbuf_release(&b0);
     -+			strbuf_release(&b1);
     -+			if (r1 == EOF)
     -+				return 0;
     -+cmp_failed:
     -+			if (!strcmp(argv[1], "-") || !strcmp(argv[2], "-"))
     -+				warning("cannot show diff because `stdin` was already consumed");
     -+			else if (!run_diff(argv[1], argv[2]))
     -+				die("Huh? 'diff --no-index %s %s' succeeded",
     -+				    argv[1], argv[2]);
     -+			return 1;
     -+		}
     -+		if (r1 == EOF || strbuf_cmp(&b0, &b1)) {
     -+			fclose(f0);
     -+			fclose(f1);
     -+			strbuf_release(&b0);
     -+			strbuf_release(&b1);
     -+			goto cmp_failed;
     -+		}
     -+	}
     -+}
     -
     - ## t/helper/test-tool.c ##
     -@@ t/helper/test-tool.c: static struct test_cmd cmds[] = {
     - 	{ "submodule-config", cmd__submodule_config },
     - 	{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
     - 	{ "subprocess", cmd__subprocess },
     -+	{ "text-cmp", cmd__text_cmp },
     - 	{ "trace2", cmd__trace2 },
     - 	{ "userdiff", cmd__userdiff },
     - 	{ "urlmatch-normalization", cmd__urlmatch_normalization },
     -
     - ## t/helper/test-tool.h ##
     -@@ t/helper/test-tool.h: int cmd__string_list(int argc, const char **argv);
     - int cmd__submodule_config(int argc, const char **argv);
     - int cmd__submodule_nested_repo_config(int argc, const char **argv);
     - int cmd__subprocess(int argc, const char **argv);
     -+int cmd__text_cmp(int argc, const char **argv);
     - int cmd__trace2(int argc, const char **argv);
     - int cmd__userdiff(int argc, const char **argv);
     - int cmd__urlmatch_normalization(int argc, const char **argv);
     -
       ## t/test-lib-functions.sh ##
     -@@ t/test-lib-functions.sh: test_expect_code () {
     - 
     - test_cmp () {
     - 	test "$#" -ne 2 && BUG "2 param"
     --	eval "$GIT_TEST_CMP" '"$@"'
     -+	GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"'
     - }
     - 
     - # Check that the given config key has the expected value.
      @@ t/test-lib-functions.sh: test_skip_or_die () {
       	error "$2"
       }
     @@ t/test-lib.sh: case $uname_s in
       	test_set_prereq GREP_STRIPS_CR
       	test_set_prereq WINDOWS
      -	GIT_TEST_CMP=mingw_test_cmp
     -+	GIT_TEST_CMP="test-tool text-cmp"
     ++	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"
       	;;
       *CYGWIN*)
       	test_set_prereq POSIXPERM

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v3 1/2] t0021: use Windows-friendly `pwd`
  2022-11-12 22:07   ` [PATCH v3 " Johannes Schindelin via GitGitGadget
@ 2022-11-12 22:07     ` Johannes Schindelin via GitGitGadget
  2022-11-12 22:07     ` [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2022-11-14 14:06     ` [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2 siblings, 0 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-11-12 22:07 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

In Git for Windows, when passing paths from shell scripts to regular
Win32 executables, thanks to the MSYS2 runtime a somewhat magic path
conversion happens that lets the shell script think that there is a file
at `/git/Makefile` and the Win32 process it spawned thinks that the
shell script said `C:/git-sdk-64/git/Makefile` instead.

This conversion is documented in detail over here:
https://www.msys2.org/docs/filesystem-paths/#automatic-unix-windows-path-conversion

As all automatic conversions, there are gaps. For example, to avoid
mistaking command-line options like `/LOG=log.txt` (which are quite
common in the Windows world) from being mistaken for a Unix-style
absolute path, the MSYS2 runtime specifically exempts arguments
containing a `=` character from that conversion.

We are about to change `test_cmp` to use `git diff --no-index`, which
involves spawning precisely such a Win32 process.

In combination, this would cause a failure in `t0021-conversion.sh`
where we pass an absolute path containing an equal character to the
`test_cmp` function.

Seeing as the Unix tools like `cp` and `diff` that are used by Git's
test suite in the Git for Windows SDK (thanks to the MSYS2 project)
understand both Unix-style as well as Windows-style paths, we can stave
off this problem by simply switching to Windows-style paths and
side-stepping the need for any automatic path conversion.

Note: The `PATH` variable is obviously special, as it is colon-separated
in the MSYS2 Bash used by Git for Windows, and therefore _cannot_
contain absolute Windows-style paths, lest the colon after the drive
letter is mistaken for a path separator. Therefore, we need to be
careful to keep the Unix-style when modifying the `PATH` variable.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/t0021-conversion.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index bad37abad2c..15482fa78e3 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -8,8 +8,8 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-terminal.sh

-TEST_ROOT="$PWD"
-PATH=$TEST_ROOT:$PATH
+PATH=$PWD:$PATH
+TEST_ROOT="$(pwd)"

 write_script <<\EOF "$TEST_ROOT/rot13.sh"
 tr \
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-12 22:07   ` [PATCH v3 " Johannes Schindelin via GitGitGadget
  2022-11-12 22:07     ` [PATCH v3 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
@ 2022-11-12 22:07     ` Johannes Schindelin via GitGitGadget
  2022-11-13  4:51       ` Taylor Blau
                         ` (2 more replies)
  2022-11-14 14:06     ` [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2 siblings, 3 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-11-12 22:07 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

It is more performant to run `git diff --no-index` than running the
`mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
Windows uses. And a lot more readable.

Note: Earlier attempts at fixing this involved a test helper that avoids
the overhead of the diff machinery, in favor of implementing a behavior
that is more in line with what `mingw_test_cmp` does now, but that
attempt saw a lot of backlash and distractions during review and was
therefore abandoned.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/test-lib-functions.sh | 66 -----------------------------------------
 t/test-lib.sh           |  2 +-
 2 files changed, 1 insertion(+), 67 deletions(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8c44856eaec..452fe9bc8aa 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1541,72 +1541,6 @@ test_skip_or_die () {
 	error "$2"
 }
 
-# The following mingw_* functions obey POSIX shell syntax, but are actually
-# bash scripts, and are meant to be used only with bash on Windows.
-
-# A test_cmp function that treats LF and CRLF equal and avoids to fork
-# diff when possible.
-mingw_test_cmp () {
-	# Read text into shell variables and compare them. If the results
-	# are different, use regular diff to report the difference.
-	local test_cmp_a= test_cmp_b=
-
-	# When text came from stdin (one argument is '-') we must feed it
-	# to diff.
-	local stdin_for_diff=
-
-	# Since it is difficult to detect the difference between an
-	# empty input file and a failure to read the files, we go straight
-	# to diff if one of the inputs is empty.
-	if test -s "$1" && test -s "$2"
-	then
-		# regular case: both files non-empty
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-	elif test -s "$1" && test "$2" = -
-	then
-		# read 2nd file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b
-		stdin_for_diff='<<<"$test_cmp_b"'
-	elif test "$1" = - && test -s "$2"
-	then
-		# read 1st file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-		stdin_for_diff='<<<"$test_cmp_a"'
-	fi
-	test -n "$test_cmp_a" &&
-	test -n "$test_cmp_b" &&
-	test "$test_cmp_a" = "$test_cmp_b" ||
-	eval "diff -u \"\$@\" $stdin_for_diff"
-}
-
-# $1 is the name of the shell variable to fill in
-mingw_read_file_strip_cr_ () {
-	# Read line-wise using LF as the line separator
-	# and use IFS to strip CR.
-	local line
-	while :
-	do
-		if IFS=$'\r' read -r -d $'\n' line
-		then
-			# good
-			line=$line$'\n'
-		else
-			# we get here at EOF, but also if the last line
-			# was not terminated by LF; in the latter case,
-			# some text was read
-			if test -z "$line"
-			then
-				# EOF, really
-				break
-			fi
-		fi
-		eval "$1=\$$1\$line"
-	done
-}
-
 # Like "env FOO=BAR some-program", but run inside a subshell, which means
 # it also works for shell functions (though those functions cannot impact
 # the environment outside of the test_env invocation).
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 7726d1da88a..f8c6205e08f 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1546,7 +1546,7 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
+	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-12 22:07     ` [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
@ 2022-11-13  4:51       ` Taylor Blau
  2022-11-14 13:34         ` Johannes Schindelin
  2022-11-18 23:15         ` Junio C Hamano
  2022-11-14  9:53       ` Phillip Wood
  2022-11-14 11:55       ` Ævar Arnfjörð Bjarmason
  2 siblings, 2 replies; 58+ messages in thread
From: Taylor Blau @ 2022-11-13  4:51 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget
  Cc: git, René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

On Sat, Nov 12, 2022 at 10:07:34PM +0000, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> It is more performant to run `git diff --no-index` than running the
> `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> Windows uses. And a lot more readable.

This makes sense, and motivates the changes below.

> Note: Earlier attempts at fixing this involved a test helper that avoids
> the overhead of the diff machinery, in favor of implementing a behavior
> that is more in line with what `mingw_test_cmp` does now, but that
> attempt saw a lot of backlash and distractions during review and was
> therefore abandoned.

But I do not think that this paragraph adds as much as we'd like to the
historical record.

What about the original approach did you have trouble pushing forward?
Perhaps framing that as an alternative approach, along with why it
didn't ultimately get merged would be a more useful recollection of
previous attempts here.

One thing that the commit message doesn't allude to (that is covered in
the earlier discussion) is why it is important to pass
`--ignore-cr-at-eol`. I think that is worth mentioning here.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-12 22:07     ` [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2022-11-13  4:51       ` Taylor Blau
@ 2022-11-14  9:53       ` Phillip Wood
  2022-11-14 13:47         ` Johannes Schindelin
  2022-11-14 11:55       ` Ævar Arnfjörð Bjarmason
  2 siblings, 1 reply; 58+ messages in thread
From: Phillip Wood @ 2022-11-14  9:53 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget, git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

Hi Dscho

On 12/11/2022 22:07, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> 
> It is more performant to run `git diff --no-index` than running the
> `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> Windows uses. And a lot more readable.

This makes me wonder about the implications for our diff tests. We have 
~200 calls to `test_cmp` in t*-diff-*.sh. I had a look at 
t4053-diff-no-index.sh and non of the tests that use `test_cmp` look 
critical for functionality that is used by `test_cmp` and there are 
tests for the exit code. I suspect that if `diff --no-index` breaks 
we'll end up with confusing test failures rather than misleading passes.

A side effect of this change is that on windows `test_cmp` will now 
except directories as well as files but I don't think that matters.

> Note: Earlier attempts at fixing this involved a test helper that avoids
> the overhead of the diff machinery, in favor of implementing a behavior
> that is more in line with what `mingw_test_cmp` does now, but that
> attempt saw a lot of backlash and distractions during review and was
> therefore abandoned.

Hopefully most of the files we feed into `test_cmp` are small enough 
that the absolute difference in run-time is not too big. There is an 
optimization for -U0 that trims the common tail from the files before 
calling xdl_diff() but that does not help here because we need to use 
--ignore-cr-at-eol (otherwise `git diff --no-index -U0 || git diff 
--no-index` might speed up the common case of matching files).

Best Wishes

Phillip

> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>   t/test-lib-functions.sh | 66 -----------------------------------------
>   t/test-lib.sh           |  2 +-
>   2 files changed, 1 insertion(+), 67 deletions(-)
> 
> diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
> index 8c44856eaec..452fe9bc8aa 100644
> --- a/t/test-lib-functions.sh
> +++ b/t/test-lib-functions.sh
> @@ -1541,72 +1541,6 @@ test_skip_or_die () {
>   	error "$2"
>   }
>   
> -# The following mingw_* functions obey POSIX shell syntax, but are actually
> -# bash scripts, and are meant to be used only with bash on Windows.
> -
> -# A test_cmp function that treats LF and CRLF equal and avoids to fork
> -# diff when possible.
> -mingw_test_cmp () {
> -	# Read text into shell variables and compare them. If the results
> -	# are different, use regular diff to report the difference.
> -	local test_cmp_a= test_cmp_b=
> -
> -	# When text came from stdin (one argument is '-') we must feed it
> -	# to diff.
> -	local stdin_for_diff=
> -
> -	# Since it is difficult to detect the difference between an
> -	# empty input file and a failure to read the files, we go straight
> -	# to diff if one of the inputs is empty.
> -	if test -s "$1" && test -s "$2"
> -	then
> -		# regular case: both files non-empty
> -		mingw_read_file_strip_cr_ test_cmp_a <"$1"
> -		mingw_read_file_strip_cr_ test_cmp_b <"$2"
> -	elif test -s "$1" && test "$2" = -
> -	then
> -		# read 2nd file from stdin
> -		mingw_read_file_strip_cr_ test_cmp_a <"$1"
> -		mingw_read_file_strip_cr_ test_cmp_b
> -		stdin_for_diff='<<<"$test_cmp_b"'
> -	elif test "$1" = - && test -s "$2"
> -	then
> -		# read 1st file from stdin
> -		mingw_read_file_strip_cr_ test_cmp_a
> -		mingw_read_file_strip_cr_ test_cmp_b <"$2"
> -		stdin_for_diff='<<<"$test_cmp_a"'
> -	fi
> -	test -n "$test_cmp_a" &&
> -	test -n "$test_cmp_b" &&
> -	test "$test_cmp_a" = "$test_cmp_b" ||
> -	eval "diff -u \"\$@\" $stdin_for_diff"
> -}
> -
> -# $1 is the name of the shell variable to fill in
> -mingw_read_file_strip_cr_ () {
> -	# Read line-wise using LF as the line separator
> -	# and use IFS to strip CR.
> -	local line
> -	while :
> -	do
> -		if IFS=$'\r' read -r -d $'\n' line
> -		then
> -			# good
> -			line=$line$'\n'
> -		else
> -			# we get here at EOF, but also if the last line
> -			# was not terminated by LF; in the latter case,
> -			# some text was read
> -			if test -z "$line"
> -			then
> -				# EOF, really
> -				break
> -			fi
> -		fi
> -		eval "$1=\$$1\$line"
> -	done
> -}
> -
>   # Like "env FOO=BAR some-program", but run inside a subshell, which means
>   # it also works for shell functions (though those functions cannot impact
>   # the environment outside of the test_env invocation).
> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 7726d1da88a..f8c6205e08f 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1546,7 +1546,7 @@ case $uname_s in
>   	test_set_prereq SED_STRIPS_CR
>   	test_set_prereq GREP_STRIPS_CR
>   	test_set_prereq WINDOWS
> -	GIT_TEST_CMP=mingw_test_cmp
> +	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"
>   	;;
>   *CYGWIN*)
>   	test_set_prereq POSIXPERM

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-12 22:07     ` [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2022-11-13  4:51       ` Taylor Blau
  2022-11-14  9:53       ` Phillip Wood
@ 2022-11-14 11:55       ` Ævar Arnfjörð Bjarmason
  2022-11-14 14:02         ` Johannes Schindelin
  2 siblings, 1 reply; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-11-14 11:55 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget
  Cc: git, René Scharfe, Johannes Schindelin, Eric Sunshine

On Sat, Nov 12 2022, Johannes Schindelin via GitGitGadget wrote:

> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> It is more performant to run `git diff --no-index` than running the
> `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> Windows uses. And a lot more readable.
>
> Note: Earlier attempts at fixing this involved a test helper that avoids
> the overhead of the diff machinery, in favor of implementing a behavior
> that is more in line with what `mingw_test_cmp` does now, but that
> attempt saw a lot of backlash and distractions during review and was
> therefore abandoned.

I don't know if you counted this under "backlash and distractions", or
if you just didn't see it, but I think part of the comments I left on
the v2[1][2] are still applicable.

Namely that before this we've been assuming that GIT_TEST_CMP is a
not-ours binary. I.e. "diff", "cmp" etc. If it is a "that's ours" this
change should be changing e.g. '! test_cmp' to 'test_cmp !', as the
former would now hide segfaults.

Which isn't a theoretical issue b.t.w., e.g. the recent d00113ec347
(t/Makefile: apply chainlint.pl to existing self-tests, 2022-09-01)
will invoke "git diff --no-index", which has some interesting results
(that I've run into) when testing a tree where "git diff" happens to
segfault.

From grepping our tests that doesn't seem to be too hard, certainly a
lot easier than a new "not-quite-diff" test helper.

Additionally: We don't *need* this for an initial implementation, but
having e.g. one of the Ubuntu CI targets run with "git diff --no-index"
would be a nice cherry on top, as:

 * It would show that the "--ignore-cr-at-eol" is only needed for MinGW,
   and is also the reason for why "GIT_TEST_CMP" is still
   unconditionally clobbered there, unlike other platforms. See
   4d715ac05cf (Windows: a test_cmp that is agnostic to random LF <>
   CRLF conversions, 2013-10-26).

 * We do pass this elsewhere, except for the one trivial case which you
   aren't running into on MINGW because it doesn't have SYMLINKS, but
   presumably would if the test were adjusted to use "SYMLINKS_WINDOWS".

 * If we're trusting "git diff --no-index" to run the tests, we could
   also get rid of "GIT_TEST_CMP_USE_COPIED_CONTEXT", whose only reason
   for existing is to support a system "diff" that doesn't understand
   "-u" (squashable diff below)

1. https://lore.kernel.org/git/220907.86v8pzl6jz.gmgdl@evledraar.gmail.com/
2. https://lore.kernel.org/git/220907.86r10nl63s.gmgdl@evledraar.gmail.com/

diff --git a/Makefile b/Makefile
index 4927379184c..dea6069b5fe 100644
--- a/Makefile
+++ b/Makefile
@@ -1950,10 +1950,6 @@ ifdef OVERRIDE_STRDUP
 	COMPAT_OBJS += compat/strdup.o
 endif

-ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
-	export GIT_TEST_CMP_USE_COPIED_CONTEXT
-endif
-
 ifndef NO_MSGFMT_EXTENDED_OPTIONS
 	MSGFMT += --check
 endif
@@ -3008,9 +3004,6 @@ endif
 ifdef GIT_TEST_CMP
 	@echo GIT_TEST_CMP=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_CMP)))'\' >>$@+
 endif
-ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
-	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
-endif
 ifdef GIT_TEST_UTF8_LOCALE
 	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
 endif
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 4fab1c1984c..cd6e9f797b6 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1503,12 +1503,7 @@ export PATH GIT_EXEC_PATH GIT_TEMPLATE_DIR GIT_CONFIG_NOSYSTEM GIT_ATTR_NOSYSTEM

 if test -z "$GIT_TEST_CMP"
 then
-	if test -n "$GIT_TEST_CMP_USE_COPIED_CONTEXT"
-	then
-		GIT_TEST_CMP="$DIFF -c"
-	else
-		GIT_TEST_CMP="$DIFF -u"
-	fi
+	GIT_TEST_CMP="$DIFF -u"
 fi

 GITPERLLIB="$GIT_BUILD_DIR"/perl/build/lib

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-13  4:51       ` Taylor Blau
@ 2022-11-14 13:34         ` Johannes Schindelin
  2022-11-18 23:15         ` Junio C Hamano
  1 sibling, 0 replies; 58+ messages in thread
From: Johannes Schindelin @ 2022-11-14 13:34 UTC (permalink / raw)
  To: Taylor Blau
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason

Hi Taylor,

On Sat, 12 Nov 2022, Taylor Blau wrote:

> On Sat, Nov 12, 2022 at 10:07:34PM +0000, Johannes Schindelin via GitGitGadget wrote:
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > It is more performant to run `git diff --no-index` than running the
> > `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> > Windows uses. And a lot more readable.
>
> This makes sense, and motivates the changes below.
>
> > Note: Earlier attempts at fixing this involved a test helper that avoids
> > the overhead of the diff machinery, in favor of implementing a behavior
> > that is more in line with what `mingw_test_cmp` does now, but that
> > attempt saw a lot of backlash and distractions during review and was
> > therefore abandoned.
>
> But I do not think that this paragraph adds as much as we'd like to the
> historical record.

It explains why we replace a relatively simple logic with a relatively
complex one.

> What about the original approach did you have trouble pushing forward?
> Perhaps framing that as an alternative approach, along with why it
> didn't ultimately get merged would be a more useful recollection of
> previous attempts here.
>
> One thing that the commit message doesn't allude to (that is covered in
> the earlier discussion) is why it is important to pass
> `--ignore-cr-at-eol`. I think that is worth mentioning here.

I elaborated on both in the new commit message.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14  9:53       ` Phillip Wood
@ 2022-11-14 13:47         ` Johannes Schindelin
  0 siblings, 0 replies; 58+ messages in thread
From: Johannes Schindelin @ 2022-11-14 13:47 UTC (permalink / raw)
  To: phillip.wood
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason

Hi Phillip,

On Mon, 14 Nov 2022, Phillip Wood wrote:

> On 12/11/2022 22:07, Johannes Schindelin via GitGitGadget wrote:
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > It is more performant to run `git diff --no-index` than running the
> > `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> > Windows uses. And a lot more readable.
>
> This makes me wonder about the implications for our diff tests. We have
> ~200 calls to `test_cmp` in t*-diff-*.sh. I had a look at
> t4053-diff-no-index.sh and non of the tests that use `test_cmp` look critical
> for functionality that is used by `test_cmp` and there are tests for the exit
> code. I suspect that if `diff --no-index` breaks we'll end up with confusing
> test failures rather than misleading passes.

I was surprised how little of a dent the overhead makes (it was not even
measurable within the noise) when I tested this a couple of weeks ago (but
then ran out of time and motivation to send a new iteration).

It would probably still be better to use less complex code, but Junio made
his preference abundantly clear, and if we wanted to save on CI time,
there are much bigger construction sites that we have to open. Point in
case: our CI runtime goes up, up, up, with no real restraint going on. A
randomly picked run from 6 weeks ago took 7h14m of build time
(https://github.com/git/git/actions/runs/3175983767/usage), the current
`seen` run took 8h48m
(https://github.com/git/git/actions/runs/3448435246/usage). If there is
considerate use of resources going on then please help me because I cannot
see it.

> A side effect of this change is that on windows `test_cmp` will now except
> directories as well as files but I don't think that matters.

Indeed, I would believe that the non-Windows test jobs would catch that
failure, so we do not need any extra code to check specifically for that.

> > Note: Earlier attempts at fixing this involved a test helper that avoids
> > the overhead of the diff machinery, in favor of implementing a behavior
> > that is more in line with what `mingw_test_cmp` does now, but that
> > attempt saw a lot of backlash and distractions during review and was
> > therefore abandoned.
>
> Hopefully most of the files we feed into `test_cmp` are small enough that the
> absolute difference in run-time is not too big. There is an optimization for
> -U0 that trims the common tail from the files before calling xdl_diff() but
> that does not help here because we need to use --ignore-cr-at-eol (otherwise
> `git diff --no-index -U0 || git diff --no-index` might speed up the common
> case of matching files).

There is similar code in xdiff to "shrink the box" to avoid unnecessary
work, i.e. skipping identical lines at both ends of the line range:
https://github.com/git/git/blob/v2.38.1/xdiff/xdiffi.c#L261-L265

I share your concern, but my current concern is to cut the loss of time
spent on this patch series on my part, and that concern is currently even
bigger.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14 11:55       ` Ævar Arnfjörð Bjarmason
@ 2022-11-14 14:02         ` Johannes Schindelin
  2022-11-14 15:23           ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin @ 2022-11-14 14:02 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Eric Sunshine

[-- Attachment #1: Type: text/plain, Size: 3511 bytes --]

Hi Ævar,

On Mon, 14 Nov 2022, Ævar Arnfjörð Bjarmason wrote:

> [...] before this we've been assuming that GIT_TEST_CMP is a not-ours
> binary. I.e. "diff", "cmp" etc. If it is a "that's ours" this change
> should be changing e.g. '! test_cmp' to 'test_cmp !', as the former
> would now hide segfaults.

The implied assumption of `test_cmp` is that the command is reliable, i.e.
we are not expecting to test the code that runs the comparison itself.

Originally, Junio shared your concern that `diff --no-index` might not be
reliable enough, but that was "a lifetime ago". In
https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/ he hinted that the
`diff --no-index` code has matured enough to be considered reliable. That
is far from the endorsement I would wish to have received (it would have
been nice to see "I consider `git diff --no-index` to be reliable enough
to serve as `mingw_test_cmp`", for example), but I believe it is the
clearest statement I will get in that regard.

> Additionally: We don't *need* this for an initial implementation, but
> having e.g. one of the Ubuntu CI targets run with "git diff --no-index"
> would be a nice cherry on top,

Why would this be a nice cherry on top?

From my perspective, it would co-opt unrelated test cases into the task of
validating `diff --no-index`' correctness.

Such a loss of focus in test cases makes it harder to diagnose, debug and
fix breakages. And it would mean that a single bug could make gazillions
of test cases fail. That would be bad practice, of course.

>  * If we're trusting "git diff --no-index" to run the tests, we could
>    also get rid of "GIT_TEST_CMP_USE_COPIED_CONTEXT", whose only reason
>    for existing is to support a system "diff" that doesn't understand
>    "-u" (squashable diff below)

Be my guest to contribute this once the current patch has made it to
`next`. But please only then, we have enough friction on the Git mailing
list and do not need to go out of our way to add more.

Thanks,
Johannes

>
> 1. https://lore.kernel.org/git/220907.86v8pzl6jz.gmgdl@evledraar.gmail.com/
> 2. https://lore.kernel.org/git/220907.86r10nl63s.gmgdl@evledraar.gmail.com/
>
> diff --git a/Makefile b/Makefile
> index 4927379184c..dea6069b5fe 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1950,10 +1950,6 @@ ifdef OVERRIDE_STRDUP
>  	COMPAT_OBJS += compat/strdup.o
>  endif
>
> -ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
> -	export GIT_TEST_CMP_USE_COPIED_CONTEXT
> -endif
> -
>  ifndef NO_MSGFMT_EXTENDED_OPTIONS
>  	MSGFMT += --check
>  endif
> @@ -3008,9 +3004,6 @@ endif
>  ifdef GIT_TEST_CMP
>  	@echo GIT_TEST_CMP=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_CMP)))'\' >>$@+
>  endif
> -ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
> -	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
> -endif
>  ifdef GIT_TEST_UTF8_LOCALE
>  	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
>  endif
> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 4fab1c1984c..cd6e9f797b6 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1503,12 +1503,7 @@ export PATH GIT_EXEC_PATH GIT_TEMPLATE_DIR GIT_CONFIG_NOSYSTEM GIT_ATTR_NOSYSTEM
>
>  if test -z "$GIT_TEST_CMP"
>  then
> -	if test -n "$GIT_TEST_CMP_USE_COPIED_CONTEXT"
> -	then
> -		GIT_TEST_CMP="$DIFF -c"
> -	else
> -		GIT_TEST_CMP="$DIFF -u"
> -	fi
> +	GIT_TEST_CMP="$DIFF -u"
>  fi
>
>  GITPERLLIB="$GIT_BUILD_DIR"/perl/build/lib
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp
  2022-11-12 22:07   ` [PATCH v3 " Johannes Schindelin via GitGitGadget
  2022-11-12 22:07     ` [PATCH v3 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
  2022-11-12 22:07     ` [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
@ 2022-11-14 14:06     ` Johannes Schindelin via GitGitGadget
  2022-11-14 14:06       ` [PATCH v4 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
                         ` (2 more replies)
  2 siblings, 3 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-11-14 14:06 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

A few months ago, directly after sending a patch to fix a performance
regression due to a mis-use of test_cmp
[https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/],
I got curious to see whether Git for Windows had the same issue. And it did
not: it passes t5351 in 22 seconds, even while using test_cmp to compare
pack files
[https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90].

The explanation is of course that Git for Windows uses a test helper for
test_cmp that is written in C, instead of the Bash function. And C is much
faster than a Bash function, especially on Windows. This is especially sad
when said Bash code is only used on Windows. So I originally had pulled out
this helper from the years-long effort to let Git for Windows use BusyBox'
ash to run the test suite. The result was a single-patch contribution of a
change that had been in Git for Windows since June 2018. Unfortunately, this
tried-and-tested code was rejected by the Git maintainer.

Let's fall back to the next-best solution: git diff --no-index, which the
Git maintainer seems to like. The downside is that the diff machinery does a
lot more than a simple cmp clone, and therefore a lot more things can go
wrong that might make it look like a test case is failing when the fault is
somewhere else entirely. There is one way to find out whether this is a
valid concern.

Changes since v3:

 * Fixed the subject of the cover letter (which should have been adjusted in
   v3)
 * Elaborated the paragraph about the historical context of this patch

Changes since v2:

 * Dropped the test helper, using diff --no-index instead.

Changes since v1:

 * Fixed double "with" in the commit message.
 * Renamed the test helper to text-cmp.
 * Made the diff --no-index call more robust by using a double-dash
   separator.

Johannes Schindelin (2):
  t0021: use Windows-friendly `pwd`
  tests(mingw): avoid very slow `mingw_test_cmp`

 t/t0021-conversion.sh   |  4 +--
 t/test-lib-functions.sh | 66 -----------------------------------------
 t/test-lib.sh           |  2 +-
 3 files changed, 3 insertions(+), 69 deletions(-)

base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v4
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v4
Pull-Request: https://github.com/gitgitgadget/git/pull/1309

Range-diff vs v3:

 1:  b38b8fb5a85 = 1:  b38b8fb5a85 t0021: use Windows-friendly `pwd`
 2:  a7f4265ceb2 ! 2:  128b1f348d8 tests(mingw): avoid very slow `mingw_test_cmp`
     @@ Commit message
          `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
          Windows uses. And a lot more readable.

     -    Note: Earlier attempts at fixing this involved a test helper that avoids
     -    the overhead of the diff machinery, in favor of implementing a behavior
     -    that is more in line with what `mingw_test_cmp` does now, but that
     -    attempt saw a lot of backlash and distractions during review and was
     -    therefore abandoned.
     +    The original reason why Git's test suite needs the `mingw_test_cmp`
     +    function at all (and why `cmp` is not good enough) is that Git's test
     +    suite is not actually trying to compare binary files when it calls
     +    `test_cmp`, but it compares text files. And those text files can contain
     +    CR/LF line endings depending on the circumstances.
     +
     +    Note: The original fix in the Git for Windows project implemented a test
     +    helper that avoids the overhead of the diff machinery, in favor of
     +    implementing a behavior that is more in line with what `mingw_test_cmp`
     +    does now. This was done to minimize the risk in using something as
     +    complex as the diff machinery to perform something as simple as
     +    determining whether text output is identical to the expected output or
     +    not. This approach has served Git for Windows well for years, but the
     +    attempt to upstream this saw a lot of backlash and distractions during
     +    the review, was disliked by the Git maintainer and was therefore
     +    abandoned. For full details, see the thread at
     +    https://lore.kernel.org/git/pull.1309.git.1659106382128.gitgitgadget@gmail.com/t

          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v4 1/2] t0021: use Windows-friendly `pwd`
  2022-11-14 14:06     ` [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
@ 2022-11-14 14:06       ` Johannes Schindelin via GitGitGadget
  2022-11-14 14:06       ` [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2022-12-06 15:07       ` [PATCH v5 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2 siblings, 0 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-11-14 14:06 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

In Git for Windows, when passing paths from shell scripts to regular
Win32 executables, thanks to the MSYS2 runtime a somewhat magic path
conversion happens that lets the shell script think that there is a file
at `/git/Makefile` and the Win32 process it spawned thinks that the
shell script said `C:/git-sdk-64/git/Makefile` instead.

This conversion is documented in detail over here:
https://www.msys2.org/docs/filesystem-paths/#automatic-unix-windows-path-conversion

As all automatic conversions, there are gaps. For example, to avoid
mistaking command-line options like `/LOG=log.txt` (which are quite
common in the Windows world) from being mistaken for a Unix-style
absolute path, the MSYS2 runtime specifically exempts arguments
containing a `=` character from that conversion.

We are about to change `test_cmp` to use `git diff --no-index`, which
involves spawning precisely such a Win32 process.

In combination, this would cause a failure in `t0021-conversion.sh`
where we pass an absolute path containing an equal character to the
`test_cmp` function.

Seeing as the Unix tools like `cp` and `diff` that are used by Git's
test suite in the Git for Windows SDK (thanks to the MSYS2 project)
understand both Unix-style as well as Windows-style paths, we can stave
off this problem by simply switching to Windows-style paths and
side-stepping the need for any automatic path conversion.

Note: The `PATH` variable is obviously special, as it is colon-separated
in the MSYS2 Bash used by Git for Windows, and therefore _cannot_
contain absolute Windows-style paths, lest the colon after the drive
letter is mistaken for a path separator. Therefore, we need to be
careful to keep the Unix-style when modifying the `PATH` variable.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/t0021-conversion.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index bad37abad2c..15482fa78e3 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -8,8 +8,8 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-terminal.sh

-TEST_ROOT="$PWD"
-PATH=$TEST_ROOT:$PATH
+PATH=$PWD:$PATH
+TEST_ROOT="$(pwd)"

 write_script <<\EOF "$TEST_ROOT/rot13.sh"
 tr \
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14 14:06     ` [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2022-11-14 14:06       ` [PATCH v4 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
@ 2022-11-14 14:06       ` Johannes Schindelin via GitGitGadget
  2022-11-14 22:40         ` Taylor Blau
  2022-12-06 15:07       ` [PATCH v5 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-11-14 14:06 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

It is more performant to run `git diff --no-index` than running the
`mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
Windows uses. And a lot more readable.

The original reason why Git's test suite needs the `mingw_test_cmp`
function at all (and why `cmp` is not good enough) is that Git's test
suite is not actually trying to compare binary files when it calls
`test_cmp`, but it compares text files. And those text files can contain
CR/LF line endings depending on the circumstances.

Note: The original fix in the Git for Windows project implemented a test
helper that avoids the overhead of the diff machinery, in favor of
implementing a behavior that is more in line with what `mingw_test_cmp`
does now. This was done to minimize the risk in using something as
complex as the diff machinery to perform something as simple as
determining whether text output is identical to the expected output or
not. This approach has served Git for Windows well for years, but the
attempt to upstream this saw a lot of backlash and distractions during
the review, was disliked by the Git maintainer and was therefore
abandoned. For full details, see the thread at
https://lore.kernel.org/git/pull.1309.git.1659106382128.gitgitgadget@gmail.com/t

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/test-lib-functions.sh | 66 -----------------------------------------
 t/test-lib.sh           |  2 +-
 2 files changed, 1 insertion(+), 67 deletions(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8c44856eaec..452fe9bc8aa 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1541,72 +1541,6 @@ test_skip_or_die () {
 	error "$2"
 }
 
-# The following mingw_* functions obey POSIX shell syntax, but are actually
-# bash scripts, and are meant to be used only with bash on Windows.
-
-# A test_cmp function that treats LF and CRLF equal and avoids to fork
-# diff when possible.
-mingw_test_cmp () {
-	# Read text into shell variables and compare them. If the results
-	# are different, use regular diff to report the difference.
-	local test_cmp_a= test_cmp_b=
-
-	# When text came from stdin (one argument is '-') we must feed it
-	# to diff.
-	local stdin_for_diff=
-
-	# Since it is difficult to detect the difference between an
-	# empty input file and a failure to read the files, we go straight
-	# to diff if one of the inputs is empty.
-	if test -s "$1" && test -s "$2"
-	then
-		# regular case: both files non-empty
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-	elif test -s "$1" && test "$2" = -
-	then
-		# read 2nd file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b
-		stdin_for_diff='<<<"$test_cmp_b"'
-	elif test "$1" = - && test -s "$2"
-	then
-		# read 1st file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-		stdin_for_diff='<<<"$test_cmp_a"'
-	fi
-	test -n "$test_cmp_a" &&
-	test -n "$test_cmp_b" &&
-	test "$test_cmp_a" = "$test_cmp_b" ||
-	eval "diff -u \"\$@\" $stdin_for_diff"
-}
-
-# $1 is the name of the shell variable to fill in
-mingw_read_file_strip_cr_ () {
-	# Read line-wise using LF as the line separator
-	# and use IFS to strip CR.
-	local line
-	while :
-	do
-		if IFS=$'\r' read -r -d $'\n' line
-		then
-			# good
-			line=$line$'\n'
-		else
-			# we get here at EOF, but also if the last line
-			# was not terminated by LF; in the latter case,
-			# some text was read
-			if test -z "$line"
-			then
-				# EOF, really
-				break
-			fi
-		fi
-		eval "$1=\$$1\$line"
-	done
-}
-
 # Like "env FOO=BAR some-program", but run inside a subshell, which means
 # it also works for shell functions (though those functions cannot impact
 # the environment outside of the test_env invocation).
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 7726d1da88a..f8c6205e08f 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1546,7 +1546,7 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
+	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14 14:02         ` Johannes Schindelin
@ 2022-11-14 15:23           ` Ævar Arnfjörð Bjarmason
  2022-11-18 23:19             ` Junio C Hamano
  0 siblings, 1 reply; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-11-14 15:23 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Eric Sunshine


On Mon, Nov 14 2022, Johannes Schindelin wrote:

> Hi Ævar,
>
> On Mon, 14 Nov 2022, Ævar Arnfjörð Bjarmason wrote:
>
>> [...] before this we've been assuming that GIT_TEST_CMP is a not-ours
>> binary. I.e. "diff", "cmp" etc. If it is a "that's ours" this change
>> should be changing e.g. '! test_cmp' to 'test_cmp !', as the former
>> would now hide segfaults.
>
> The implied assumption of `test_cmp` is that the command is reliable, i.e.
> we are not expecting to test the code that runs the comparison itself.
>
> Originally, Junio shared your concern that `diff --no-index` might not be
> reliable enough, but that was "a lifetime ago". In
> https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/ he hinted that the
> `diff --no-index` code has matured enough to be considered reliable. That
> is far from the endorsement I would wish to have received (it would have
> been nice to see "I consider `git diff --no-index` to be reliable enough
> to serve as `mingw_test_cmp`", for example), but I believe it is the
> clearest statement I will get in that regard.

I don't see a mention of reliability in the original 2007 thread, just
optimization v.s. "cmp". But the list archive seems to only carry the
CL, perhaps it was in that 7/8 or 8/8 patch?

In any case, that's different than what I'm pointing out, which is:

I fully trust "git diff --no-index" these days *in general* to serve as
a "/usr/bin/diff -u" replacement, but *only* as long as it's the
"/usr/bin/git" version of "git diff", and not the "git" we just built
in-tree.

IOW this bit in t/README:

 - Don't use '! git cmd' when you want to make sure the git command
   exits with failure in a controlled way by calling "die()".[...]

So, before this series a test like this on Windows, with
GIT_TEST_CMP=mingw_test_cmp:

	! test_cmp foo bar

Would only succeed if "foo" and "bar" weren't identical, now it'll also
succeed if 'git diff --no-index' happens to segfault on those inputs.

Now, the good thing is that the fallout of this should be relatively
contained in this case. We'd only get those cases sneaking in if it's in
the Windows-only code, as we use "/usr/bin/diff -u" elsewhere.

I think the diff here at the end (on top af master, but you get the
idea) is what you'd need to stop conflating those. Most of it's just
's/! test_cmp/test_cmp !/g'.

The "test_cmp" then learns to distinguish between "a git" command and
"not a git". I guess we could use test_must_fail_acceptable() there to
to make this DWYM...

>> Additionally: We don't *need* this for an initial implementation, but
>> having e.g. one of the Ubuntu CI targets run with "git diff --no-index"
>> would be a nice cherry on top,
>
> Why would this be a nice cherry on top?
>
> From my perspective, it would co-opt unrelated test cases into the task of
> validating `diff --no-index`' correctness.
>
> Such a loss of focus in test cases makes it harder to diagnose, debug and
> fix breakages. And it would mean that a single bug could make gazillions
> of test cases fail. That would be bad practice, of course.

Yeah, probably not worth it. It would illustrate which
--ignore-cr-at-eol is just due to test_cmp v.s. test_cmp_bin semantics
on *nix though. E.g. in addition to the below this makes things pass
without --ignore-cr-at-eol for me (aside from the t0001-init.sh failure
I noted in the v2 feedback):

	diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh
	index 7b5423eebda..f2e6b7a078c 100755
	--- a/t/t0061-run-command.sh
	+++ b/t/t0061-run-command.sh
	@@ -72,7 +72,7 @@ test_expect_success 'run_command does not try to execute a directory' '
	 
	 	PATH=$PWD/bin1:$PWD/bin2:$PATH \
	 		test-tool run-command run-command greet >actual 2>err &&
	-	test_cmp bin2/greet actual &&
	+	test_cmp_bin bin2/greet actual &&
	 	test_must_be_empty err
	 '
	 
	@@ -89,7 +89,7 @@ test_expect_success POSIXPERM 'run_command passes over non-executable file' '
	 
	 	PATH=$PWD/bin1:$PWD/bin2:$PATH \
	 		test-tool run-command run-command greet >actual 2>err &&
	-	test_cmp bin2/greet actual &&
	+	test_cmp_bin bin2/greet actual &&
	 	test_must_be_empty err
	 '
	 
	diff --git a/t/t4120-apply-popt.sh b/t/t4120-apply-popt.sh
	index 497b62868d4..4001ac24adf 100755
	--- a/t/t4120-apply-popt.sh
	+++ b/t/t4120-apply-popt.sh
	@@ -83,7 +83,7 @@ test_expect_success 'apply (-p2) diff, rename' '
	 	cp file1.saved file1 &&
	 	rm -f file2 &&
	 	git apply -p2 patch.rename &&
	-	test_cmp expected file2
	+	test_cmp_bin expected file2
	 '
	 
	 test_done
	diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
	index 5d853225a93..4c64396ed15 100644
	--- a/t/test-lib-functions.sh
	+++ b/t/test-lib-functions.sh
	@@ -1163,9 +1163,9 @@ test_cmp () {
	 		if test -n "$negate"
	 		then
	 			test_expect_code 1 env GIT_DIR=/dev/null \
	-					 git diff --no-index --ignore-cr-at-eol -- "$@"
	+					 git diff --no-index -- "$@"
	 		else
	-			GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol -- "$@"
	+			GIT_DIR=/dev/null git diff --no-index -- "$@"
	 		fi
	 	else
	 		if test -n "$negate"


>>  * If we're trusting "git diff --no-index" to run the tests, we could
>>    also get rid of "GIT_TEST_CMP_USE_COPIED_CONTEXT", whose only reason
>>    for existing is to support a system "diff" that doesn't understand
>>    "-u" (squashable diff below)
>
> Be my guest to contribute this once the current patch has made it to
> `next`. But please only then, we have enough friction on the Git mailing
> list and do not need to go out of our way to add more.

Yeah, we can leave the nice-to-haves for later, a "conflates files being
different with segfaults" really should be fixed though. And hopefully
the below helps.

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index 1f6b9b08d1d..d26ed200cbf 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -304,7 +304,7 @@ check_access_log() {
 	sort "$1" >"$1".sorted &&
 	strip_access_log >access.log.stripped &&
 	sort access.log.stripped >access.log.sorted &&
-	if ! test_cmp "$1".sorted access.log.sorted
+	if test_cmp ! "$1".sorted access.log.sorted
 	then
 		test_cmp "$1" access.log.stripped
 	fi
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index abecd75e4e4..6e1e6c056ff 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -751,7 +751,7 @@ test_expect_success 'process filter should restart after unexpected write failur
 		test_cmp_committed_rot13 "$TEST_ROOT/test2.o" test2.r &&
 
 		# Smudge failed
-		! test_cmp smudge-write-fail.o smudge-write-fail.r &&
+		test_cmp ! smudge-write-fail.o smudge-write-fail.r &&
 		rot13.sh <smudge-write-fail.o >expected &&
 		git cat-file blob :smudge-write-fail.r >actual &&
 		test_cmp expected actual
diff --git a/t/t0450-txt-doc-vs-help.sh b/t/t0450-txt-doc-vs-help.sh
index cd3969e852b..dfab275dbe6 100755
--- a/t/t0450-txt-doc-vs-help.sh
+++ b/t/t0450-txt-doc-vs-help.sh
@@ -14,7 +14,7 @@ test_expect_success 'setup: list of builtins' '
 
 test_expect_success 'list of txt and help mismatches is sorted' '
 	sort -u "$TEST_DIRECTORY"/t0450/txt-help-mismatches >expect &&
-	if ! test_cmp expect "$TEST_DIRECTORY"/t0450/txt-help-mismatches
+	if test_cmp ! expect "$TEST_DIRECTORY"/t0450/txt-help-mismatches
 	then
 		BUG "please keep the list of txt and help mismatches sorted"
 	fi
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 801919009e1..d11980916b0 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -1214,7 +1214,7 @@ test_expect_success 'checkout-index with folders' '
 	# entry is a sparse directory.
 	run_on_all test_must_fail git checkout-index -f -- folder1/ &&
 	test_cmp full-checkout-err sparse-checkout-err &&
-	! test_cmp full-checkout-err sparse-index-err &&
+	test_cmp ! full-checkout-err sparse-index-err &&
 	grep "is a sparse directory" sparse-index-err
 '
 
diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
index cf58cf025cd..14b6b58f531 100755
--- a/t/t1400-update-ref.sh
+++ b/t/t1400-update-ref.sh
@@ -1403,7 +1403,7 @@ test_expect_success 'handle per-worktree refs in refs/bisect' '
 	test_must_fail git rev-parse refs/bisect/something &&
 	git update-ref refs/bisect/something HEAD &&
 	git rev-parse refs/bisect/something >main-head &&
-	! test_cmp main-head worktree-head
+	test_cmp ! main-head worktree-head
 '
 
 test_expect_success 'transaction handles empty commit' '
diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh
index 7f605f865b3..dafb7860451 100755
--- a/t/t3200-branch.sh
+++ b/t/t3200-branch.sh
@@ -778,7 +778,7 @@ test_expect_success SYMLINKS 'git branch -m with symlinked .git/refs' '
 
 	git -C subdir rev-parse --absolute-git-dir >subdir.dir &&
 	git rev-parse --absolute-git-dir >our.dir &&
-	! test_cmp subdir.dir our.dir &&
+	test_cmp ! subdir.dir our.dir &&
 
 	git -C subdir log &&
 	git -C subdir branch rename-src &&
diff --git a/t/t3207-branch-submodule.sh b/t/t3207-branch-submodule.sh
index fe72b247164..d2fba236a19 100755
--- a/t/t3207-branch-submodule.sh
+++ b/t/t3207-branch-submodule.sh
@@ -155,7 +155,7 @@ test_expect_success 'should create branches if branch exists and --force is give
 		test_cmp expected actual-new-branch-a &&
 		# assert that branch --force actually moved the sub
 		# branch
-		! test_cmp expected actual-old-branch-a
+		test_cmp ! expected actual-old-branch-a
 	)
 '
 
diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
index de1da4673da..3e7ceb27f3c 100755
--- a/t/t4014-format-patch.sh
+++ b/t/t4014-format-patch.sh
@@ -2136,8 +2136,8 @@ test_expect_success 'format-patch --base' '
 	awk "{print \"prerequisite-patch-id:\", \$1}" <patch.id.raw >>fail &&
 
 	signature >>fail &&
-	! test_cmp fail actual1 &&
-	! test_cmp fail actual2
+	test_cmp ! fail actual1 &&
+	test_cmp ! fail actual2
 '
 
 test_expect_success 'format-patch --base errors out when base commit is in revision list' '
diff --git a/t/t4125-apply-ws-fuzz.sh b/t/t4125-apply-ws-fuzz.sh
index 090987c89b2..d2e61579d0a 100755
--- a/t/t4125-apply-ws-fuzz.sh
+++ b/t/t4125-apply-ws-fuzz.sh
@@ -93,7 +93,7 @@ test_expect_success 'withfix (backward)' '
 	sed -n -e /h/p file-fixed >fixed-tail &&
 	sed -n -e /h/p file >file-tail &&
 
-	! test_cmp fixed-tail file-tail
+	test_cmp ! fixed-tail file-tail
 
 '
 
diff --git a/t/t4150-am.sh b/t/t4150-am.sh
index cdad4b68807..8028aa2b712 100755
--- a/t/t4150-am.sh
+++ b/t/t4150-am.sh
@@ -837,7 +837,7 @@ test_expect_success 'am without --committer-date-is-author-date' '
 	git cat-file commit HEAD | sed -e "/^\$/q" >head1 &&
 	sed -ne "/^author /s/.*> //p" head1 >at &&
 	sed -ne "/^committer /s/.*> //p" head1 >ct &&
-	! test_cmp at ct
+	test_cmp ! at ct
 '
 
 # This checks for +0000 because TZ is set to UTC and that should
diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
index a7fa94ce0a2..edc11213a10 100755
--- a/t/t4204-patch-id.sh
+++ b/t/t4204-patch-id.sh
@@ -74,7 +74,7 @@ test_expect_success 'patch-id detects equality' '
 test_expect_success 'patch-id detects inequality' '
 	get_patch_id main &&
 	get_patch_id notsame &&
-	! test_cmp patch-id_main patch-id_notsame
+	test_cmp ! patch-id_main patch-id_notsame
 '
 test_expect_success 'patch-id detects equality binary' '
 	cat >.gitattributes <<-\EOF &&
@@ -100,7 +100,7 @@ test_expect_success 'patch-id detects inequality binary' '
 	EOF
 	get_patch_id main &&
 	get_patch_id notsame &&
-	! test_cmp patch-id_main patch-id_notsame &&
+	test_cmp ! patch-id_main patch-id_notsame &&
 	test_when_finished "rm .gitattributes"
 '
 
@@ -126,7 +126,7 @@ cmp_patch_id () {
 	if
 		test "$1" = "relevant"
 	then
-		! test_cmp patch-id_"$2" patch-id_"$3"
+		test_cmp ! patch-id_"$2" patch-id_"$3"
 	else
 		test_cmp patch-id_"$2" patch-id_"$3"
 	fi
@@ -279,7 +279,7 @@ test_expect_success 'patch-id handles no-nl-at-eof markers' '
 	test_cmp patch-id_nonl patch-id_withnl &&
 	calc_patch_id nonl-inc-ws --verbatim <nonl &&
 	calc_patch_id withnl-inc-ws --verbatim <withnl &&
-	! test_cmp patch-id_nonl-inc-ws patch-id_withnl-inc-ws
+	test_cmp ! patch-id_nonl-inc-ws patch-id_withnl-inc-ws
 '
 
 test_expect_success 'patch-id handles diffs with one line of before/after' '
diff --git a/t/t5200-update-server-info.sh b/t/t5200-update-server-info.sh
index ed9dfd624c7..06e4ca7bdca 100755
--- a/t/t5200-update-server-info.sh
+++ b/t/t5200-update-server-info.sh
@@ -26,7 +26,7 @@ test_expect_success 'info/refs is not needlessly overwritten' '
 test_expect_success 'info/refs can be forced to update' '
 	git update-server-info -f &&
 	test-tool chmtime --get .git/info/refs >b &&
-	! test_cmp a b
+	test_cmp ! a b
 '
 
 test_expect_success 'info/refs updates when changes are made' '
@@ -36,7 +36,7 @@ test_expect_success 'info/refs updates when changes are made' '
 	git update-ref refs/heads/foo HEAD &&
 	git update-server-info &&
 	test-tool chmtime --get .git/info/refs >b &&
-	! test_cmp a b
+	test_cmp ! a b
 '
 
 test_done
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index 6d693eef82f..9718f0ab12a 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -400,7 +400,7 @@ test_bitmap_cases () {
 			test-tool bitmap list-commits | sort >bitmaps &&
 			comm -13 bitmaps commits >after &&
 
-			! test_cmp before after
+			test_cmp ! before after
 		)
 	'
 
diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh
index 0882cbb6e4a..338ccd32fac 100755
--- a/t/t5326-multi-pack-bitmaps.sh
+++ b/t/t5326-multi-pack-bitmaps.sh
@@ -186,7 +186,7 @@ test_midx_bitmap_cases () {
 			test-tool bitmap list-commits | sort >bitmaps &&
 			comm -13 bitmaps commits >after &&
 
-			! test_cmp before after
+			test_cmp ! before after
 		)
 	'
 
@@ -268,7 +268,7 @@ test_midx_bitmap_cases () {
 			test-tool bitmap list-commits | sort >bitmaps &&
 			comm -13 bitmaps commits >after &&
 
-			! test_cmp before after
+			test_cmp ! before after
 		)
 	'
 
diff --git a/t/t5512-ls-remote.sh b/t/t5512-ls-remote.sh
index 20d063fb9ae..7037335533c 100755
--- a/t/t5512-ls-remote.sh
+++ b/t/t5512-ls-remote.sh
@@ -111,7 +111,7 @@ test_expect_success 'use "origin" when no remote specified and multiple found' '
 
 test_expect_success 'suppress "From <url>" with -q' '
 	git ls-remote -q 2>actual_err &&
-	! test_cmp exp_err actual_err
+	test_cmp ! exp_err actual_err
 '
 
 test_expect_success 'use branch.<name>.remote if possible' '
diff --git a/t/t5612-clone-refspec.sh b/t/t5612-clone-refspec.sh
index 3126cfd7e9d..eeef68c9f3d 100755
--- a/t/t5612-clone-refspec.sh
+++ b/t/t5612-clone-refspec.sh
@@ -90,7 +90,7 @@ test_expect_success 'by default no tags will be kept updated' '
 		git for-each-ref refs/tags >../actual
 	) &&
 	git for-each-ref refs/tags >expect &&
-	! test_cmp expect actual &&
+	test_cmp ! expect actual &&
 	test_line_count = 2 actual
 '
 
diff --git a/t/t6006-rev-list-format.sh b/t/t6006-rev-list-format.sh
index 41d0ca00b1c..c927b0d9e62 100755
--- a/t/t6006-rev-list-format.sh
+++ b/t/t6006-rev-list-format.sh
@@ -538,7 +538,7 @@ test_expect_success '--abbrev' '
 	sed -e "s/$OID_REGEX/LONG/g" -e "s/$_x05/SHORT/g" <actual3 >fuzzy3 &&
 	test_cmp expect2 fuzzy2 &&
 	test_cmp expect3 fuzzy3 &&
-	! test_cmp actual1 actual2
+	test_cmp ! actual1 actual2
 '
 
 test_expect_success '%H is not affected by --abbrev-commit' '
diff --git a/t/t7107-reset-pathspec-file.sh b/t/t7107-reset-pathspec-file.sh
index 523efbecde1..d4819317649 100755
--- a/t/t7107-reset-pathspec-file.sh
+++ b/t/t7107-reset-pathspec-file.sh
@@ -24,7 +24,7 @@ verify_expect () {
 	git status --porcelain -- fileA.t fileB.t fileC.t fileD.t >actual &&
 	if test "x$1" = 'x!'
 	then
-		! test_cmp expect actual
+		test_cmp ! expect actual
 	else
 		test_cmp expect actual
 	fi
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index eae6a46ef3d..904f91fc4bc 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -451,7 +451,7 @@ test_expect_success 'status from subdirectory should have the same SHA1' '
 		git submodule status >output &&
 		awk "{print \$1}" <output >expect2 &&
 		test_cmp expect2 actual2 &&
-		! test_cmp actual actual2
+		test_cmp ! actual actual2
 	)
 '
 
diff --git a/t/t7508-status.sh b/t/t7508-status.sh
index 2b7ef6c41a4..fb3d51de13d 100755
--- a/t/t7508-status.sh
+++ b/t/t7508-status.sh
@@ -1510,7 +1510,7 @@ test_expect_success '"status.branch=true" same as "-b"' '
 test_expect_success '"status.branch=true" different from "--no-branch"' '
 	git status -s --no-branch  >expected_nobranch &&
 	git -c status.branch=true status -s >actual &&
-	! test_cmp expected_nobranch actual
+	test_cmp ! expected_nobranch actual
 '
 
 test_expect_success '"status.branch=true" weaker than "--no-branch"' '
diff --git a/t/t7600-merge.sh b/t/t7600-merge.sh
index 7c3f6ed9943..acdf91a77a2 100755
--- a/t/t7600-merge.sh
+++ b/t/t7600-merge.sh
@@ -1059,7 +1059,7 @@ test_expect_success 'merge --quit' '
 		test_path_is_missing .git/MERGE_MSG &&
 		git rerere status >rerere.after &&
 		test_must_be_empty rerere.after &&
-		! test_cmp rerere.after rerere.before
+		test_cmp ! rerere.after rerere.before
 	)
 '
 
diff --git a/t/t7606-merge-custom.sh b/t/t7606-merge-custom.sh
index 81fb7c474c1..687dbcc9d23 100755
--- a/t/t7606-merge-custom.sh
+++ b/t/t7606-merge-custom.sh
@@ -55,7 +55,7 @@ test_expect_success 'merge c2 with a custom strategy' '
 	git diff --exit-code c2 HEAD &&
 	git diff --exit-code c2 &&
 
-	! test_cmp head.old head.new &&
+	test_cmp ! head.old head.new &&
 	test_cmp head.old first-parent &&
 	test_cmp second-parent.expected second-parent &&
 	test_cmp tree.expected tree &&
@@ -81,7 +81,7 @@ test_expect_success 'trivial merge with custom strategy' '
 	git diff --exit-code c3 HEAD &&
 	git diff --exit-code c3 &&
 
-	! test_cmp head.old head.new &&
+	test_cmp ! head.old head.new &&
 	test_cmp head.old first-parent &&
 	test_cmp second-parent.expected second-parent &&
 	test_cmp tree.expected tree &&
diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh
index 5be483bf887..a1d62e8566c 100755
--- a/t/t7700-repack.sh
+++ b/t/t7700-repack.sh
@@ -365,7 +365,7 @@ test_expect_success '--write-midx with preferred bitmap tips' '
 		test-tool bitmap list-commits | sort >bitmaps &&
 		comm -13 bitmaps commits >after &&
 
-		! test_cmp before after
+		test_cmp ! before after
 	)
 '
 
diff --git a/t/t8013-blame-ignore-revs.sh b/t/t8013-blame-ignore-revs.sh
index b18633dee1b..d8f26d1197b 100755
--- a/t/t8013-blame-ignore-revs.sh
+++ b/t/t8013-blame-ignore-revs.sh
@@ -184,10 +184,10 @@ test_expect_success mark_ignored_lines '
 	test_cmp expect actual &&
 
 	sed -n "3p" blame_raw | cut -c1 >actual &&
-	! test_cmp expect actual &&
+	test_cmp ! expect actual &&
 
 	sed -n "4p" blame_raw | cut -c1 >actual &&
-	! test_cmp expect actual
+	test_cmp ! expect actual
 '
 
 # For ignored revs that added 'unblamable' lines and more recent commits changed
diff --git a/t/t9164-git-svn-dcommit-concurrent.sh b/t/t9164-git-svn-dcommit-concurrent.sh
index 1465156072e..53f240e45cf 100755
--- a/t/t9164-git-svn-dcommit-concurrent.sh
+++ b/t/t9164-git-svn-dcommit-concurrent.sh
@@ -94,7 +94,7 @@ test_expect_success 'check if post-commit hook creates a concurrent commit' '
 		echo 1 >> file &&
 		svn_cmd commit -m "changing file" &&
 		svn_cmd up &&
-		! test_cmp auto_updated_file au_file_saved
+		test_cmp ! auto_updated_file au_file_saved
 	)
 '
 
diff --git a/t/t9351-fast-export-anonymize.sh b/t/t9351-fast-export-anonymize.sh
index 77047e250dc..29a2d3a03a7 100755
--- a/t/t9351-fast-export-anonymize.sh
+++ b/t/t9351-fast-export-anonymize.sh
@@ -133,7 +133,7 @@ test_expect_success 'idents are shared' '
 	git log --all --format="%cn <%ce>" >committers &&
 	sort -u committers >unique &&
 	test_line_count = 1 unique &&
-	! test_cmp authors committers
+	test_cmp ! authors committers
 '
 
 test_done
diff --git a/t/t9810-git-p4-rcs.sh b/t/t9810-git-p4-rcs.sh
index 5fe83315ecd..6fdfa7d5855 100755
--- a/t/t9810-git-p4-rcs.sh
+++ b/t/t9810-git-p4-rcs.sh
@@ -68,7 +68,7 @@ scrub_k_check () {
 	file="$1" &&
 	scrub="$TRASH_DIRECTORY/$file" &&
 	"$PYTHON_PATH" "$TRASH_DIRECTORY/scrub_k.py" <"$git/$file" >"$scrub" &&
-	! test_cmp "$cli/$file" "$scrub" &&
+	test_cmp ! "$cli/$file" "$scrub" &&
 	test_cmp "$git/$file" "$scrub" &&
 	rm "$scrub"
 }
@@ -76,7 +76,7 @@ scrub_ko_check () {
 	file="$1" &&
 	scrub="$TRASH_DIRECTORY/$file" &&
 	"$PYTHON_PATH" "$TRASH_DIRECTORY/scrub_ko.py" <"$git/$file" >"$scrub" &&
-	! test_cmp "$cli/$file" "$scrub" &&
+	test_cmp ! "$cli/$file" "$scrub" &&
 	test_cmp "$git/$file" "$scrub" &&
 	rm "$scrub"
 }
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 796093a7b32..5d853225a93 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1148,8 +1148,33 @@ test_expect_code () {
 # - not all diff versions understand "-u"
 
 test_cmp () {
-	test "$#" -ne 2 && BUG "2 param"
-	eval "$GIT_TEST_CMP" '"$@"'
+	test "$#" -lt 2 && BUG "2..3 param"
+	test "$#" -gt 3 && BUG "2..3 param"
+
+	local negate=
+	if test "$#" = 3 && test "$1" = "!"
+	then
+		negate=t
+		shift
+	fi
+
+	if test "$GIT_TEST_CMP" = "builtin:git-no-index"
+	then
+		if test -n "$negate"
+		then
+			test_expect_code 1 env GIT_DIR=/dev/null \
+					 git diff --no-index --ignore-cr-at-eol -- "$@"
+		else
+			GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol -- "$@"
+		fi
+	else
+		if test -n "$negate"
+		then
+			! "$GIT_TEST_CMP" "$@"
+		else
+			"$GIT_TEST_CMP" "$@"
+		fi
+	fi
 }
 
 # Check that the given config key has the expected value.
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 6db377f68b8..3028283f307 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1721,7 +1721,7 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
+	GIT_TEST_CMP=builtin:git-no-index
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14 14:06       ` [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
@ 2022-11-14 22:40         ` Taylor Blau
  2022-11-18 13:32           ` Johannes Schindelin
  0 siblings, 1 reply; 58+ messages in thread
From: Taylor Blau @ 2022-11-14 22:40 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget
  Cc: git, René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

On Mon, Nov 14, 2022 at 02:06:52PM +0000, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> It is more performant to run `git diff --no-index` than running the
> `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> Windows uses. And a lot more readable.
>
> The original reason why Git's test suite needs the `mingw_test_cmp`
> function at all (and why `cmp` is not good enough) is that Git's test
> suite is not actually trying to compare binary files when it calls
> `test_cmp`, but it compares text files. And those text files can contain
> CR/LF line endings depending on the circumstances.
>
> Note: The original fix in the Git for Windows project implemented a test
> helper that avoids the overhead of the diff machinery, in favor of
> implementing a behavior that is more in line with what `mingw_test_cmp`
> does now. This was done to minimize the risk in using something as
> complex as the diff machinery to perform something as simple as
> determining whether text output is identical to the expected output or
> not. This approach has served Git for Windows well for years, but the
> attempt to upstream this saw a lot of backlash and distractions during
> the review, was disliked by the Git maintainer and was therefore
> abandoned. For full details, see the thread at
> https://lore.kernel.org/git/pull.1309.git.1659106382128.gitgitgadget@gmail.com/t

In the previous round, you wrote that this paragraph:

    It explains why we replace a relatively simple logic with a relatively
    complex one.

But I'm not sure the rewritten version does what you claim, at least in
my own personal opinion.

It is not helpful to say the original approach "saw a lot of backlash".
It is helpful, on the other hand, to say what about the original
approach gave reviewers pause, and why that alternative approach isn't
pursued here.

Maybe I'm splitting hairs here, but I really do stand by my original
suggestion from back in [1].

Thanks,
Taylor

[1]: https://lore.kernel.org/git/Y3B36HjDJhIY5jNz@nand.local/

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14 22:40         ` Taylor Blau
@ 2022-11-18 13:32           ` Johannes Schindelin
  2022-11-18 18:14             ` Taylor Blau
  0 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin @ 2022-11-18 13:32 UTC (permalink / raw)
  To: Taylor Blau
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason

Hi Taylor,

On Mon, 14 Nov 2022, Taylor Blau wrote:

> On Mon, Nov 14, 2022 at 02:06:52PM +0000, Johannes Schindelin via GitGitGadget wrote:
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > It is more performant to run `git diff --no-index` than running the
> > `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
> > Windows uses. And a lot more readable.
> >
> > The original reason why Git's test suite needs the `mingw_test_cmp`
> > function at all (and why `cmp` is not good enough) is that Git's test
> > suite is not actually trying to compare binary files when it calls
> > `test_cmp`, but it compares text files. And those text files can contain
> > CR/LF line endings depending on the circumstances.
> >
> > Note: The original fix in the Git for Windows project implemented a test
> > helper that avoids the overhead of the diff machinery, in favor of
> > implementing a behavior that is more in line with what `mingw_test_cmp`
> > does now. This was done to minimize the risk in using something as
> > complex as the diff machinery to perform something as simple as
> > determining whether text output is identical to the expected output or
> > not. This approach has served Git for Windows well for years, but the
> > attempt to upstream this saw a lot of backlash and distractions during
> > the review, was disliked by the Git maintainer and was therefore
> > abandoned. For full details, see the thread at
> > https://lore.kernel.org/git/pull.1309.git.1659106382128.gitgitgadget@gmail.com/t
>
> In the previous round, you wrote that this paragraph:
>
>     It explains why we replace a relatively simple logic with a relatively
>     complex one.

Yes. And I obviously got myself misunderstood.

The simple logic I refer to is the `t/helper/test-cmp.c` that I abandoned.

The complex logic is the diff machinery we now call, which is a lot more
involved and goes through a lot more code paths.

> But I'm not sure the rewritten version does what you claim, at least in
> my own personal opinion.
>
> It is not helpful to say the original approach "saw a lot of backlash".

It is the nicest thing I can say about it.

And I want people who are curious what happened to Git for Windows' custom
code to have an explanation in the commit messages.

> It is helpful, on the other hand, to say what about the original
> approach gave reviewers pause, and why that alternative approach isn't
> pursued here.
>
> Maybe I'm splitting hairs here, but I really do stand by my original
> suggestion from back in [1].

We can also keep hitting a dead horse, but I don't think that will make
anything any better.

Thanks,
Johannes

>
> Thanks,
> Taylor
>
> [1]: https://lore.kernel.org/git/Y3B36HjDJhIY5jNz@nand.local/
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-18 13:32           ` Johannes Schindelin
@ 2022-11-18 18:14             ` Taylor Blau
  2022-11-20 23:36               ` Johannes Schindelin
  0 siblings, 1 reply; 58+ messages in thread
From: Taylor Blau @ 2022-11-18 18:14 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason

On Fri, Nov 18, 2022 at 02:32:28PM +0100, Johannes Schindelin wrote:
> > But I'm not sure the rewritten version does what you claim, at least in
> > my own personal opinion.
> >
> > It is not helpful to say the original approach "saw a lot of backlash".
>
> It is the nicest thing I can say about it.

I don't think you have to or should refer to the earlier round of review
at all.

> > It is helpful, on the other hand, to say what about the original
> > approach gave reviewers pause, and why that alternative approach isn't
> > pursued here.
> >
> > Maybe I'm splitting hairs here, but I really do stand by my original
> > suggestion from back in [1].
>
> We can also keep hitting a dead horse, but I don't think that will make
> anything any better.

Instead, it would be much more helpful to explain what you tried before,
and why you are taking a different approach now.

I am simply not comfortable with taking the patch with the way the body
is currently written.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-13  4:51       ` Taylor Blau
  2022-11-14 13:34         ` Johannes Schindelin
@ 2022-11-18 23:15         ` Junio C Hamano
  2022-11-19  2:53           ` Taylor Blau
  2022-11-19  8:18           ` Johannes Sixt
  1 sibling, 2 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-11-18 23:15 UTC (permalink / raw)
  To: Taylor Blau
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason, Johannes Schindelin

Taylor Blau <me@ttaylorr.com> writes:

> One thing that the commit message doesn't allude to (that is covered in
> the earlier discussion) is why it is important to pass
> `--ignore-cr-at-eol`. I think that is worth mentioning here.

Isn't it because Git on the platform is expected to use CRLF in
certain places, unlike on other platforms where LF is used, but the
platform port hasn't adjusted tests to match that expectation?  And
vice versa, where Git is expected to produce LF terminated text
everywhere but the expected output is not "ported" to force LF
termination and instead produces CRLF terminated text on platforms
whose native line ending is CRLF?

Use of "ignore-cr-at-eol" may allow such tests that are not ported
correctly to prepare expected output with a "wrong" line ending and
still pass, and I do think it may be an expedite way to make tests
appear to pass.

But I worry that it may not be a good thing for the health of the
Windows port in the longer term.

When Git is expected to produce platform-native line endings, if a
test is not adjusted to expect platform-native output and instead
uses the ignore-cr-at-eol workaround, a future breakage that makes
Git to produce LF terminated lines everywhere would not be caught,
would it?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-14 15:23           ` Ævar Arnfjörð Bjarmason
@ 2022-11-18 23:19             ` Junio C Hamano
  2022-11-19  2:56               ` Taylor Blau
  0 siblings, 1 reply; 58+ messages in thread
From: Junio C Hamano @ 2022-11-18 23:19 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Johannes Schindelin, Johannes Schindelin via GitGitGadget, git,
	René Scharfe, Eric Sunshine

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> IOW this bit in t/README:
>
>  - Don't use '! git cmd' when you want to make sure the git command
>    exits with failure in a controlled way by calling "die()".[...]
>
> So, before this series a test like this on Windows, with
> GIT_TEST_CMP=mingw_test_cmp:
>
> 	! test_cmp foo bar
>
> Would only succeed if "foo" and "bar" weren't identical, now it'll also
> succeed if 'git diff --no-index' happens to segfault on those inputs.

Well "! test_cmp" is wrong anyway, because it _expects_ two files
are the same and gives more detailed diagnosis when they differ by
giving "diff" output.

If you expect them to be different, "! test_cmp" would give
"detailed diagnosis" in the wrong case, i.e. the outcome is what we
expect.

So the caller must do "test_cmp !" whether the underlying
implementation of test_cmp uses "diff -u" or "diff --no-index".

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-18 23:15         ` Junio C Hamano
@ 2022-11-19  2:53           ` Taylor Blau
  2022-11-19 12:03             ` Ævar Arnfjörð Bjarmason
  2022-11-19  8:18           ` Johannes Sixt
  1 sibling, 1 reply; 58+ messages in thread
From: Taylor Blau @ 2022-11-19  2:53 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Taylor Blau, Johannes Schindelin via GitGitGadget, git,
	René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

On Fri, Nov 18, 2022 at 03:15:08PM -0800, Junio C Hamano wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > One thing that the commit message doesn't allude to (that is covered in
> > the earlier discussion) is why it is important to pass
> > `--ignore-cr-at-eol`. I think that is worth mentioning here.
>
> Isn't it because Git on the platform is expected to use CRLF in
> certain places, unlike on other platforms where LF is used, but the
> platform port hasn't adjusted tests to match that expectation?  And
> vice versa, where Git is expected to produce LF terminated text
> everywhere but the expected output is not "ported" to force LF
> termination and instead produces CRLF terminated text on platforms
> whose native line ending is CRLF?

Yes, I think that's right. My suggestion to Johannes was to (a) make
sure that your and my understanding is correct, and (b) to memorialize
that understanding in the commit message itself.

> Use of "ignore-cr-at-eol" may allow such tests that are not ported
> correctly to prepare expected output with a "wrong" line ending and
> still pass, and I do think it may be an expedite way to make tests
> appear to pass.
>
> But I worry that it may not be a good thing for the health of the
> Windows port in the longer term.

I share your concerns, too.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-18 23:19             ` Junio C Hamano
@ 2022-11-19  2:56               ` Taylor Blau
  2022-11-19 11:54                 ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 58+ messages in thread
From: Taylor Blau @ 2022-11-19  2:56 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Johannes Schindelin,
	Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Eric Sunshine

On Fri, Nov 18, 2022 at 03:19:55PM -0800, Junio C Hamano wrote:
> Well "! test_cmp" is wrong anyway, because it _expects_ two files
> are the same and gives more detailed diagnosis when they differ by
> giving "diff" output.
>
> If you expect them to be different, "! test_cmp" would give
> "detailed diagnosis" in the wrong case, i.e. the outcome is what we
> expect.

I agree that "! test_cmp foo bar" will give output about how "foo" is
different from "bar" (and halt the test only when the two have the same
contents).

> So the caller must do "test_cmp !" whether the underlying
> implementation of test_cmp uses "diff -u" or "diff --no-index".

But this confuses me. "git grep 'test_cmp !'" turns up no results, and
furthermore, test_cmp() itself begins with:

    test "$#" -ne 2 && BUG "2 param"

So I am not sure what you are referring to.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-18 23:15         ` Junio C Hamano
  2022-11-19  2:53           ` Taylor Blau
@ 2022-11-19  8:18           ` Johannes Sixt
  2022-11-19 17:50             ` René Scharfe
  2022-11-21  3:13             ` Junio C Hamano
  1 sibling, 2 replies; 58+ messages in thread
From: Johannes Sixt @ 2022-11-19  8:18 UTC (permalink / raw)
  To: Junio C Hamano, Taylor Blau
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason, Johannes Schindelin

Am 19.11.22 um 00:15 schrieb Junio C Hamano:
> Taylor Blau <me@ttaylorr.com> writes:
> 
>> One thing that the commit message doesn't allude to (that is covered in
>> the earlier discussion) is why it is important to pass
>> `--ignore-cr-at-eol`. I think that is worth mentioning here.
> 
> Isn't it because Git on the platform is expected to use CRLF in
> certain places, unlike on other platforms where LF is used, but the
> platform port hasn't adjusted tests to match that expectation?  And
> vice versa, where Git is expected to produce LF terminated text
> everywhere but the expected output is not "ported" to force LF
> termination and instead produces CRLF terminated text on platforms
> whose native line ending is CRLF?
> 
> Use of "ignore-cr-at-eol" may allow such tests that are not ported
> correctly to prepare expected output with a "wrong" line ending and
> still pass, and I do think it may be an expedite way to make tests
> appear to pass.

The reason that mingw_test_cmp exists is not that Git isn't ported
correctly, or that tests aren't ported correctly. The reason is that
tests assume Unix LF line endings everywhere, but there are some tools
that are outside our control that randomly -- to the layman's eye --
produce CRLF line endings even when their input has LF style.

For example, when we post-process Git output with `sed`, the result
suddenly has CRLF line endings instead of LF that the input had.

> When Git is expected to produce platform-native line endings,...
There is no such requirement or expectation on Windows. LF style is
acceptable.

-- Hannes


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19  2:56               ` Taylor Blau
@ 2022-11-19 11:54                 ` Ævar Arnfjörð Bjarmason
  2022-11-21  3:17                   ` Junio C Hamano
  0 siblings, 1 reply; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-11-19 11:54 UTC (permalink / raw)
  To: Taylor Blau
  Cc: Junio C Hamano, Johannes Schindelin,
	Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Eric Sunshine

On Fri, Nov 18 2022, Taylor Blau wrote:

> On Fri, Nov 18, 2022 at 03:19:55PM -0800, Junio C Hamano wrote:
>> Well "! test_cmp" is wrong anyway, because it _expects_ two files
>> are the same and gives more detailed diagnosis when they differ by
>> giving "diff" output.
>>
>> If you expect them to be different, "! test_cmp" would give
>> "detailed diagnosis" in the wrong case, i.e. the outcome is what we
>> expect.
>
> I agree that "! test_cmp foo bar" will give output about how "foo" is
> different from "bar" (and halt the test only when the two have the same
> contents).
>
>> So the caller must do "test_cmp !" whether the underlying
>> implementation of test_cmp uses "diff -u" or "diff --no-index".
>
> But this confuses me. "git grep 'test_cmp !'" turns up no results, and
> furthermore, test_cmp() itself begins with:
>
>     test "$#" -ne 2 && BUG "2 param"
>
> So I am not sure what you are referring to.

I think we've got got a bit sidetracked here.

Junio's pointing out that a test that would do:

	! diff a b

Is pretty stupid, why would you want to show a diff of differences, if
you're expecting it to be different? Just use:

	! cmp a b

Or something. That's correct. So I think probably those "! test_cmp"
uses would be good candidates for some clean-up to make the test suite
pretty.

But what I'm pointing out is that by having "test_cmp" be something that
invokes "git" (or a helper) we *must not* negate it using the shell's
negation, we have to use "test_must_fail" for anything we build
ourselves.

So, the proposed patch would make the existing cases where we do:

	! test_cmp

Cases where we'd hide a segfault, whereas before it was maybe a bit odd,
but harmless, and working as intended in the sense of reliably passing
if we had differences.

So no, nothing currently does "test_cmp !", I was suggesting that
upthread as the least invasive way of having "test_cmp" safe'd under
negation, now that we were having it call out to "git diff" (or a
helper...). It's the pattern we use for our own test helpers that invoke
our own binaries (and if we don't, that's a bug).

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19  2:53           ` Taylor Blau
@ 2022-11-19 12:03             ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-11-19 12:03 UTC (permalink / raw)
  To: Taylor Blau
  Cc: Junio C Hamano, Johannes Schindelin via GitGitGadget, git,
	René Scharfe, Johannes Schindelin

On Fri, Nov 18 2022, Taylor Blau wrote:

> On Fri, Nov 18, 2022 at 03:15:08PM -0800, Junio C Hamano wrote:
>> Taylor Blau <me@ttaylorr.com> writes:
>>
>> > One thing that the commit message doesn't allude to (that is covered in
>> > the earlier discussion) is why it is important to pass
>> > `--ignore-cr-at-eol`. I think that is worth mentioning here.
>>
>> Isn't it because Git on the platform is expected to use CRLF in
>> certain places, unlike on other platforms where LF is used, but the
>> platform port hasn't adjusted tests to match that expectation?  And
>> vice versa, where Git is expected to produce LF terminated text
>> everywhere but the expected output is not "ported" to force LF
>> termination and instead produces CRLF terminated text on platforms
>> whose native line ending is CRLF?
>
> Yes, I think that's right. My suggestion to Johannes was to (a) make
> sure that your and my understanding is correct, and (b) to memorialize
> that understanding in the commit message itself.

It's not right, but mostly right. I.e. the "--ignore-cr-at-eol" is
surely needed to avoid a deluge of errors on Windows, but as I noted in
[1] you'll also find that the tests don't pass on a *nix box without it.

Which is apparently because "--ignore-cr-at-eol" is also conflating "is
binary?" with "should I replace \r\n", or something.

>> Use of "ignore-cr-at-eol" may allow such tests that are not ported
>> correctly to prepare expected output with a "wrong" line ending and
>> still pass, and I do think it may be an expedite way to make tests
>> appear to pass.
>>
>> But I worry that it may not be a good thing for the health of the
>> Windows port in the longer term.
>
> I share your concerns, too.

I don't really care about that, and would tend to defer to one or more
Johannes in this thread on that question.

But as noted in [1] the upthread patch seems to be replacing some
existing newline munging with not-quite-the-same "diff" behavior.

Which I think is something that needs addressing, either by the commit
explaining why that's desired & OK.

Which is also a good reason to pick one of our *nix CI jobs and run it
with "git diff --no-index", but without "--ignore-cr-at-eol", as I
suggested. It would tease out exactly that difference, both for current
and future tests.

1. https://lore.kernel.org/git/221114.86pmdplbs5.gmgdl@evledraar.gmail.com/

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19  8:18           ` Johannes Sixt
@ 2022-11-19 17:50             ` René Scharfe
  2022-11-20  9:29               ` Torsten Bögershausen
  2022-11-21 17:49               ` Johannes Sixt
  2022-11-21  3:13             ` Junio C Hamano
  1 sibling, 2 replies; 58+ messages in thread
From: René Scharfe @ 2022-11-19 17:50 UTC (permalink / raw)
  To: Johannes Sixt, Junio C Hamano, Taylor Blau
  Cc: Johannes Schindelin via GitGitGadget, git,
	Ævar Arnfjörð Bjarmason, Johannes Schindelin

Am 19.11.2022 um 09:18 schrieb Johannes Sixt:
>
> The reason that mingw_test_cmp exists is not that Git isn't ported
> correctly, or that tests aren't ported correctly. The reason is that
> tests assume Unix LF line endings everywhere, but there are some tools
> that are outside our control that randomly -- to the layman's eye --
> produce CRLF line endings even when their input has LF style.
>
> For example, when we post-process Git output with `sed`, the result
> suddenly has CRLF line endings instead of LF that the input had.

Actually I see the opposite behavior -- sed eats CRs on an up-to-date
Git for Windows SDK:

   $ uname -s
   MINGW64_NT-10.0-22621

   $ printf 'a\r\n' | hexdump.exe -C
   00000000  61 0d 0a                                          |a..|
   00000003

   $ printf 'a\r\n' | sed '' | hexdump.exe -C
   00000000  61 0a                                             |a.|
   00000002

And with the following patch on top of eea7033409 (The twelfth batch,
2022-11-14) the test suite passes for me -- just one case of grep
stealing CRs seems to need adjustment to make mingw_test_cmp
unnecessary:

 t/t3920-crlf-messages.sh | 2 +-
 t/test-lib.sh            | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/t/t3920-crlf-messages.sh b/t/t3920-crlf-messages.sh
index 4c661d4d54..353b1a550e 100755
--- a/t/t3920-crlf-messages.sh
+++ b/t/t3920-crlf-messages.sh
@@ -12,7 +12,7 @@ create_crlf_ref () {
 	cat >.crlf-orig-$branch.txt &&
 	cat .crlf-orig-$branch.txt | append_cr >.crlf-message-$branch.txt &&
 	grep 'Subject' .crlf-orig-$branch.txt | tr '\n' ' ' | sed 's/[ ]*$//' | tr -d '\n' >.crlf-subject-$branch.txt &&
-	grep 'Body' .crlf-message-$branch.txt >.crlf-body-$branch.txt || true &&
+	grep 'Body' .crlf-orig-$branch.txt | append_cr >.crlf-body-$branch.txt || true &&
 	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}" &&
 	test_tick &&
 	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 6db377f68b..af5ec357e5 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1721,7 +1721,6 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19 17:50             ` René Scharfe
@ 2022-11-20  9:29               ` Torsten Bögershausen
  2022-11-21 17:49               ` Johannes Sixt
  1 sibling, 0 replies; 58+ messages in thread
From: Torsten Bögershausen @ 2022-11-20  9:29 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Sixt, Junio C Hamano, Taylor Blau,
	Johannes Schindelin via GitGitGadget, git,
	Ævar Arnfjörð Bjarmason, Johannes Schindelin

On Sat, Nov 19, 2022 at 06:50:28PM +0100, René Scharfe wrote:
> Am 19.11.2022 um 09:18 schrieb Johannes Sixt:
> >
> > The reason that mingw_test_cmp exists is not that Git isn't ported
> > correctly, or that tests aren't ported correctly. The reason is that
> > tests assume Unix LF line endings everywhere, but there are some tools
> > that are outside our control that randomly -- to the layman's eye --
> > produce CRLF line endings even when their input has LF style.
> >
> > For example, when we post-process Git output with `sed`, the result
> > suddenly has CRLF line endings instead of LF that the input had.
>
> Actually I see the opposite behavior -- sed eats CRs on an up-to-date
> Git for Windows SDK:
>
>    $ uname -s
>    MINGW64_NT-10.0-22621
>
>    $ printf 'a\r\n' | hexdump.exe -C
>    00000000  61 0d 0a                                          |a..|
>    00000003
>
>    $ printf 'a\r\n' | sed '' | hexdump.exe -C
>    00000000  61 0a                                             |a.|
>    00000002


There is a "-b" option for sed under MINGW;
 -b, --binary
                  open files in binary mode (CR+LFs are not processed specially)

The CRLF handling for sed (and probably grep and awk) had beed changed in cygwin
https://cygwin.com/pipermail/cygwin/2017-June/233133.html

(And I suspect that this rippled into MINGW some day)


>
> And with the following patch on top of eea7033409 (The twelfth batch,
> 2022-11-14) the test suite passes for me -- just one case of grep
> stealing CRs seems to need adjustment to make mingw_test_cmp
> unnecessary:
>
>  t/t3920-crlf-messages.sh | 2 +-
>  t/test-lib.sh            | 1 -
>  2 files changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/t/t3920-crlf-messages.sh b/t/t3920-crlf-messages.sh
> index 4c661d4d54..353b1a550e 100755
> --- a/t/t3920-crlf-messages.sh
> +++ b/t/t3920-crlf-messages.sh
> @@ -12,7 +12,7 @@ create_crlf_ref () {
>  	cat >.crlf-orig-$branch.txt &&
>  	cat .crlf-orig-$branch.txt | append_cr >.crlf-message-$branch.txt &&
>  	grep 'Subject' .crlf-orig-$branch.txt | tr '\n' ' ' | sed 's/[ ]*$//' | tr -d '\n' >.crlf-subject-$branch.txt &&
> -	grep 'Body' .crlf-message-$branch.txt >.crlf-body-$branch.txt || true &&
> +	grep 'Body' .crlf-orig-$branch.txt | append_cr >.crlf-body-$branch.txt || true &&


Talking about grep:
Both grep under Linux, MacOs and MINGW (git bash) seem to have the -U option:
  -U, --binary              do not strip CR characters at EOL (MSDOS/Windows)


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-18 18:14             ` Taylor Blau
@ 2022-11-20 23:36               ` Johannes Schindelin
  2022-11-21  0:07                 ` Taylor Blau
  0 siblings, 1 reply; 58+ messages in thread
From: Johannes Schindelin @ 2022-11-20 23:36 UTC (permalink / raw)
  To: Taylor Blau
  Cc: Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Ævar Arnfjörð Bjarmason

Hi Taylor,

On Fri, 18 Nov 2022, Taylor Blau wrote:

> On Fri, Nov 18, 2022 at 02:32:28PM +0100, Johannes Schindelin wrote:
> > > But I'm not sure the rewritten version does what you claim, at least in
> > > my own personal opinion.
> > >
> > > It is not helpful to say the original approach "saw a lot of backlash".
> >
> > It is the nicest thing I can say about it.
>
> I don't think you have to or should refer to the earlier round of review
> at all.

You misunderstand.

I am referring to the fact that Git for Windows has run with a very
different solution for this problem, for years, yet it was rejected upon
upstreaming, and had to be replaced by a completely different workaround.

It's not just a simple "earlier round of review" at all that is the issue
I am describing.

It is a very real concern of future readers who know what patches are
currently in Git for Windows and who all of a sudden do not find the `git
test-tool cmp` code anymore in Git for Windows and then see that `git diff
--no-index` is used and naturally want to know what the heck happened.

This is context relevant to understand why the particular approach
implemented in the patch was chosen and another one was discarded (when
that other approach has served Git for Windows very well for several
years), for which the commit message is precisely the appropriate place. I
am quite lost trying to understand why I am asked to remove said context,
leaving future readers puzzled e.g. in the case that it should turn out to
have been a terrible idea to use the quite complex diff machinery for as
simple a task as `test_cmp`. It sounds to me like I am asked to make my
contribution worse ("worsimprove" is the term I recently learned to
describe this) instead of helping me to improve it.

The advice you provided directly contradicts what is written in
https://git-scm.com/docs/SubmittingPatches#describe-changes, after all
(ignore the funny grammar for now unless you want to add a tangent to this
already long thread):

	The body should provide a meaningful commit message, which:

	    [...]

	    3. alternate solutions considered but discarded, if any.

Thanks,
Johannes

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-20 23:36               ` Johannes Schindelin
@ 2022-11-21  0:07                 ` Taylor Blau
  0 siblings, 0 replies; 58+ messages in thread
From: Taylor Blau @ 2022-11-21  0:07 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Taylor Blau, Johannes Schindelin via GitGitGadget, git,
	René Scharfe, Ævar Arnfjörð Bjarmason

On Mon, Nov 21, 2022 at 12:36:25AM +0100, Johannes Schindelin wrote:
> I am referring to the fact that Git for Windows has run with a very
> different solution for this problem, for years, yet it was rejected upon
> upstreaming, and had to be replaced by a completely different workaround.
>
> It's not just a simple "earlier round of review" at all that is the issue
> I am describing.

Right. Having read the earlier thread myself, I am familiar with the
context. So I'm not trying to dismiss it as just another round of
review, but instead try to steer the commit message in a more
constructive direction.

> It is a very real concern of future readers who know what patches are
> currently in Git for Windows and who all of a sudden do not find the `git
> test-tool cmp` code anymore in Git for Windows and then see that `git diff
> --no-index` is used and naturally want to know what the heck happened.
>
> This is context relevant to understand why the particular approach
> implemented in the patch was chosen and another one was discarded (when
> that other approach has served Git for Windows very well for several
> years), for which the commit message is precisely the appropriate place. I
> am quite lost trying to understand why I am asked to remove said context,
> leaving future readers puzzled e.g. in the case that it should turn out to
> have been a terrible idea to use the quite complex diff machinery for as
> simple a task as `test_cmp`. It sounds to me like I am asked to make my
> contribution worse ("worsimprove" is the term I recently learned to
> describe this) instead of helping me to improve it.

No. I am not suggesting you remove context at all. But what I am saying
is that describing the last attempt at upstreaming by saying it "saw a
lot of backlash and distractions during review and was therefore
abandoned" is not helpful.

If it saw backlash and distraction: why? What about the approach caused
backlash? Describing that thing as an alternative approach and
explaining concretely why it was disliked is the sort of context that I
think we should aim for in our commit messages.

But characterizing the review outright as full of backlash and
distractions is not helpful to future readers, and it is not a kind way
to treat others on the list who may have participated in that review.

> The advice you provided directly contradicts what is written in
> https://git-scm.com/docs/SubmittingPatches#describe-changes, after all
> (ignore the funny grammar for now unless you want to add a tangent to this
> already long thread):
>
> 	The body should provide a meaningful commit message, which:
>
> 	    [...]
>
> 	    3. alternate solutions considered but discarded, if any.

I am saying that, as written, the commit message does not explain what
the alternative approaches were in great detail.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19  8:18           ` Johannes Sixt
  2022-11-19 17:50             ` René Scharfe
@ 2022-11-21  3:13             ` Junio C Hamano
  1 sibling, 0 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-11-21  3:13 UTC (permalink / raw)
  To: Johannes Sixt
  Cc: Taylor Blau, Johannes Schindelin via GitGitGadget, git,
	René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

Johannes Sixt <j6t@kdbg.org> writes:

>> Use of "ignore-cr-at-eol" may allow such tests that are not ported
>> correctly to prepare expected output with a "wrong" line ending and
>> still pass, and I do think it may be an expedite way to make tests
>> appear to pass.
>
> The reason that mingw_test_cmp exists is not that Git isn't ported
> correctly, or that tests aren't ported correctly. The reason is that
> tests assume Unix LF line endings everywhere, but there are some tools
> that are outside our control that randomly -- to the layman's eye --
> produce CRLF line endings even when their input has LF style.
>
> For example, when we post-process Git output with `sed`, the result
> suddenly has CRLF line endings instead of LF that the input had.

That's exactly the kind of thing I meant by "porting the tests".

If we _know_ that the tests want to munge something (either the
hardcoded expectation or command output) with LF line ending, and
produce output with LF line ending, then platform tools like 'sed'
used for that task should be told not to turn LF into CRLF, with
something like:

    case $uname_s in
    *MINGW*)
        sed () {
            command sed --binary "$@"
        }
	... other workarounds come here ...
	;;
    easc

before we can call that we ported our tests to the platform.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19 11:54                 ` Ævar Arnfjörð Bjarmason
@ 2022-11-21  3:17                   ` Junio C Hamano
  0 siblings, 0 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-11-21  3:17 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Taylor Blau, Johannes Schindelin,
	Johannes Schindelin via GitGitGadget, git, René Scharfe,
	Eric Sunshine

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> Junio's pointing out that a test that would do:
>
> 	! diff a b
>
> Is pretty stupid, why would you want to show a diff of differences, if
> you're expecting it to be different? Just use:
>
> 	! cmp a b
>
> Or something. That's correct. So I think probably those "! test_cmp"
> uses would be good candidates for some clean-up to make the test suite
> pretty.

Yes.

Besides, "the outcome must be different from this other file" is a
very sloppy thing to say in a test.  It rarely is OK for our output
to be merely different from one "wrong" output.  Tests should
specify in what way the output must be different from the "wrong"
example, at least.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-11-19 17:50             ` René Scharfe
  2022-11-20  9:29               ` Torsten Bögershausen
@ 2022-11-21 17:49               ` Johannes Sixt
  1 sibling, 0 replies; 58+ messages in thread
From: Johannes Sixt @ 2022-11-21 17:49 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Schindelin via GitGitGadget, git,
	Ævar Arnfjörð Bjarmason, Johannes Schindelin,
	Taylor Blau, Junio C Hamano

Am 19.11.22 um 18:50 schrieb René Scharfe:
> And with the following patch on top of eea7033409 (The twelfth batch,
> 2022-11-14) the test suite passes for me -- just one case of grep
> stealing CRs seems to need adjustment to make mingw_test_cmp
> unnecessary:

Wow, nice catch! Just let enough time pass, and a liability such as
mingw_test_cmp becomes obsolete by mere magic. ;)

> 
>  t/t3920-crlf-messages.sh | 2 +-
>  t/test-lib.sh            | 1 -
>  2 files changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/t/t3920-crlf-messages.sh b/t/t3920-crlf-messages.sh
> index 4c661d4d54..353b1a550e 100755
> --- a/t/t3920-crlf-messages.sh
> +++ b/t/t3920-crlf-messages.sh
> @@ -12,7 +12,7 @@ create_crlf_ref () {
>  	cat >.crlf-orig-$branch.txt &&
>  	cat .crlf-orig-$branch.txt | append_cr >.crlf-message-$branch.txt &&
>  	grep 'Subject' .crlf-orig-$branch.txt | tr '\n' ' ' | sed 's/[ ]*$//' | tr -d '\n' >.crlf-subject-$branch.txt &&
> -	grep 'Body' .crlf-message-$branch.txt >.crlf-body-$branch.txt || true &&
> +	grep 'Body' .crlf-orig-$branch.txt | append_cr >.crlf-body-$branch.txt || true &&

This || true looks suspicious. I'll submit a patch in a new thread.

>  	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}" &&
>  	test_tick &&
>  	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 6db377f68b..af5ec357e5 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1721,7 +1721,6 @@ case $uname_s in
>  	test_set_prereq SED_STRIPS_CR
>  	test_set_prereq GREP_STRIPS_CR
>  	test_set_prereq WINDOWS
> -	GIT_TEST_CMP=mingw_test_cmp
>  	;;
>  *CYGWIN*)
>  	test_set_prereq POSIXPERM


^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v5 0/2] tests(mingw): avoid super-slow mingw_test_cmp
  2022-11-14 14:06     ` [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2022-11-14 14:06       ` [PATCH v4 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
  2022-11-14 14:06       ` [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
@ 2022-12-06 15:07       ` Johannes Schindelin via GitGitGadget
  2022-12-06 15:07         ` [PATCH v5 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
  2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2 siblings, 2 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-12-06 15:07 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

Half a year ago, directly after sending a patch to fix a performance
regression due to a mis-use of test_cmp
[https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/],
I got curious to see whether Git for Windows had the same issue. And it did
not: it passed t5351 in 22 seconds, even while using test_cmp to compare
pack files
[https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90].
This motivated me to upstream Git for Windows' changes.

Changes since v4:

 * The commit message was rewritten almost completely.

Changes since v3:

 * Fixed the subject of the cover letter (which should have been adjusted in
   v3)
 * Elaborated the paragraph about the historical context of this patch

Changes since v2:

 * Dropped the test helper, using diff --no-index instead.

Changes since v1:

 * Fixed double "with" in the commit message.
 * Renamed the test helper to text-cmp.
 * Made the diff --no-index call more robust by using a double-dash
   separator.

Johannes Schindelin (2):
  t0021: use Windows-friendly `pwd`
  tests(mingw): avoid very slow `mingw_test_cmp`

 t/t0021-conversion.sh   |  4 +--
 t/test-lib-functions.sh | 66 -----------------------------------------
 t/test-lib.sh           |  2 +-
 3 files changed, 3 insertions(+), 69 deletions(-)


base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/1309

Range-diff vs v4:

 1:  b38b8fb5a85 = 1:  b38b8fb5a85 t0021: use Windows-friendly `pwd`
 2:  128b1f348d8 ! 2:  6a80fab7e39 tests(mingw): avoid very slow `mingw_test_cmp`
     @@ Metadata
       ## Commit message ##
          tests(mingw): avoid very slow `mingw_test_cmp`
      
     -    It is more performant to run `git diff --no-index` than running the
     -    `mingw_test_cmp` code with MSYS2's Bash, i.e. the Bash that Git for
     -    Windows uses. And a lot more readable.
     +    When Git's test suite uses `test_cmp`, it is not actually trying to
     +    compare binary files as the name `cmp` would suggest to users familiar
     +    with Unix' tools, but the tests instead verify that actual output
     +    matches the expected text.
      
     -    The original reason why Git's test suite needs the `mingw_test_cmp`
     -    function at all (and why `cmp` is not good enough) is that Git's test
     -    suite is not actually trying to compare binary files when it calls
     -    `test_cmp`, but it compares text files. And those text files can contain
     -    CR/LF line endings depending on the circumstances.
     +    On Unix, `cmp` works well enough for Git's purposes because only Line
     +    Feed characters are used as line endings. However, on Windows, while
     +    most tools accept Line Feeds as line endings, many tools produce
     +    Carriage Return + Line Feed line endings, including some of the tools
     +    used by the test suite (which are therefore provided via Git for Windows
     +    SDK). Therefore, `cmp` would frequently fail merely due to different
     +    line endings.
      
     -    Note: The original fix in the Git for Windows project implemented a test
     -    helper that avoids the overhead of the diff machinery, in favor of
     -    implementing a behavior that is more in line with what `mingw_test_cmp`
     -    does now. This was done to minimize the risk in using something as
     -    complex as the diff machinery to perform something as simple as
     -    determining whether text output is identical to the expected output or
     -    not. This approach has served Git for Windows well for years, but the
     -    attempt to upstream this saw a lot of backlash and distractions during
     -    the review, was disliked by the Git maintainer and was therefore
     -    abandoned. For full details, see the thread at
     -    https://lore.kernel.org/git/pull.1309.git.1659106382128.gitgitgadget@gmail.com/t
     +    To accommodate for that, the `mingw_test_cmp` function was introduced
     +    into Git's test suite to perform a line-by-line comparison that ignores
     +    line endings. This function is a Bash function that is only used on
     +    Windows, everywhere else `cmp` is used.
     +
     +    This is a double whammy because `cmp` is fast, and `mingw_test_cmp` is
     +    slow, even more so on Windows because it is a Bash script function, and
     +    Bash scripts are known to run particularly slowly on Windows due to
     +    Bash's need for the POSIX emulation layer provided by the MSYS2 runtime.
     +
     +    The commit message of 32ed3314c104 (t5351: avoid using `test_cmp` for
     +    binary data, 2022-07-29) provides an illuminating account of the
     +    consequences: On Windows, the platform on which Git could really use all
     +    the help it can get to improve its performance, the time spent on one
     +    entire test script was reduced from half an hour to less than half a
     +    minute merely by avoiding a single call to `mingw_test_cmp` in but a
     +    single test case.
     +
     +    Learning the lesson to avoid shell scripting wherever possible, the Git
     +    for Windows project implemented a minimal replacement for
     +    `mingw_test_cmp` in the form of a `test-tool` subcommand that parses the
     +    input files line by line, ignoring line endings, and compares them.
     +    Essentially the same thing as `mingw_test_cmp`, but implemented in
     +    C instead of Bash. This solution served the Git for Windows project
     +    well, over years.
     +
     +    However, when this solution was finally upstreamed, the conclusion was
     +    reached that a change to use `git diff --no-index` instead of
     +    `mingw_test_cmp` was more easily reviewed and hence should be used
     +    instead.
     +
     +    The reason why this approach was not even considered in Git for Windows
     +    is that in 2007, there was already a motion on the table to use Git's
     +    own diff machinery to perform comparisons in Git's test suite, but it
     +    was dismissed in https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/
     +    as undesirable because tests might potentially succeed due to bugs in
     +    the diff machinery when they should not succeed, and those bugs could
     +    therefore hide regressions that the tests try to prevent.
     +
     +    By the time Git for Windows' `mingw-test-cmp` in C was finally
     +    contributed to the Git mailing list, reviewers agreed that the diff
     +    machinery had matured enough and should be used instead.
     +
     +    When the concern was raised that the diff machinery, due to its
     +    complexity, would perform substantially worse than the test helper
     +    originally implemented in the Git for Windows project, a test
     +    demonstrated that these performance differences are well lost within the
     +    100+ minutes it takes to run Git's test suite on Windows.
      
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v5 1/2] t0021: use Windows-friendly `pwd`
  2022-12-06 15:07       ` [PATCH v5 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
@ 2022-12-06 15:07         ` Johannes Schindelin via GitGitGadget
  2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  1 sibling, 0 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-12-06 15:07 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

In Git for Windows, when passing paths from shell scripts to regular
Win32 executables, thanks to the MSYS2 runtime a somewhat magic path
conversion happens that lets the shell script think that there is a file
at `/git/Makefile` and the Win32 process it spawned thinks that the
shell script said `C:/git-sdk-64/git/Makefile` instead.

This conversion is documented in detail over here:
https://www.msys2.org/docs/filesystem-paths/#automatic-unix-windows-path-conversion

As all automatic conversions, there are gaps. For example, to avoid
mistaking command-line options like `/LOG=log.txt` (which are quite
common in the Windows world) from being mistaken for a Unix-style
absolute path, the MSYS2 runtime specifically exempts arguments
containing a `=` character from that conversion.

We are about to change `test_cmp` to use `git diff --no-index`, which
involves spawning precisely such a Win32 process.

In combination, this would cause a failure in `t0021-conversion.sh`
where we pass an absolute path containing an equal character to the
`test_cmp` function.

Seeing as the Unix tools like `cp` and `diff` that are used by Git's
test suite in the Git for Windows SDK (thanks to the MSYS2 project)
understand both Unix-style as well as Windows-style paths, we can stave
off this problem by simply switching to Windows-style paths and
side-stepping the need for any automatic path conversion.

Note: The `PATH` variable is obviously special, as it is colon-separated
in the MSYS2 Bash used by Git for Windows, and therefore _cannot_
contain absolute Windows-style paths, lest the colon after the drive
letter is mistaken for a path separator. Therefore, we need to be
careful to keep the Unix-style when modifying the `PATH` variable.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/t0021-conversion.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index bad37abad2c..15482fa78e3 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -8,8 +8,8 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/lib-terminal.sh

-TEST_ROOT="$PWD"
-PATH=$TEST_ROOT:$PATH
+PATH=$PWD:$PATH
+TEST_ROOT="$(pwd)"

 write_script <<\EOF "$TEST_ROOT/rot13.sh"
 tr \
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-12-06 15:07       ` [PATCH v5 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
  2022-12-06 15:07         ` [PATCH v5 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
@ 2022-12-06 15:07         ` Johannes Schindelin via GitGitGadget
  2022-12-06 18:55           ` Ævar Arnfjörð Bjarmason
                             ` (3 more replies)
  1 sibling, 4 replies; 58+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2022-12-06 15:07 UTC (permalink / raw)
  To: git
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When Git's test suite uses `test_cmp`, it is not actually trying to
compare binary files as the name `cmp` would suggest to users familiar
with Unix' tools, but the tests instead verify that actual output
matches the expected text.

On Unix, `cmp` works well enough for Git's purposes because only Line
Feed characters are used as line endings. However, on Windows, while
most tools accept Line Feeds as line endings, many tools produce
Carriage Return + Line Feed line endings, including some of the tools
used by the test suite (which are therefore provided via Git for Windows
SDK). Therefore, `cmp` would frequently fail merely due to different
line endings.

To accommodate for that, the `mingw_test_cmp` function was introduced
into Git's test suite to perform a line-by-line comparison that ignores
line endings. This function is a Bash function that is only used on
Windows, everywhere else `cmp` is used.

This is a double whammy because `cmp` is fast, and `mingw_test_cmp` is
slow, even more so on Windows because it is a Bash script function, and
Bash scripts are known to run particularly slowly on Windows due to
Bash's need for the POSIX emulation layer provided by the MSYS2 runtime.

The commit message of 32ed3314c104 (t5351: avoid using `test_cmp` for
binary data, 2022-07-29) provides an illuminating account of the
consequences: On Windows, the platform on which Git could really use all
the help it can get to improve its performance, the time spent on one
entire test script was reduced from half an hour to less than half a
minute merely by avoiding a single call to `mingw_test_cmp` in but a
single test case.

Learning the lesson to avoid shell scripting wherever possible, the Git
for Windows project implemented a minimal replacement for
`mingw_test_cmp` in the form of a `test-tool` subcommand that parses the
input files line by line, ignoring line endings, and compares them.
Essentially the same thing as `mingw_test_cmp`, but implemented in
C instead of Bash. This solution served the Git for Windows project
well, over years.

However, when this solution was finally upstreamed, the conclusion was
reached that a change to use `git diff --no-index` instead of
`mingw_test_cmp` was more easily reviewed and hence should be used
instead.

The reason why this approach was not even considered in Git for Windows
is that in 2007, there was already a motion on the table to use Git's
own diff machinery to perform comparisons in Git's test suite, but it
was dismissed in https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/
as undesirable because tests might potentially succeed due to bugs in
the diff machinery when they should not succeed, and those bugs could
therefore hide regressions that the tests try to prevent.

By the time Git for Windows' `mingw-test-cmp` in C was finally
contributed to the Git mailing list, reviewers agreed that the diff
machinery had matured enough and should be used instead.

When the concern was raised that the diff machinery, due to its
complexity, would perform substantially worse than the test helper
originally implemented in the Git for Windows project, a test
demonstrated that these performance differences are well lost within the
100+ minutes it takes to run Git's test suite on Windows.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 t/test-lib-functions.sh | 66 -----------------------------------------
 t/test-lib.sh           |  2 +-
 2 files changed, 1 insertion(+), 67 deletions(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 8c44856eaec..452fe9bc8aa 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1541,72 +1541,6 @@ test_skip_or_die () {
 	error "$2"
 }

-# The following mingw_* functions obey POSIX shell syntax, but are actually
-# bash scripts, and are meant to be used only with bash on Windows.
-
-# A test_cmp function that treats LF and CRLF equal and avoids to fork
-# diff when possible.
-mingw_test_cmp () {
-	# Read text into shell variables and compare them. If the results
-	# are different, use regular diff to report the difference.
-	local test_cmp_a= test_cmp_b=
-
-	# When text came from stdin (one argument is '-') we must feed it
-	# to diff.
-	local stdin_for_diff=
-
-	# Since it is difficult to detect the difference between an
-	# empty input file and a failure to read the files, we go straight
-	# to diff if one of the inputs is empty.
-	if test -s "$1" && test -s "$2"
-	then
-		# regular case: both files non-empty
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-	elif test -s "$1" && test "$2" = -
-	then
-		# read 2nd file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b
-		stdin_for_diff='<<<"$test_cmp_b"'
-	elif test "$1" = - && test -s "$2"
-	then
-		# read 1st file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-		stdin_for_diff='<<<"$test_cmp_a"'
-	fi
-	test -n "$test_cmp_a" &&
-	test -n "$test_cmp_b" &&
-	test "$test_cmp_a" = "$test_cmp_b" ||
-	eval "diff -u \"\$@\" $stdin_for_diff"
-}
-
-# $1 is the name of the shell variable to fill in
-mingw_read_file_strip_cr_ () {
-	# Read line-wise using LF as the line separator
-	# and use IFS to strip CR.
-	local line
-	while :
-	do
-		if IFS=$'\r' read -r -d $'\n' line
-		then
-			# good
-			line=$line$'\n'
-		else
-			# we get here at EOF, but also if the last line
-			# was not terminated by LF; in the latter case,
-			# some text was read
-			if test -z "$line"
-			then
-				# EOF, really
-				break
-			fi
-		fi
-		eval "$1=\$$1\$line"
-	done
-}
-
 # Like "env FOO=BAR some-program", but run inside a subshell, which means
 # it also works for shell functions (though those functions cannot impact
 # the environment outside of the test_env invocation).
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 7726d1da88a..f8c6205e08f 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1546,7 +1546,7 @@ case $uname_s in
 	test_set_prereq SED_STRIPS_CR
 	test_set_prereq GREP_STRIPS_CR
 	test_set_prereq WINDOWS
-	GIT_TEST_CMP=mingw_test_cmp
+	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"
 	;;
 *CYGWIN*)
 	test_set_prereq POSIXPERM
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
@ 2022-12-06 18:55           ` Ævar Arnfjörð Bjarmason
  2022-12-06 21:52           ` Johannes Sixt
                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 58+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-12-06 18:55 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget
  Cc: git, René Scharfe, Johannes Schindelin


On Tue, Dec 06 2022, Johannes Schindelin via GitGitGadget wrote:

> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> [...]
> The reason why this approach was not even considered in Git for Windows
> is that in 2007, there was already a motion on the table to use Git's
> own diff machinery to perform comparisons in Git's test suite, but it
> was dismissed in https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/

I think you mixed up your links there, that's the "no, that should be
fine" from 2022, but it contains the link to the 2007 message you seem
to be talking about.

> as undesirable because tests might potentially succeed due to bugs in
> the diff machinery when they should not succeed, and those bugs could
> therefore hide regressions that the tests try to prevent.
>
> By the time Git for Windows' `mingw-test-cmp` in C was finally
> contributed to the Git mailing list, reviewers agreed that the diff
> machinery had matured enough and should be used instead.

I think it's fine to dogfood it like this, but I've been pointing out to
you[1] that this will hide segfaults etc. in "git" as we negate the exit
code of "test_cmp" in some places.

E.g. let's produce this stack overflow:

	diff --git a/diff-no-index.c b/diff-no-index.c
	index 9a8b09346bd..1c4a9e5c351 100644
	--- a/diff-no-index.c
	+++ b/diff-no-index.c
	@@ -279,6 +279,7 @@ int diff_no_index(struct rev_info *revs,
	 
	 	fixup_paths(paths, &replacement);
	 
	+	revs++;
	 	revs->diffopt.skip_stat_unmatch = 1;
	 	if (!revs->diffopt.output_format)
	 		revs->diffopt.output_format = DIFF_FORMAT_PATCH;

Then compiling with SANITIZE=address and running:

	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --" ./t9351-fast-export-anonymize.sh --run=1,17 -vixd

Will produce (less relevant bits elided):
	
	expecting success of 9351.17 'idents are shared': 
		[...]
		test_line_count = 1 unique &&
		! test_cmp authors committers
	
	[...]
	+ test 1 = 1
	+ test_cmp authors committers
	+ test 2 -ne 2
	+ eval GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol -- "$@"
	+ GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol -- authors committers
	=================================================================
	==2865782==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffc6d64d0f4 at pc 0x00000077fa4d bp 0x7ffc6d64bcc0 sp 0x7ffc6d64bcb8
	WRITE of size 4 at 0x7ffc6d64d0f4 thread T0
	    #0 0x77fa4c in diff_no_index diff-no-index.c:292
	[...]
	ok 17 - idents are shared
	
	# passed all 17 test(s)
	1..17

I.e. we'll proclaim "all tests passed", even though we segfaulted. I
think this direction will fix it for you:

	diff --git a/t/t9351-fast-export-anonymize.sh b/t/t9351-fast-export-anonymize.sh
	index 77047e250dc..29a2d3a03a7 100755
	--- a/t/t9351-fast-export-anonymize.sh
	+++ b/t/t9351-fast-export-anonymize.sh
	@@ -133,7 +133,7 @@ test_expect_success 'idents are shared' '
	 	git log --all --format="%cn <%ce>" >committers &&
	 	sort -u committers >unique &&
	 	test_line_count = 1 unique &&
	-	! test_cmp authors committers
	+	test_cmp ! authors committers
	 '
	 
	 test_done
	diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
	index 452fe9bc8aa..d640b4b7881 100644
	--- a/t/test-lib-functions.sh
	+++ b/t/test-lib-functions.sh
	@@ -1239,8 +1239,25 @@ test_expect_code () {
	 # - not all diff versions understand "-u"
	 
	 test_cmp () {
	+	local negate= &&
	+	local exit_code= &&
	+	if test "$1" = "!"
	+	then
	+		negate=t &&
	+		shift
	+	fi &&
	 	test "$#" -ne 2 && BUG "2 param"
	-	eval "$GIT_TEST_CMP" '"$@"'
	+	if test -n "$GIT_TEST_CMP_BUILTIN"
	+	then
	+		if test -n "$negate"
	+		then
	+			test_must_fail env GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol -- "$@"
	+		else
	+			GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol -- "$@"
	+		fi
	+	else
	+		eval "$GIT_TEST_CMP" '"$@"'
	+	fi
	 }
	 
	 # Check that the given config key has the expected value.
	diff --git a/t/test-lib.sh b/t/test-lib.sh
	index f8c6205e08f..0a5f8e7c7fc 100644
	--- a/t/test-lib.sh
	+++ b/t/test-lib.sh
	@@ -1546,7 +1546,7 @@ case $uname_s in
	 	test_set_prereq SED_STRIPS_CR
	 	test_set_prereq GREP_STRIPS_CR
	 	test_set_prereq WINDOWS
	-	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"
	+	GIT_TEST_CMP_BUILTIN=t
	 	;;
	 *CYGWIN*)
	 	test_set_prereq POSIXPERM

I.e. it's fine to start invoking "git" in one of the
test-lib-functions.sh, but it's not OK to do so without also ensuring
that we're properly checking its exit code.

The above diff fixes it only for t9351-fast-export-anonymize.sh, we'd
also need to change "! test_cmp " to "test_cmp ! " elsewhere, but that's
a rather easy mechanical replacement.

1. https://lore.kernel.org/git/221119.86wn7rdugi.gmgdl@evledraar.gmail.com/

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2022-12-06 18:55           ` Ævar Arnfjörð Bjarmason
@ 2022-12-06 21:52           ` Johannes Sixt
  2022-12-06 21:54           ` René Scharfe
  2022-12-07  1:31           ` Taylor Blau
  3 siblings, 0 replies; 58+ messages in thread
From: Johannes Sixt @ 2022-12-06 21:52 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget
  Cc: René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin, git

Am 06.12.22 um 16:07 schrieb Johannes Schindelin via GitGitGadget:
> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 7726d1da88a..f8c6205e08f 100644
> --- a/t/test-lib.sh
> +++ b/t/test-lib.sh
> @@ -1546,7 +1546,7 @@ case $uname_s in
>  	test_set_prereq SED_STRIPS_CR
>  	test_set_prereq GREP_STRIPS_CR
>  	test_set_prereq WINDOWS
> -	GIT_TEST_CMP=mingw_test_cmp
> +	GIT_TEST_CMP="GIT_DIR=/dev/null git diff --no-index --ignore-cr-at-eol --"

After René's work, it should be possible to drop this setting on Windows
altogether.

I started doing that, and when I proof-read the patch, I noticed the
SED_STRIPS_CR and GREP_STRIPS_CR prerequisites that are visible in the
context lines of this hunk. I wanted to explore whether they can be
removed as well, but I ran out of time.

-- Hannes


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
  2022-12-06 18:55           ` Ævar Arnfjörð Bjarmason
  2022-12-06 21:52           ` Johannes Sixt
@ 2022-12-06 21:54           ` René Scharfe
  2022-12-07  4:33             ` Junio C Hamano
  2022-12-07  1:31           ` Taylor Blau
  3 siblings, 1 reply; 58+ messages in thread
From: René Scharfe @ 2022-12-06 21:54 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget, git
  Cc: Ævar Arnfjörð Bjarmason, Johannes Schindelin

Am 06.12.2022 um 16:07 schrieb Johannes Schindelin via GitGitGadget:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> When Git's test suite uses `test_cmp`, it is not actually trying to
> compare binary files as the name `cmp` would suggest to users familiar
> with Unix' tools, but the tests instead verify that actual output
> matches the expected text.
>
> On Unix, `cmp` works well enough for Git's purposes because only Line
> Feed characters are used as line endings. However, on Windows, while
> most tools accept Line Feeds as line endings, many tools produce
> Carriage Return + Line Feed line endings, including some of the tools
> used by the test suite (which are therefore provided via Git for Windows
> SDK). Therefore, `cmp` would frequently fail merely due to different
> line endings.
>
> To accommodate for that, the `mingw_test_cmp` function was introduced
> into Git's test suite to perform a line-by-line comparison that ignores
> line endings. This function is a Bash function that is only used on
> Windows, everywhere else `cmp` is used.
>
> This is a double whammy because `cmp` is fast, and `mingw_test_cmp` is
> slow, even more so on Windows because it is a Bash script function, and
> Bash scripts are known to run particularly slowly on Windows due to
> Bash's need for the POSIX emulation layer provided by the MSYS2 runtime.
>
> The commit message of 32ed3314c104 (t5351: avoid using `test_cmp` for
> binary data, 2022-07-29) provides an illuminating account of the
> consequences: On Windows, the platform on which Git could really use all
> the help it can get to improve its performance, the time spent on one
> entire test script was reduced from half an hour to less than half a
> minute merely by avoiding a single call to `mingw_test_cmp` in but a
> single test case.
>
> Learning the lesson to avoid shell scripting wherever possible, the Git
> for Windows project implemented a minimal replacement for
> `mingw_test_cmp` in the form of a `test-tool` subcommand that parses the
> input files line by line, ignoring line endings, and compares them.
> Essentially the same thing as `mingw_test_cmp`, but implemented in
> C instead of Bash. This solution served the Git for Windows project
> well, over years.
>
> However, when this solution was finally upstreamed, the conclusion was
> reached that a change to use `git diff --no-index` instead of
> `mingw_test_cmp` was more easily reviewed and hence should be used
> instead.
>
> The reason why this approach was not even considered in Git for Windows
> is that in 2007, there was already a motion on the table to use Git's
> own diff machinery to perform comparisons in Git's test suite, but it
> was dismissed in https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/
> as undesirable because tests might potentially succeed due to bugs in
> the diff machinery when they should not succeed, and those bugs could
> therefore hide regressions that the tests try to prevent.
>
> By the time Git for Windows' `mingw-test-cmp` in C was finally
> contributed to the Git mailing list, reviewers agreed that the diff
> machinery had matured enough and should be used instead.
>
> When the concern was raised that the diff machinery, due to its
> complexity, would perform substantially worse than the test helper
> originally implemented in the Git for Windows project, a test
> demonstrated that these performance differences are well lost within the
> 100+ minutes it takes to run Git's test suite on Windows.
Only t3920 needs mingw_test_cmp on my system.  [2] on top of [1] removes
that dependency.  Does that work for you as well?


diff -u and git diff are slower than mingw_test_cmp when running
"make test" on my system ("uname -rs" says "MINGW64_NT-10.0-22621
3.3.6-341.x86_64").  Timings (sorry, only a single run each):

2.39.0-rc2:
All tests successful.
Files=983, Tests=26154, 2082 wallclock secs ( 7.86 usr 15.97 sys + 776.73 cusr 2352.96 csys = 3153.52 CPU)
Result: PASS

2.39.0-rc2 + [1] + [2] + removal of "GIT_TEST_CMP=mingw_test_cmp"
from t/test-lib.sh:
All tests successful.
Files=983, Tests=26154, 2174 wallclock secs ( 7.75 usr 17.27 sys + 813.53 cusr 2699.12 csys = 3537.66 CPU)
Result: PASS

2.39.0-rc2 + your v5:
All tests successful.
Files=983, Tests=26154, 2160 wallclock secs ( 4.16 usr 10.03 sys + 647.75 cusr 2038.99 csys = 2700.92 CPU)
Result: PASS

2.39.0-rc2 + [1] + [2] + the patch below:
All tests successful.
Files=983, Tests=26154, 2026 wallclock secs ( 4.91 usr  9.91 sys + 608.91 cusr 1881.98 csys = 2505.70 CPU)
Result: PASS


This is noisy, running "make test" multiple times takes too long.
Running a single test using hyperfine is feasible.  Here are my results
for the last test that needed a CR-agnostic test_cmp:

2.39.0-rc2:
Benchmark 1: sh.exe t3920-crlf-messages.sh
  Time (mean ± σ):      5.666 s ±  0.085 s    [User: 0.000 s, System: 0.007 s]
  Range (min … max):    5.583 s …  5.858 s    10 runs

2.39.0-rc2 + [1] + [2] + removal of "GIT_TEST_CMP=mingw_test_cmp"
from t/test-lib.sh:
Benchmark 1: sh.exe t3920-crlf-messages.sh
  Time (mean ± σ):      6.540 s ±  0.065 s    [User: 0.000 s, System: 0.004 s]
  Range (min … max):    6.454 s …  6.681 s    10 runs

2.39.0-rc2 + your v5:
Benchmark 1: sh.exe t3920-crlf-messages.sh
  Time (mean ± σ):      6.632 s ±  0.090 s    [User: 0.000 s, System: 0.004 s]
  Range (min … max):    6.550 s …  6.791 s    10 runs

2.39.0-rc2 + [1] + [2] + the patch below:
Benchmark 1: sh.exe t3920-crlf-messages.sh
  Time (mean ± σ):      5.743 s ±  0.065 s    [User: 0.002 s, System: 0.001 s]
  Range (min … max):    5.684 s …  5.870 s    10 runs

Still noisy, but avoiding to fork out to a comparison program seems
worth it, i.e. the shell function wins for the typically short inputs in
test scripts.  Do you get different numbers?


I'm a bit disappointed by the performance of the patch below, but we'd
need something like this to compare precisely (no longer ignoring CRs),
I suppose.


[1] https://lore.kernel.org/git/febcfb0a-c410-fb71-cff9-92acfcb269e2@kdbg.org/
[2] https://lore.kernel.org/git/cbe88abc-c1fb-cb50-6057-47ff27f7a12d@web.de/


diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 796093a7b3..bf10746a08 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1450,70 +1450,21 @@ test_skip_or_die () {
 	error "$2"
 }

-# The following mingw_* functions obey POSIX shell syntax, but are actually
-# bash scripts, and are meant to be used only with bash on Windows.
-
-# A test_cmp function that treats LF and CRLF equal and avoids to fork
-# diff when possible.
+# A test_cmp function that avoids to fork diff when possible. It's only
+# meant to be used with bash on Windows.
 mingw_test_cmp () {
 	# Read text into shell variables and compare them. If the results
 	# are different, use regular diff to report the difference.
-	local test_cmp_a= test_cmp_b=
-
-	# When text came from stdin (one argument is '-') we must feed it
-	# to diff.
-	local stdin_for_diff=
+	local test_cmp_a=a test_cmp_b=b

-	# Since it is difficult to detect the difference between an
-	# empty input file and a failure to read the files, we go straight
-	# to diff if one of the inputs is empty.
-	if test -s "$1" && test -s "$2"
-	then
-		# regular case: both files non-empty
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-	elif test -s "$1" && test "$2" = -
-	then
-		# read 2nd file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a <"$1"
-		mingw_read_file_strip_cr_ test_cmp_b
-		stdin_for_diff='<<<"$test_cmp_b"'
-	elif test "$1" = - && test -s "$2"
+	# Leave the uncommon case of reading from stdin to diff.
+	if test "$1" != "-" && test "$2" != "-"
 	then
-		# read 1st file from stdin
-		mingw_read_file_strip_cr_ test_cmp_a
-		mingw_read_file_strip_cr_ test_cmp_b <"$2"
-		stdin_for_diff='<<<"$test_cmp_a"'
+		IFS= read -r -d '' test_cmp_a <"$1"
+		IFS= read -r -d '' test_cmp_b <"$2"
 	fi
-	test -n "$test_cmp_a" &&
-	test -n "$test_cmp_b" &&
 	test "$test_cmp_a" = "$test_cmp_b" ||
-	eval "diff -u \"\$@\" $stdin_for_diff"
-}
-
-# $1 is the name of the shell variable to fill in
-mingw_read_file_strip_cr_ () {
-	# Read line-wise using LF as the line separator
-	# and use IFS to strip CR.
-	local line
-	while :
-	do
-		if IFS=$'\r' read -r -d $'\n' line
-		then
-			# good
-			line=$line$'\n'
-		else
-			# we get here at EOF, but also if the last line
-			# was not terminated by LF; in the latter case,
-			# some text was read
-			if test -z "$line"
-			then
-				# EOF, really
-				break
-			fi
-		fi
-		eval "$1=\$$1\$line"
-	done
+	diff -u "$@"
 }

 # Like "env FOO=BAR some-program", but run inside a subshell, which means

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
                             ` (2 preceding siblings ...)
  2022-12-06 21:54           ` René Scharfe
@ 2022-12-07  1:31           ` Taylor Blau
  3 siblings, 0 replies; 58+ messages in thread
From: Taylor Blau @ 2022-12-07  1:31 UTC (permalink / raw)
  To: Johannes Schindelin via GitGitGadget
  Cc: git, René Scharfe, Ævar Arnfjörð Bjarmason,
	Johannes Schindelin

Hi Johannes,

On Tue, Dec 06, 2022 at 03:07:47PM +0000, Johannes Schindelin via GitGitGadget wrote:
> Learning the lesson to avoid shell scripting wherever possible, the Git
> for Windows project implemented a minimal replacement for
> `mingw_test_cmp` in the form of a `test-tool` subcommand that parses the
> input files line by line, ignoring line endings, and compares them.
> Essentially the same thing as `mingw_test_cmp`, but implemented in
> C instead of Bash. This solution served the Git for Windows project
> well, over years.
>
> However, when this solution was finally upstreamed, the conclusion was
> reached that a change to use `git diff --no-index` instead of
> `mingw_test_cmp` was more easily reviewed and hence should be used
> instead.
>
> The reason why this approach was not even considered in Git for Windows
> is that in 2007, there was already a motion on the table to use Git's
> own diff machinery to perform comparisons in Git's test suite, but it
> was dismissed in https://lore.kernel.org/git/xmqqbkrpo9or.fsf@gitster.g/
> as undesirable because tests might potentially succeed due to bugs in
> the diff machinery when they should not succeed, and those bugs could
> therefore hide regressions that the tests try to prevent.
>
> By the time Git for Windows' `mingw-test-cmp` in C was finally
> contributed to the Git mailing list, reviewers agreed that the diff
> machinery had matured enough and should be used instead.
>
> When the concern was raised that the diff machinery, due to its
> complexity, would perform substantially worse than the test helper
> originally implemented in the Git for Windows project, a test
> demonstrated that these performance differences are well lost within the
> 100+ minutes it takes to run Git's test suite on Windows.

Thanks for this update. I think these five paragraphs are an accurate
and helpful record of the history behind this patch and approach. It is
very much appreciated.

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp`
  2022-12-06 21:54           ` René Scharfe
@ 2022-12-07  4:33             ` Junio C Hamano
  0 siblings, 0 replies; 58+ messages in thread
From: Junio C Hamano @ 2022-12-07  4:33 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Schindelin via GitGitGadget, git,
	Ævar Arnfjörð Bjarmason, Johannes Schindelin

René Scharfe <l.s.r@web.de> writes:

> Only t3920 needs mingw_test_cmp on my system.  [2] on top of [1] removes
> that dependency.  Does that work for you as well?
> ...
> [1] https://lore.kernel.org/git/febcfb0a-c410-fb71-cff9-92acfcb269e2@kdbg.org/

This is on js/t3920-shell-and-or-fix topic and in 'next'.

> [2] https://lore.kernel.org/git/cbe88abc-c1fb-cb50-6057-47ff27f7a12d@web.de/

I forgot to pick this up, which I just did.

> I'm a bit disappointed by the performance of the patch below, but we'd
> need something like this to compare precisely (no longer ignoring CRs),
> I suppose.
>


^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2022-12-07  4:34 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-29 14:53 [PATCH] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
2022-07-29 14:54 ` Johannes Schindelin
2022-07-29 16:44 ` Junio C Hamano
2022-09-06 13:10   ` Johannes Schindelin
2022-09-07 12:09     ` René Scharfe
2022-09-07 16:25       ` Junio C Hamano
2022-09-07 21:45         ` Re* " Junio C Hamano
2022-09-07 22:39           ` René Scharfe
2022-09-08  0:03             ` Junio C Hamano
2022-09-08  8:59         ` René Scharfe
2022-09-08 15:26           ` Ævar Arnfjörð Bjarmason
2022-09-08 20:54         ` Johannes Schindelin
2022-09-08 21:09           ` Junio C Hamano
2022-09-06 13:10 ` [PATCH v2 0/2] " Johannes Schindelin via GitGitGadget
2022-09-06 13:10   ` [PATCH v2 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
2022-09-06 13:10   ` [PATCH v2 2/2] tests: replace mingw_test_cmp with a helper in C Johannes Schindelin via GitGitGadget
2022-09-07 11:57     ` Ævar Arnfjörð Bjarmason
2022-09-07 12:24       ` Ævar Arnfjörð Bjarmason
2022-09-07 19:45         ` Junio C Hamano
2022-09-07  9:04   ` [PATCH v2 0/2] " Johannes Schindelin
2022-11-12 22:07   ` [PATCH v3 " Johannes Schindelin via GitGitGadget
2022-11-12 22:07     ` [PATCH v3 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
2022-11-12 22:07     ` [PATCH v3 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
2022-11-13  4:51       ` Taylor Blau
2022-11-14 13:34         ` Johannes Schindelin
2022-11-18 23:15         ` Junio C Hamano
2022-11-19  2:53           ` Taylor Blau
2022-11-19 12:03             ` Ævar Arnfjörð Bjarmason
2022-11-19  8:18           ` Johannes Sixt
2022-11-19 17:50             ` René Scharfe
2022-11-20  9:29               ` Torsten Bögershausen
2022-11-21 17:49               ` Johannes Sixt
2022-11-21  3:13             ` Junio C Hamano
2022-11-14  9:53       ` Phillip Wood
2022-11-14 13:47         ` Johannes Schindelin
2022-11-14 11:55       ` Ævar Arnfjörð Bjarmason
2022-11-14 14:02         ` Johannes Schindelin
2022-11-14 15:23           ` Ævar Arnfjörð Bjarmason
2022-11-18 23:19             ` Junio C Hamano
2022-11-19  2:56               ` Taylor Blau
2022-11-19 11:54                 ` Ævar Arnfjörð Bjarmason
2022-11-21  3:17                   ` Junio C Hamano
2022-11-14 14:06     ` [PATCH v4 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
2022-11-14 14:06       ` [PATCH v4 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
2022-11-14 14:06       ` [PATCH v4 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
2022-11-14 22:40         ` Taylor Blau
2022-11-18 13:32           ` Johannes Schindelin
2022-11-18 18:14             ` Taylor Blau
2022-11-20 23:36               ` Johannes Schindelin
2022-11-21  0:07                 ` Taylor Blau
2022-12-06 15:07       ` [PATCH v5 0/2] tests(mingw): avoid super-slow mingw_test_cmp Johannes Schindelin via GitGitGadget
2022-12-06 15:07         ` [PATCH v5 1/2] t0021: use Windows-friendly `pwd` Johannes Schindelin via GitGitGadget
2022-12-06 15:07         ` [PATCH v5 2/2] tests(mingw): avoid very slow `mingw_test_cmp` Johannes Schindelin via GitGitGadget
2022-12-06 18:55           ` Ævar Arnfjörð Bjarmason
2022-12-06 21:52           ` Johannes Sixt
2022-12-06 21:54           ` René Scharfe
2022-12-07  4:33             ` Junio C Hamano
2022-12-07  1:31           ` Taylor Blau

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).