git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
From: tboegi@web.de
To: git@vger.kernel.org, random_n0body@icloud.com,
	evraiphilippeblain@gmail.com
Cc: "Torsten Bögershausen" <tboegi@web.de>,
	"Philippe Blain" <levraiphilippeblain@gmail.com>
Subject: [PATCH/RFC v1 1/1] git restore -p . and precomposed unicode
Date: Sun, 24 Jan 2021 16:13:06 +0100
Message-ID: <20210124151306.23185-1-tboegi@web.de> (raw)
In-Reply-To: <A102844A-9501-4A86-854D-E3B387D378AA@icloud.com>

From: Torsten Bögershausen <tboegi@web.de>

The following sequence leads to a "BUG" assertion running under MacOS:

  #!/bin/sh
  DIR=git-test-restore-p
  Adiarnfd=$(printf 'A\314\210')
  DIRNAME=xx${Adiarnfd}yy
  mkdir $DIR &&
  cd $DIR &&
  git init &&
  mkdir $DIRNAME &&
  cd $DIRNAME &&
  echo "Initial" >file &&
  git add file &&
  echo "One more line"  >>file &&
  echo y | git restore -p . &&
  echo "OK"

 Initialized empty Git repository in /tmp/git-test-restore-p/.git/
 BUG: pathspec.c:495: error initializing pathspec_item
 Cannot close git diff-index --cached --numstat
 [snip]


The command `git restore` is run from a directory inside a Git repo.
Git needs to split the $CWD into 2 parts:
the path to the repo and "the rest", if any.
"The rest" becomes a "prefix" that is later used inside the pathspec code.

As an example, "/path/to/repo/dir-inside-repå" would determine
"/path/to/repo" as the root of the repo, the place where the
configuration file .git/config is found.

The rest becomes the prefix ("dir-inside-repå"), from where the pathspec
machinery expands the ".", more about this later.
If there is a decomposed form (written here as "dir-inside-rep°a" to make
the decomposition visible), it doesn't match "dir-inside-repå".

The solution is to read the config variable "core.precomposeunicode" early.
Then, if configured, precompose "prefix" (and argv) and hand the prefix
over to the pathspec machinery, which expands "." into a list of path names
tracked by Git.

[1] git-bugreport-2021-01-06-1209.txt (git can't deal with special characters)
[2] https://lore.kernel.org/git/A102844A-9501-4A86-854D-E3B387D378AA@icloud.com/

Reported-by: Daniel Troger <random_n0body@icloud.com>
Helped-By: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
 This may need some refinements, but we need to start somewhere...
 Are there any good ideas on how to improve the commit message?
 Should the code in git.c be "hidden" in a function somewhere else?
 Other comments are appreciated.


compat/precompose_utf8.c     | 24 ++++++++++++++++++++++++
 compat/precompose_utf8.h     |  2 ++
 git-compat-util.h            |  8 ++++++++
 git.c                        |  9 +++++++++
 t/t3910-mac-os-precompose.sh | 15 +++++++++++++++
 5 files changed, 58 insertions(+)

diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c
index 136250fbf6..06e371660f 100644
--- a/compat/precompose_utf8.c
+++ b/compat/precompose_utf8.c
@@ -36,6 +36,11 @@ static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
 	return ret;
 }

+int precompose_read_config_gently(void)
+{
+	git_config_get_bool("core.precomposeunicode", &precomposed_unicode);
+	return precomposed_unicode == 1;
+}

 void probe_utf8_pathname_composition(void)
 {
@@ -60,6 +65,25 @@ void probe_utf8_pathname_composition(void)
 	strbuf_release(&path);
 }

+char *precompose_string_if_needed(const char *in)
+{
+	size_t inlen = strlen(in);
+	size_t outlen;
+	char *out = NULL;
+	if ((has_non_ascii(in, inlen, NULL)) && (precomposed_unicode == 1)) {
+		int saved_errno = errno;
+		out = reencode_string_len(in, inlen,
+					  repo_encoding, path_encoding,
+					  &outlen);
+		if (out && outlen == inlen && !memcmp(in, out, outlen)) {
+			/* strings are identical: no need to return a new one */
+			free(out);
+			out = NULL;
+		}
+		errno = saved_errno;
+	}
+	return out;
+}

 void precompose_argv(int argc, const char **argv)
 {
diff --git a/compat/precompose_utf8.h b/compat/precompose_utf8.h
index 6f843d3e1a..ce82857d73 100644
--- a/compat/precompose_utf8.h
+++ b/compat/precompose_utf8.h
@@ -28,6 +28,8 @@ typedef struct {
 	struct dirent_prec_psx *dirent_nfc;
 } PREC_DIR;

+int precompose_read_config_gently(void);
+char *precompose_string_if_needed(const char *in);
 void precompose_argv(int argc, const char **argv);
 void probe_utf8_pathname_composition(void);

diff --git a/git-compat-util.h b/git-compat-util.h
index 104993b975..f34854b66f 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -252,6 +252,14 @@ typedef unsigned long uintptr_t;
 #ifdef PRECOMPOSE_UNICODE
 #include "compat/precompose_utf8.h"
 #else
+static inline int precompose_read_config_gently(void)
+{
+	return 0;
+}
+static inline char *precompose_string_if_needed(const char *in)
+{
+	return NULL; /* no need to precompose a string */
+}
 static inline void precompose_argv(int argc, const char **argv)
 {
 	; /* nothing */
diff --git a/git.c b/git.c
index a00a0a4d94..f09e14f733 100644
--- a/git.c
+++ b/git.c
@@ -421,6 +421,15 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 			prefix = setup_git_directory_gently(&nongit_ok);
 		}

+		if (precompose_read_config_gently()) {
+			precompose_argv(argc, argv);
+			if (prefix) {
+				const char *prec_pfx;
+					prec_pfx = precompose_string_if_needed(prefix);
+				if (prec_pfx)
+					prefix = prec_pfx; /* memory lost */
+			}
+		}
 		if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY) &&
 		    !(p->option & DELAY_PAGER_CONFIG))
 			use_pager = check_pager_config(p->cmd);
diff --git a/t/t3910-mac-os-precompose.sh b/t/t3910-mac-os-precompose.sh
index 54ce19e353..bbbc50da93 100755
--- a/t/t3910-mac-os-precompose.sh
+++ b/t/t3910-mac-os-precompose.sh
@@ -191,6 +191,21 @@ test_expect_failure 'handle existing decomposed filenames' '
 	test_must_be_empty untracked
 '

+test_expect_success "unicode decomposed: git restore -p . " '
+	DIRNAMEPWD=dir.Odiarnfc &&
+	DIRNAMEINREPO=dir.$Adiarnfc &&
+	export DIRNAMEPWD DIRNAMEINREPO &&
+	git init $DIRNAMEPWD &&
+	( cd $DIRNAMEPWD &&
+		mkdir $DIRNAMEINREPO &&
+		cd $DIRNAMEINREPO &&
+		echo "Initial" >file &&
+		git add file &&
+		echo "More stuff" >>file &&
+		echo y | git restore -p .
+	)
+'
+
 # Test if the global core.precomposeunicode stops autosensing
 # Must be the last test case
 test_expect_success "respect git config --global core.precomposeunicode" '
--
2.30.0.155.g66e871b664


  parent reply	other threads:[~2021-01-24 15:16 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-06 11:35 git-bugreport-2021-01-06-1209.txt (git can't deal with special characters) Daniel Troger
2021-01-06 14:21 ` Torsten Bögershausen
2021-01-06 16:49   ` Daniel Troger
2021-01-06 21:47     ` Torsten Bögershausen
2021-01-06 22:21       ` Daniel Troger
2021-01-06 23:07         ` Randall S. Becker
2021-01-07 14:34           ` Philippe Blain
2021-01-07 15:49             ` Torsten Bögershausen
2021-01-07 16:21               ` Philippe Blain
2021-01-08 19:07                 ` Torsten Bögershausen
2021-01-24 15:13 ` tboegi [this message]
2021-01-24 19:51   ` [PATCH/RFC v1 1/1] git restore -p . and precomposed unicode Junio C Hamano
2021-01-25 16:53     ` Torsten Bögershausen
2021-01-29 17:15 ` [PATCH v2 1/1] MacOS: precompose_argv_prefix() tboegi
2021-01-29 23:19   ` Junio C Hamano
2021-01-31  0:43   ` Junio C Hamano
2021-01-31  9:50     ` Torsten =?unknown-8bit?Q?B=C3=B6gershausen?=
2021-02-02 15:11 ` [PATCH v3 " tboegi
2021-02-02 17:43   ` Junio C Hamano
2021-02-03 16:28 ` [PATCH v4 " tboegi
2021-02-03 19:33   ` Junio C Hamano
2021-02-03 22:13     ` Junio C Hamano
2021-02-05 17:31       ` Torsten Bögershausen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210124151306.23185-1-tboegi@web.de \
    --to=tboegi@web.de \
    --cc=evraiphilippeblain@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=levraiphilippeblain@gmail.com \
    --cc=random_n0body@icloud.com \
    --subject='Re: [PATCH/RFC v1 1/1] git restore -p . and precomposed unicode' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git