From: tboegi@web.de
To: git@vger.kernel.org, ashishnegi33@gmail.com
Cc: "Torsten Bögershausen" <tboegi@web.de>
Subject: [PATCH 1/1] convert: tighten the safe autocrlf handling
Date: Fri, 24 Nov 2017 17:14:07 +0100 [thread overview]
Message-ID: <20171124161407.30698-1-tboegi@web.de> (raw)
In-Reply-To: <CAJ_+vJ6FXXda4fe7=1YxtDGR2d8CqP4KXN+YR6+mdQ+5jQQXug@mail.gmail.com>
From: Torsten Bögershausen <tboegi@web.de>
When a text file had been commited with CRLF and the file is commited
again, the CRLF are kept if .gitattributs has "text=auto".
This is done by analyzing the content of the blob stored in the index:
If a '\r' is found, Git assumes that the blob was commited with CRLF.
The simple search for a '\r' does not always work as expected:
A file is encoded in UTF-16 with CRLF and commited. Git treats it as binary.
Now the content is converted into UTF-8. At the next commit Git treats the
file as text, the CRLF should be converted into LF, but isn't.
Solution:
Replace has_cr_in_index() with has_crlf_in_index(). When no '\r' is found,
0 is returned directly, this is the most common case.
If a '\r' is found, the content is analyzed more deeply.
Reported-By: Ashish Negi <ashishnegi33@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
convert.c | 19 +++++++++----
t/t0027-auto-crlf.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 85 insertions(+), 10 deletions(-)
diff --git a/convert.c b/convert.c
index 20d7ab67bd..63ef799239 100644
--- a/convert.c
+++ b/convert.c
@@ -220,18 +220,27 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
}
}
-static int has_cr_in_index(const struct index_state *istate, const char *path)
+static int has_crlf_in_index(const struct index_state *istate, const char *path)
{
unsigned long sz;
void *data;
- int has_cr;
+ const char *crp;
+ int has_crlf = 0;
data = read_blob_data_from_index(istate, path, &sz);
if (!data)
return 0;
- has_cr = memchr(data, '\r', sz) != NULL;
+
+ crp = memchr(data, '\r', sz);
+ if (crp && (crp[1] == '\n')) {
+ unsigned int ret_stats;
+ ret_stats = gather_convert_stats(data, sz);
+ if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
+ (ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
+ has_crlf = 1;
+ }
free(data);
- return has_cr;
+ return has_crlf;
}
static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
@@ -290,7 +299,7 @@ static int crlf_to_git(const struct index_state *istate,
* cherry-pick.
*/
if ((checksafe != SAFE_CRLF_RENORMALIZE) &&
- has_cr_in_index(istate, path))
+ has_crlf_in_index(istate, path))
convert_crlf_into_lf = 0;
}
if ((checksafe == SAFE_CRLF_WARN ||
diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh
index 68108d956a..0af35cfb1f 100755
--- a/t/t0027-auto-crlf.sh
+++ b/t/t0027-auto-crlf.sh
@@ -43,19 +43,31 @@ create_gitattributes () {
} >.gitattributes
}
-create_NNO_files () {
+# Create 2 sets of files:
+# The NNO files are "Not NOrmalized in the repo. We use CRLF_mix_LF and store
+# it under different names for the different test cases, see ${pfx}
+# Depending on .gitattributes they are normalized at the next commit (or not)
+# The MIX files have different contents in the repo.
+# Depending on its contents, the "new safer autocrlf" may kick in.
+create_NNO_MIX_files () {
for crlf in false true input
do
for attr in "" auto text -text
do
for aeol in "" lf crlf
do
- pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf}
+ pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} &&
cp CRLF_mix_LF ${pfx}_LF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
- cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
+ cp CRLF_mix_LF ${pfx}_CRLF_nul.txt &&
+ pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} &&
+ cp LF ${pfx}_LF.txt &&
+ cp CRLF ${pfx}_CRLF.txt &&
+ cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
+ cp LF_mix_CR ${pfx}_LF_mix_CR.txt &&
+ cp CRLF_nul ${pfx}_CRLF_nul.txt
done
done
done
@@ -136,6 +148,49 @@ commit_chk_wrnNNO () {
'
}
+# Commit a file with mixed line endings on top of different files
+# in the index. Check for warnings
+commit_MIX_chkwrn () {
+ attr=$1 ; shift
+ aeol=$1 ; shift
+ crlf=$1 ; shift
+ lfwarn=$1 ; shift
+ crlfwarn=$1 ; shift
+ lfmixcrlf=$1 ; shift
+ lfmixcr=$1 ; shift
+ crlfnul=$1 ; shift
+ pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf}
+ #Commit file with CLRF_mix_LF on top of existing file
+ create_gitattributes "$attr" $aeol &&
+ for f in LF CRLF CRLF_mix_LF LF_mix_CR CRLF_nul
+ do
+ fname=${pfx}_$f.txt &&
+ cp CRLF_mix_LF $fname &&
+ printf Z >>"$fname" &&
+ git -c core.autocrlf=$crlf add $fname 2>"${pfx}_$f.err"
+ done
+
+ test_expect_success "commit file with mixed EOL crlf=$crlf attr=$attr LF" '
+ check_warning "$lfwarn" ${pfx}_LF.err
+ '
+ test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF" '
+ check_warning "$crlfwarn" ${pfx}_CRLF.err
+ '
+
+ test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_mix_LF" '
+ check_warning "$lfmixcrlf" ${pfx}_CRLF_mix_LF.err
+ '
+
+ test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf LF_mix_cr" '
+ check_warning "$lfmixcr" ${pfx}_LF_mix_CR.err
+ '
+
+ test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_nul" '
+ check_warning "$crlfnul" ${pfx}_CRLF_nul.err
+ '
+}
+
+
stats_ascii () {
case "$1" in
LF)
@@ -323,8 +378,8 @@ test_expect_success 'setup master' '
printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONE\r\nLINETWO\rLINETHREE" >CRLF_mix_CR &&
printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONEQ\r\nLINETWO\r\nLINETHREE" | q_to_nul >CRLF_nul &&
printf "\$Id: 0000000000000000000000000000000000000000 \$\nLINEONEQ\nLINETWO\nLINETHREE" | q_to_nul >LF_nul &&
- create_NNO_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
- git -c core.autocrlf=false add NNO_*.txt &&
+ create_NNO_MIX_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
+ git -c core.autocrlf=false add NNO_*.txt MIX_*.txt &&
git commit -m "mixed line endings" &&
test_tick
'
@@ -385,6 +440,17 @@ test_expect_success 'commit files attr=crlf' '
commit_check_warn input "crlf" "LF_CRLF" "" "LF_CRLF" "LF_CRLF" ""
'
+# Commit "CRLFmixLF" on top of these files already in the repo:
+# LF, CRLF, CRLFmixLF LF_mix_CR CRLFNULL
+# attr LF CRLF CRLFmixLF LF_mix_CR CRLFNUL
+commit_MIX_chkwrn "" "" false "" "" "" "" ""
+commit_MIX_chkwrn "" "" true "LF_CRLF" "" "" "LF_CRLF" "LF_CRLF"
+commit_MIX_chkwrn "" "" input "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF"
+
+commit_MIX_chkwrn "auto" "" false "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF"
+commit_MIX_chkwrn "auto" "" true "LF_CRLF" "" "" "LF_CRLF" "LF_CRLF"
+commit_MIX_chkwrn "auto" "" input "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF"
+
# attr LF CRLF CRLFmixLF LF_mix_CR CRLFNUL
commit_chk_wrnNNO "" "" false "" "" "" "" ""
commit_chk_wrnNNO "" "" true LF_CRLF "" "" "" ""
--
2.15.0.278.gbd97f72a1b.dirty
next prev parent reply other threads:[~2017-11-24 16:14 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-14 12:31 Changing encoding of a file : What should happen to CRLF in file ? Ashish Negi
2017-11-14 15:20 ` Torsten Bögershausen
2017-11-14 16:13 ` Ashish Negi
2017-11-14 16:15 ` Ashish Negi
2017-11-14 17:09 ` Torsten Bögershausen
2017-11-15 8:11 ` Ashish Negi
2017-11-15 17:12 ` Torsten Bögershausen
2017-11-15 19:05 ` Ashish Negi
2017-11-16 16:15 ` Torsten Bögershausen
2017-11-23 16:31 ` Ashish Negi
2017-11-23 20:25 ` Torsten Bögershausen
2017-11-24 6:37 ` Ashish Negi
2017-11-14 16:45 ` Torsten Bögershausen
2017-11-24 16:14 ` tboegi [this message]
2017-11-24 17:24 ` [PATCH 1/1] convert: tighten the safe autocrlf handling Eric Sunshine
2017-11-24 18:59 ` Torsten Bögershausen
2017-11-25 3:16 ` Junio C Hamano
2017-11-26 12:20 ` [PATCH v2 " tboegi
2017-12-08 17:46 ` [PATCH v1 1/2] t0027: Don't use git commit <empty-pathspec> tboegi
2017-12-08 18:13 ` Junio C Hamano
2017-12-08 18:21 ` Junio C Hamano
2017-12-08 18:50 ` Torsten Bögershausen
2017-12-08 17:46 ` [PATCH v1 2/2] t0027: Adapt the new MIX tests to Windows tboegi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171124161407.30698-1-tboegi@web.de \
--to=tboegi@web.de \
--cc=ashishnegi33@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).