From: "Ben Keene via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Ben Keene <seraphire@gmail.com>,
Junio C Hamano <gitster@pobox.com>,
Ben Keene <seraphire@gmail.com>
Subject: [PATCH v5 14/15] git-p4: added --encoding parameter to p4 clone
Date: Sat, 07 Dec 2019 17:47:42 +0000 [thread overview]
Message-ID: <25ad3e23a337b53ef6ca52019899838cc7ec43f7.1575740863.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.463.v5.git.1575740863.gitgitgadget@gmail.com>
From: Ben Keene <seraphire@gmail.com>
The test t9822 did not have any tests that had encoded a directory name
in ISO8859-1.
Additionally, to make it easier for the user to clone new repositories
with a non-UTF-8 encoded path in P4, add a new parameter to p4clone
"--encoding" that sets the git-p4.pathEncoding.
Add new tests that use ISO8859-1 encoded text in both the directory and
file names.
Update the View class in the git-p4 code to properly cast text
as_string() except for depot path and filenames.
Update the documentation to include the new command line parameter for
p4 clone.
Signed-off-by: Ben Keene <seraphire@gmail.com>
---
Documentation/git-p4.txt | 5 ++
git-p4.py | 57 +++++++++++++-----
t/t9822-git-p4-path-encoding.sh | 101 ++++++++++++++++++++++++++++++++
3 files changed, 147 insertions(+), 16 deletions(-)
diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
index 3494a1db3e..8fb844fc49 100644
--- a/Documentation/git-p4.txt
+++ b/Documentation/git-p4.txt
@@ -305,6 +305,11 @@ options described above.
--bare::
Perform a bare clone. See linkgit:git-clone[1].
+--encoding <encoding>::
+ Optionally sets the git-p4.pathEncoding configuration value in
+ the newly created Git repository before files are synchronized
+ from P4. See git-p4.pathEncoding for more information.
+
Submit options
~~~~~~~~~~~~~~
These options can be used to modify 'git p4 submit' behavior.
diff --git a/git-p4.py b/git-p4.py
index 9cf4e94e28..16f29aae41 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -1241,7 +1241,7 @@ def getClientSpec():
entry = specList[0]
# the //client/ name
- client_name = entry["Client"]
+ client_name = as_string(entry["Client"])
# just the keys that start with "View"
view_keys = [ k for k in list(entry.keys()) if k.startswith("View") ]
@@ -2625,19 +2625,25 @@ def run(self, args):
return True
class View(object):
- """Represent a p4 view ("p4 help views"), and map files in a
- repo according to the view."""
+ """ Represent a p4 view ("p4 help views"), and map files in a
+ repo according to the view.
+ """
def __init__(self, client_name):
self.mappings = []
- self.client_prefix = "//%s/" % client_name
+ # the client prefix is saved in bytes as it is used for comparison
+ # against server data.
+ self.client_prefix = as_bytes("//%s/" % client_name)
# cache results of "p4 where" to lookup client file locations
self.client_spec_path_cache = {}
def append(self, view_line):
- """Parse a view line, splitting it into depot and client
- sides. Append to self.mappings, preserving order. This
- is only needed for tag creation."""
+ """ Parse a view line, splitting it into depot and client
+ sides. Append to self.mappings, preserving order. This
+ is only needed for tag creation.
+
+ view_line should be in bytes (depot path encoding)
+ """
# Split the view line into exactly two words. P4 enforces
# structure on these lines that simplifies this quite a bit.
@@ -2650,28 +2656,28 @@ def append(self, view_line):
# The line is already white-space stripped.
# The two words are separated by a single space.
#
- if view_line[0] == '"':
+ if view_line[0] == b'"':
# First word is double quoted. Find its end.
- close_quote_index = view_line.find('"', 1)
+ close_quote_index = view_line.find(b'"', 1)
if close_quote_index <= 0:
- die("No first-word closing quote found: %s" % view_line)
+ die("No first-word closing quote found: %s" % path_as_string(view_line))
depot_side = view_line[1:close_quote_index]
# skip closing quote and space
rhs_index = close_quote_index + 1 + 1
else:
- space_index = view_line.find(" ")
+ space_index = view_line.find(b" ")
if space_index <= 0:
- die("No word-splitting space found: %s" % view_line)
+ die("No word-splitting space found: %s" % path_as_string(view_line))
depot_side = view_line[0:space_index]
rhs_index = space_index + 1
# prefix + means overlay on previous mapping
- if depot_side.startswith("+"):
+ if depot_side.startswith(b"+"):
depot_side = depot_side[1:]
# prefix - means exclude this path, leave out of mappings
exclude = False
- if depot_side.startswith("-"):
+ if depot_side.startswith(b"-"):
exclude = True
depot_side = depot_side[1:]
@@ -2682,7 +2688,7 @@ def convert_client_path(self, clientFile):
# chop off //client/ part to make it relative
if not clientFile.startswith(self.client_prefix):
die("No prefix '%s' on clientFile '%s'" %
- (self.client_prefix, clientFile))
+ (as_string(self.client_prefix)), path_as_string(clientFile))
return clientFile[len(self.client_prefix):]
def update_client_spec_path_cache(self, files):
@@ -2696,7 +2702,7 @@ def update_client_spec_path_cache(self, files):
where_result = p4CmdList(["-x", "-", "where"], stdin=fileArgs, encode_cmd_output=False)
for res in where_result:
- if "code" in res and res["code"] == "error":
+ if "code" in res and res["code"] == b"error":
# assume error is "... file(s) not in client view"
continue
if "clientFile" not in res:
@@ -4113,10 +4119,14 @@ def __init__(self):
help="where to leave result of the clone"),
optparse.make_option("--bare", dest="cloneBare",
action="store_true", default=False),
+ optparse.make_option("--encoding", dest="setPathEncoding",
+ action="store", default=None,
+ help="Sets the path encoding for this depot")
]
self.cloneDestination = None
self.needsGit = False
self.cloneBare = False
+ self.setPathEncoding = None
def defaultDestination(self, args):
""" Returns the last path component as the default git
@@ -4140,6 +4150,14 @@ def run(self, args):
depotPaths = args
+ # If we have an encoding provided, ignore what may already exist
+ # in the Git configuration. This will ensure we show the displayed values
+ # using the correct encoding.
+ if self.setPathEncoding:
+ gitConfigSet("git-p4.pathEncoding", self.setPathEncoding)
+
+ # If more than 1 path element is supplied, the last element
+ # is the clone destination.
if not self.cloneDestination and len(depotPaths) > 1:
self.cloneDestination = depotPaths[-1]
depotPaths = depotPaths[:-1]
@@ -4167,6 +4185,13 @@ def run(self, args):
if retcode:
raise CalledProcessError(retcode, init_cmd)
+ # Set the encoding if it was provided on the command line
+ if self.setPathEncoding:
+ init_cmd= ["git", "config", "git-p4.pathEncoding", self.setPathEncoding]
+ retcode = subprocess.call(init_cmd)
+ if retcode:
+ raise CalledProcessError(retcode, init_cmd)
+
if not P4Sync.run(self, depotPaths):
return False
diff --git a/t/t9822-git-p4-path-encoding.sh b/t/t9822-git-p4-path-encoding.sh
index 572d395498..8d3fe6c5d1 100755
--- a/t/t9822-git-p4-path-encoding.sh
+++ b/t/t9822-git-p4-path-encoding.sh
@@ -4,9 +4,20 @@ test_description='Clone repositories with non ASCII paths'
. ./lib-git-p4.sh
+# lowercase filename
+# UTF8 - HEX: a-\xc3\xa4_o-\xc3\xb6_u-\xc3\xbc
+# - octal: a-\303\244_o-\303\266_u-\303\274
+# ISO8859 - HEX: a-\xe4_o-\xf6_u-\xfc
UTF8_ESCAPED="a-\303\244_o-\303\266_u-\303\274.txt"
ISO8859_ESCAPED="a-\344_o-\366_u-\374.txt"
+# lowercase directory
+# UTF8 - HEX: dir_a-\xc3\xa4_o-\xc3\xb6_u-\xc3\xbc
+# ISO8859 - HEX: dir_a-\xe4_o-\xf6_u-\xfc
+DIR_UTF8_ESCAPED="dir_a-\303\244_o-\303\266_u-\303\274"
+DIR_ISO8859_ESCAPED="dir_a-\344_o-\366_u-\374"
+
+
ISO8859="$(printf "$ISO8859_ESCAPED")" &&
echo content123 >"$ISO8859" &&
rm "$ISO8859" || {
@@ -58,6 +69,22 @@ test_expect_success 'Clone repo containing iso8859-1 encoded paths with git-p4.p
)
'
+test_expect_success 'Clone repo containing iso8859-1 encoded paths with using --encoding parameter' '
+ test_when_finished cleanup_git &&
+ (
+ git p4 clone --encoding iso8859 --destination="$git" //depot &&
+ cd "$git" &&
+ UTF8="$(printf "$UTF8_ESCAPED")" &&
+ echo "$UTF8" >expect &&
+ git -c core.quotepath=false ls-files >actual &&
+ test_cmp expect actual &&
+
+ echo content123 >expect &&
+ cat "$UTF8" >actual &&
+ test_cmp expect actual
+ )
+'
+
test_expect_success 'Delete iso8859-1 encoded paths and clone' '
(
cd "$cli" &&
@@ -74,4 +101,78 @@ test_expect_success 'Delete iso8859-1 encoded paths and clone' '
)
'
+# These tests will create a directory with ISO8859-1 characters in both the
+# directory and the path. Since it is possible to clone a path instead of using
+# the whole client-spec, check both versions: client-spec and with a direct
+# path using --encoding
+test_expect_success 'Create a repo containing iso8859-1 encoded directory and filename' '
+ (
+ DIR_ISO8859="$(printf "$DIR_ISO8859_ESCAPED")" &&
+ ISO8859="$(printf "$ISO8859_ESCAPED")" &&
+ cd "$cli" &&
+ mkdir "$DIR_ISO8859" &&
+ cd "$DIR_ISO8859" &&
+ echo content123 >"$ISO8859" &&
+ p4 add "$ISO8859" &&
+ p4 submit -d "test commit (encoded directory)"
+ )
+'
+
+test_expect_success 'Clone repo containing iso8859-1 encoded depot path and files with git-p4.pathEncoding' '
+ test_when_finished cleanup_git &&
+ (
+ DIR_ISO8859="$(printf "$DIR_ISO8859_ESCAPED")" &&
+ DIR_UTF8="$(printf "$DIR_UTF8_ESCAPED")" &&
+ cd "$git" &&
+ git init . &&
+ git config git-p4.pathEncoding iso8859-1 &&
+ git p4 clone --use-client-spec --destination="$git" "//depot/$DIR_ISO8859" &&
+ cd "$DIR_UTF8" &&
+ UTF8="$(printf "$UTF8_ESCAPED")" &&
+ echo "$UTF8" >expect &&
+ git -c core.quotepath=false ls-files >actual &&
+ test_cmp expect actual &&
+
+ echo content123 >expect &&
+ cat "$UTF8" >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success 'Clone repo containing iso8859-1 encoded depot path and files with git-p4.pathEncoding, without --use-client-spec' '
+ test_when_finished cleanup_git &&
+ (
+ DIR_ISO8859="$(printf "$DIR_ISO8859_ESCAPED")" &&
+ cd "$git" &&
+ git init . &&
+ git config git-p4.pathEncoding iso8859-1 &&
+ git p4 clone --destination="$git" "//depot/$DIR_ISO8859" &&
+ UTF8="$(printf "$UTF8_ESCAPED")" &&
+ echo "$UTF8" >expect &&
+ git -c core.quotepath=false ls-files >actual &&
+ test_cmp expect actual &&
+
+ echo content123 >expect &&
+ cat "$UTF8" >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success 'Clone repo containing iso8859-1 encoded depot path and files with using --encoding parameter' '
+ test_when_finished cleanup_git &&
+ (
+ DIR_ISO8859="$(printf "$DIR_ISO8859_ESCAPED")" &&
+ git p4 clone --encoding iso8859 --destination="$git" "//depot/$DIR_ISO8859" &&
+ cd "$git" &&
+ UTF8="$(printf "$UTF8_ESCAPED")" &&
+ echo "$UTF8" >expect &&
+ git -c core.quotepath=false ls-files >actual &&
+ test_cmp expect actual &&
+
+ echo content123 >expect &&
+ cat "$UTF8" >actual &&
+ test_cmp expect actual
+ )
+'
+
test_done
--
gitgitgadget
next prev parent reply other threads:[~2019-12-07 17:48 UTC|newest]
Thread overview: 77+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-13 21:07 [PATCH 0/1] git-p4.py: Cast byte strings to unicode strings in python3 Ben Keene via GitGitGadget
2019-11-13 21:07 ` [PATCH 1/1] " Ben Keene via GitGitGadget
2019-11-14 2:25 ` [PATCH 0/1] git-p4.py: " Junio C Hamano
2019-11-14 9:46 ` Luke Diamand
2019-11-15 14:39 ` [PATCH v2 0/3] " Ben Keene via GitGitGadget
2019-11-15 14:39 ` [PATCH v2 1/3] " Ben Keene via GitGitGadget
2019-11-15 14:39 ` [PATCH v2 2/3] FIX: cast as unicode fails when a value is already unicode Ben Keene via GitGitGadget
2019-11-15 14:39 ` [PATCH v2 3/3] FIX: wrap return for read_pipe_lines in ustring() and wrap GitLFS read of the pointer file in ustring() Ben Keene via GitGitGadget
2019-12-02 19:02 ` [PATCH v3 0/1] git-p4.py: Cast byte strings to unicode strings in python3 Ben Keene via GitGitGadget
2019-12-02 19:02 ` [PATCH v3 1/1] Python3 support for t9800 tests. Basic P4/Python3 support Ben Keene via GitGitGadget
2019-12-03 0:18 ` Denton Liu
2019-12-03 16:03 ` Ben Keene
2019-12-04 6:14 ` Denton Liu
2019-12-04 22:29 ` [PATCH v4 00/11] git-p4.py: Cast byte strings to unicode strings in python3 Ben Keene via GitGitGadget
2019-12-04 22:29 ` [PATCH v4 01/11] git-p4: select p4 binary by operating-system Ben Keene via GitGitGadget
2019-12-05 10:19 ` Denton Liu
2019-12-05 16:32 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 02/11] git-p4: change the expansion test from basestring to list Ben Keene via GitGitGadget
2019-12-05 10:27 ` Denton Liu
2019-12-05 17:05 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 03/11] git-p4: add new helper functions for python3 conversion Ben Keene via GitGitGadget
2019-12-05 10:40 ` Denton Liu
2019-12-05 18:42 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 04/11] git-p4: python3 syntax changes Ben Keene via GitGitGadget
2019-12-05 11:02 ` Denton Liu
2019-12-04 22:29 ` [PATCH v4 05/11] git-p4: Add new functions in preparation of usage Ben Keene via GitGitGadget
2019-12-05 10:50 ` Denton Liu
2019-12-05 19:23 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 06/11] git-p4: Fix assumed path separators to be more Windows friendly Ben Keene via GitGitGadget
2019-12-05 13:38 ` Junio C Hamano
2019-12-05 19:37 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 07/11] git-p4: Add a helper class for stream writing Ben Keene via GitGitGadget
2019-12-05 13:42 ` Junio C Hamano
2019-12-05 19:52 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 08/11] git-p4: p4CmdList - support Unicode encoding Ben Keene via GitGitGadget
2019-12-05 13:55 ` Junio C Hamano
2019-12-05 20:23 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 09/11] git-p4: Add usability enhancements Ben Keene via GitGitGadget
2019-12-05 14:04 ` Junio C Hamano
2019-12-05 15:40 ` Ben Keene
2019-12-04 22:29 ` [PATCH v4 10/11] git-p4: Support python3 for basic P4 clone, sync, and submit Ben Keene via GitGitGadget
2019-12-04 22:29 ` [PATCH v4 11/11] git-p4: Added --encoding parameter to p4 clone Ben Keene via GitGitGadget
2019-12-05 9:54 ` [PATCH v4 00/11] git-p4.py: Cast byte strings to unicode strings in python3 Luke Diamand
2019-12-05 16:16 ` Ben Keene
2019-12-05 18:51 ` Denton Liu
2019-12-05 20:47 ` Ben Keene
2019-12-07 17:47 ` [PATCH v5 00/15] " Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 01/15] t/gitweb-lib.sh: drop confusing quotes Jeff King via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 02/15] t/gitweb-lib.sh: set $REQUEST_URI Jeff King via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 03/15] git-p4: select P4 binary by operating-system Ben Keene via GitGitGadget
2019-12-09 19:47 ` Junio C Hamano
2019-12-07 17:47 ` [PATCH v5 04/15] git-p4: change the expansion test from basestring to list Ben Keene via GitGitGadget
2019-12-09 20:25 ` Junio C Hamano
2019-12-13 14:40 ` Ben Keene
2019-12-07 17:47 ` [PATCH v5 05/15] git-p4: promote encodeWithUTF8() to a global function Ben Keene via GitGitGadget
2019-12-11 16:39 ` Junio C Hamano
2019-12-07 17:47 ` [PATCH v5 06/15] git-p4: remove p4_write_pipe() and write_pipe() return values Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 07/15] git-p4: add new support function gitConfigSet() Ben Keene via GitGitGadget
2019-12-11 17:11 ` Junio C Hamano
2019-12-07 17:47 ` [PATCH v5 08/15] git-p4: add casting helper functions for python 3 conversion Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 09/15] git-p4: python 3 syntax changes Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 10/15] git-p4: fix assumed path separators to be more Windows friendly Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 11/15] git-p4: add Py23File() - helper class for stream writing Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 12/15] git-p4: p4CmdList - support Unicode encoding Ben Keene via GitGitGadget
2019-12-07 17:47 ` [PATCH v5 13/15] git-p4: support Python 3 for basic P4 clone, sync, and submit (t9800) Ben Keene via GitGitGadget
2019-12-07 17:47 ` Ben Keene via GitGitGadget [this message]
2019-12-07 17:47 ` [PATCH v5 15/15] git-p4: Add depot manipulation functions Ben Keene via GitGitGadget
2019-12-07 19:47 ` [PATCH v5 00/15] git-p4.py: Cast byte strings to unicode strings in python3 Jeff King
2019-12-07 21:27 ` Ben Keene
2019-12-11 16:54 ` Junio C Hamano
2019-12-11 17:13 ` Denton Liu
2019-12-11 17:57 ` Junio C Hamano
2019-12-11 20:19 ` Luke Diamand
2019-12-11 21:46 ` Junio C Hamano
2019-12-11 22:30 ` Yang Zhao
2019-12-12 14:13 ` Ben Keene
2019-12-13 19:42 ` [PATCH v5 00/15] git-p4.py: Cast byte strings to unicode strings in python3 - Code Review Ben Keene
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=25ad3e23a337b53ef6ca52019899838cc7ec43f7.1575740863.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=seraphire@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).