From: Yang Zhao <yang.zhao@skyboxlabs.com>
To: git@vger.kernel.org
Cc: Yang Zhao <yang.zhao@skyboxlabs.com>,
luke@diamand.org, liu.denton@gmail.com, seraphire@gmail.com
Subject: [PATCH v2 04/14] git-p4: encode/decode communication with p4 for python3
Date: Fri, 13 Dec 2019 15:52:38 -0800 [thread overview]
Message-ID: <20191213235247.23660-6-yang.zhao@skyboxlabs.com> (raw)
In-Reply-To: <20191213235247.23660-1-yang.zhao@skyboxlabs.com>
The marshalled dict in the response given on STDOUT by p4 uses `str` for
keys and string values. When run using python3, these values are
deserialized as `bytes`, leading to a whole host of problems as the rest
of the code assumes `str` is used throughout.
This patch changes the deserialization behaviour such that, as much as
possible, text output from p4 is decoded to native unicode strings.
Exceptions are made for the field `data` as it is usually arbitrary
binary data. `depotFile[0-9]*`, `path`, and `clientFile` are also exempt
as they contain path strings not encoded with UTF-8, and must survive
round-trip back to p4.
Conversely, text data being piped to p4 must always be encoded when
running under python3.
encode_text_stream() and decode_text_stream() were added to make these
transformations more convenient.
Signed-off-by: Yang Zhao <yang.zhao@skyboxlabs.com>
---
git-p4.py | 59 +++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 13 deletions(-)
diff --git a/git-p4.py b/git-p4.py
index 153aff16f3..ca891e3d5d 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -135,6 +135,21 @@ def die(msg):
sys.stderr.write(msg + "\n")
sys.exit(1)
+# We need different encoding/decoding strategies for text data being passed
+# around in pipes depending on python version
+if bytes is not str:
+ # For python3, always encode and decode as appropriate
+ def decode_text_stream(s):
+ return s.decode() if isinstance(s, bytes) else s
+ def encode_text_stream(s):
+ return s.encode() if isinstance(s, str) else s
+else:
+ # For python2.7, pass read strings as-is, but also allow writing unicode
+ def decode_text_stream(s):
+ return s
+ def encode_text_stream(s):
+ return s.encode('utf_8') if isinstance(s, unicode) else s
+
def write_pipe(c, stdin):
if verbose:
sys.stderr.write('Writing pipe: %s\n' % str(c))
@@ -151,6 +166,8 @@ def write_pipe(c, stdin):
def p4_write_pipe(c, stdin):
real_cmd = p4_build_cmd(c)
+ if bytes is not str and isinstance(stdin, str):
+ stdin = encode_text_stream(stdin)
return write_pipe(real_cmd, stdin)
def read_pipe_full(c):
@@ -164,7 +181,7 @@ def read_pipe_full(c):
expand = not isinstance(c, list)
p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand)
(out, err) = p.communicate()
- return (p.returncode, out, err)
+ return (p.returncode, out, decode_text_stream(err))
def read_pipe(c, ignore_error=False):
""" Read output from command. Returns the output text on
@@ -187,11 +204,11 @@ def read_pipe_text(c):
if retcode != 0:
return None
else:
- return out.rstrip()
+ return decode_text_stream(out).rstrip()
-def p4_read_pipe(c, ignore_error=False):
+def p4_read_pipe(c, ignore_error=False, raw=False):
real_cmd = p4_build_cmd(c)
- return read_pipe(real_cmd, ignore_error)
+ return read_pipe(real_cmd, ignore_error, raw=raw)
def read_pipe_lines(c):
if verbose:
@@ -200,7 +217,7 @@ def read_pipe_lines(c):
expand = not isinstance(c, list)
p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand)
pipe = p.stdout
- val = pipe.readlines()
+ val = [decode_text_stream(line) for line in pipe.readlines()]
if pipe.close() or p.wait():
die('Command failed: %s' % str(c))
@@ -231,6 +248,7 @@ def p4_has_move_command():
cmd = p4_build_cmd(["move", "-k", "@from", "@to"])
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(out, err) = p.communicate()
+ err = decode_text_stream(err)
# return code will be 1 in either case
if err.find("Invalid option") >= 0:
return False
@@ -611,6 +629,20 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
try:
while True:
entry = marshal.load(p4.stdout)
+ if bytes is not str:
+ # Decode unmarshalled dict to use str keys and values, except for:
+ # - `data` which may contain arbitrary binary data
+ # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text
+ decoded_entry = {}
+ for key, value in entry.items():
+ key = key.decode()
+ if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')):
+ value = value.decode()
+ decoded_entry[key] = value
+ # Parse out data if it's an error response
+ if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
+ decoded_entry['data'] = decoded_entry['data'].decode()
+ entry = decoded_entry
if skip_info:
if 'code' in entry and entry['code'] == 'info':
continue
@@ -828,6 +860,7 @@ def branch_exists(branch):
cmd = [ "git", "rev-parse", "--symbolic", "--verify", branch ]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, _ = p.communicate()
+ out = decode_text_stream(out)
if p.returncode:
return False
# expect exactly one line of output: the branch name
@@ -1971,7 +2004,7 @@ def applyCommit(self, id):
tmpFile = os.fdopen(handle, "w+b")
if self.isWindows:
submitTemplate = submitTemplate.replace("\n", "\r\n")
- tmpFile.write(submitTemplate)
+ tmpFile.write(encode_text_stream(submitTemplate))
tmpFile.close()
if self.prepare_p4_only:
@@ -2018,7 +2051,7 @@ def applyCommit(self, id):
if self.edit_template(fileName):
# read the edited message and submit
tmpFile = open(fileName, "rb")
- message = tmpFile.read()
+ message = decode_text_stream(tmpFile.read())
tmpFile.close()
if self.isWindows:
message = message.replace("\r\n", "\n")
@@ -2707,7 +2740,7 @@ def splitFilesIntoBranches(self, commit):
return branches
def writeToGitStream(self, gitMode, relPath, contents):
- self.gitStream.write('M %s inline %s\n' % (gitMode, relPath))
+ self.gitStream.write(encode_text_stream(u'M {} inline {}\n'.format(gitMode, relPath)))
self.gitStream.write('data %d\n' % sum(len(d) for d in contents))
for d in contents:
self.gitStream.write(d)
@@ -2748,7 +2781,7 @@ def streamOneP4File(self, file, contents):
git_mode = "120000"
# p4 print on a symlink sometimes contains "target\n";
# if it does, remove the newline
- data = ''.join(contents)
+ data = ''.join(decode_text_stream(c) for c in contents)
if not data:
# Some version of p4 allowed creating a symlink that pointed
# to nothing. This causes p4 errors when checking out such
@@ -2802,7 +2835,7 @@ def streamOneP4File(self, file, contents):
pattern = p4_keywords_regexp_for_type(type_base, type_mods)
if pattern:
regexp = re.compile(pattern, re.VERBOSE)
- text = ''.join(contents)
+ text = ''.join(decode_text_stream(c) for c in contents)
text = regexp.sub(r'$\1$', text)
contents = [ text ]
@@ -2817,7 +2850,7 @@ def streamOneP4Deletion(self, file):
if verbose:
sys.stdout.write("delete %s\n" % relPath)
sys.stdout.flush()
- self.gitStream.write("D %s\n" % relPath)
+ self.gitStream.write(encode_text_stream(u'D {}\n'.format(relPath)))
if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath):
self.largeFileSystem.removeLargeFile(relPath)
@@ -2917,9 +2950,9 @@ def streamP4FilesCbSelf(entry):
if 'shelved_cl' in f:
# Handle shelved CLs using the "p4 print file@=N" syntax to print
# the contents
- fileArg = '%s@=%d' % (f['path'], f['shelved_cl'])
+ fileArg = f['path'] + encode_text_stream('@={}'.format(f['shelved_cl']))
else:
- fileArg = '%s#%s' % (f['path'], f['rev'])
+ fileArg = f['path'] + encode_text_stream('#{}'.format(f['rev']))
fileArgs.append(fileArg)
--
2.21.0.windows.1
next prev parent reply other threads:[~2019-12-13 23:53 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20191213235247.23660-1-yang.zhao@skyboxlabs.com>
2019-12-13 23:52 ` [PATCH v2 00/14] git-p4: python3 compatibility Yang Zhao
2020-01-17 22:00 ` Yang Zhao
2020-01-24 20:14 ` Luke Diamand
2020-01-30 13:35 ` Luke Diamand
2020-02-03 12:54 ` Luke Diamand
2020-02-03 18:11 ` Yang Zhao
2020-02-04 1:35 ` Yang Zhao
2019-12-13 23:52 ` [PATCH v2 01/14] git-p4: make python2.7 the oldest supported version Yang Zhao
2019-12-13 23:52 ` [PATCH v2 02/14] git-p4: change the expansion test from basestring to list Yang Zhao
2019-12-13 23:52 ` [PATCH v2 03/14] git-p4: remove string type aliasing Yang Zhao
2019-12-13 23:52 ` Yang Zhao [this message]
2019-12-17 22:51 ` [PATCH v2 04/14] git-p4: encode/decode communication with p4 for python3 Junio C Hamano
2019-12-13 23:52 ` [PATCH v2 05/14] git-p4: encode/decode communication with git " Yang Zhao
2019-12-13 23:52 ` [PATCH v2 06/14] git-p4: convert path to unicode before processing them Yang Zhao
2019-12-13 23:52 ` [PATCH v2 07/14] git-p4: open .gitp4-usercache.txt in text mode Yang Zhao
2019-12-13 23:52 ` [PATCH v2 08/14] git-p4: use marshal format version 2 when sending to p4 Yang Zhao
2019-12-13 23:52 ` [PATCH v2 09/14] git-p4: fix freezing while waiting for fast-import progress Yang Zhao
2019-12-13 23:52 ` [PATCH v2 10/14] git-p4: use functools.reduce instead of reduce Yang Zhao
2019-12-13 23:52 ` [PATCH v2 11/14] git-p4: use dict.items() iteration for python3 compatibility Yang Zhao
2019-12-13 23:52 ` [PATCH v2 12/14] git-p4: simplify regex pattern generation for parsing diff-tree Yang Zhao
2019-12-13 23:52 ` [PATCH v2 13/14] git-p4: use python3's input() everywhere Yang Zhao
2019-12-13 23:52 ` [RFC PATCH v2 14/14] ci: also run linux-gcc pipeline with python3.5 environment Yang Zhao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191213235247.23660-6-yang.zhao@skyboxlabs.com \
--to=yang.zhao@skyboxlabs.com \
--cc=git@vger.kernel.org \
--cc=liu.denton@gmail.com \
--cc=luke@diamand.org \
--cc=seraphire@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).