From: Yang Zhao <yang.zhao@skyboxlabs.com>
To: git@vger.kernel.org
Cc: Yang Zhao <yang.zhao@skyboxlabs.com>
Subject: [PATCH 06/13] git-p4: convert path to unicode before processing them
Date: Fri, 6 Dec 2019 16:33:24 -0800 [thread overview]
Message-ID: <20191207003333.3228-7-yang.zhao@skyboxlabs.com> (raw)
In-Reply-To: <20191207003333.3228-1-yang.zhao@skyboxlabs.com>
P4 allows essentially arbitrary encoding for path data while we would
perfer to be dealing only with unicode strings. Since path data need to
survive round-trip back to p4, this patch implements the general policy
that we store path data as-is, but decode them to unicode before doing
any non-trivial processing.
A new `decode_path()` method is provided that generally does the correct
conversion, taking into account `git-p4.pathEncoding` configuration.
For python2.7, path strings will be left as-is if it only contains ASCII
characters.
For python3, decoding is always done so that we have str objects.
Signed-off-by: Yang Zhao <yang.zhao@skyboxlabs.com>
---
git-p4.py | 67 +++++++++++++++++++++++++++++++++++--------------------
1 file changed, 43 insertions(+), 24 deletions(-)
diff --git a/git-p4.py b/git-p4.py
index fefa716b17..088924fbe1 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -170,6 +170,21 @@ def decode_text_stream(s):
def encode_text_stream(s):
return s.encode('utf_8') if isinstance(s, unicode) else s
+def decode_path(path):
+ """Decode a given string (bytes or otherwise) using configured path encoding options
+ """
+ encoding = gitConfig('git-p4.pathEncoding') or 'utf_8'
+ if bytes is not str:
+ return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
+ else:
+ try:
+ path.decode('ascii')
+ except:
+ path = path.decode(encoding, errors='replace')
+ if verbose:
+ print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path))
+ return path
+
def write_pipe(c, stdin):
if verbose:
sys.stderr.write('Writing pipe: %s\n' % str(c))
@@ -715,7 +730,8 @@ def p4Where(depotPath):
if "depotFile" in entry:
# Search for the base client side depot path, as long as it starts with the branch's P4 path.
# The base path always ends with "/...".
- if entry["depotFile"].find(depotPath) == 0 and entry["depotFile"][-4:] == "/...":
+ entry_path = decode_path(entry['depotFile'])
+ if entry_path.find(depotPath) == 0 and entry_path[-4:] == "/...":
output = entry
break
elif "data" in entry:
@@ -730,11 +746,11 @@ def p4Where(depotPath):
return ""
clientPath = ""
if "path" in output:
- clientPath = output.get("path")
+ clientPath = decode_path(output['path'])
elif "data" in output:
data = output.get("data")
- lastSpace = data.rfind(" ")
- clientPath = data[lastSpace + 1:]
+ lastSpace = data.rfind(b" ")
+ clientPath = decode_path(data[lastSpace + 1:])
if clientPath.endswith("..."):
clientPath = clientPath[:-3]
@@ -2511,7 +2527,7 @@ def update_client_spec_path_cache(self, files):
""" Caching file paths by "p4 where" batch query """
# List depot file paths exclude that already cached
- fileArgs = [f['path'] for f in files if f['path'] not in self.client_spec_path_cache]
+ fileArgs = [f['path'] for f in files if decode_path(f['path']) not in self.client_spec_path_cache]
if len(fileArgs) == 0:
return # All files in cache
@@ -2526,16 +2542,18 @@ def update_client_spec_path_cache(self, files):
if "unmap" in res:
# it will list all of them, but only one not unmap-ped
continue
+ depot_path = decode_path(res['depotFile'])
if gitConfigBool("core.ignorecase"):
- res['depotFile'] = res['depotFile'].lower()
- self.client_spec_path_cache[res['depotFile']] = self.convert_client_path(res["clientFile"])
+ depot_path = depot_path.lower()
+ self.client_spec_path_cache[depot_path] = self.convert_client_path(res["clientFile"])
# not found files or unmap files set to ""
for depotFile in fileArgs:
+ depotFile = decode_path(depotFile)
if gitConfigBool("core.ignorecase"):
depotFile = depotFile.lower()
if depotFile not in self.client_spec_path_cache:
- self.client_spec_path_cache[depotFile] = ""
+ self.client_spec_path_cache[depotFile] = b''
def map_in_client(self, depot_path):
"""Return the relative location in the client where this
@@ -2653,7 +2671,7 @@ def isPathWanted(self, path):
elif path.lower() == p.lower():
return False
for p in self.depotPaths:
- if p4PathStartsWith(path, p):
+ if p4PathStartsWith(path, decode_path(p)):
return True
return False
@@ -2662,7 +2680,7 @@ def extractFilesFromCommit(self, commit, shelved=False, shelved_cl = 0):
fnum = 0
while "depotFile%s" % fnum in commit:
path = commit["depotFile%s" % fnum]
- found = self.isPathWanted(path)
+ found = self.isPathWanted(decode_path(path))
if not found:
fnum = fnum + 1
continue
@@ -2696,7 +2714,7 @@ def stripRepoPath(self, path, prefixes):
if self.useClientSpec:
# branch detection moves files up a level (the branch name)
# from what client spec interpretation gives
- path = self.clientSpecDirs.map_in_client(path)
+ path = decode_path(self.clientSpecDirs.map_in_client(path))
if self.detectBranches:
for b in self.knownBranches:
if p4PathStartsWith(path, b + "/"):
@@ -2730,14 +2748,15 @@ def splitFilesIntoBranches(self, commit):
branches = {}
fnum = 0
while "depotFile%s" % fnum in commit:
- path = commit["depotFile%s" % fnum]
+ raw_path = commit["depotFile%s" % fnum]
+ path = decode_path(raw_path)
found = self.isPathWanted(path)
if not found:
fnum = fnum + 1
continue
file = {}
- file["path"] = path
+ file["path"] = raw_path
file["rev"] = commit["rev%s" % fnum]
file["action"] = commit["action%s" % fnum]
file["type"] = commit["type%s" % fnum]
@@ -2746,7 +2765,7 @@ def splitFilesIntoBranches(self, commit):
# start with the full relative path where this file would
# go in a p4 client
if self.useClientSpec:
- relPath = self.clientSpecDirs.map_in_client(path)
+ relPath = decode_path(self.clientSpecDirs.map_in_client(path))
else:
relPath = self.stripRepoPath(path, self.depotPaths)
@@ -2784,14 +2803,15 @@ def encodeWithUTF8(self, path):
# - helper for streamP4Files
def streamOneP4File(self, file, contents):
- relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
- relPath = self.encodeWithUTF8(relPath)
+ file_path = file['depotFile']
+ relPath = self.stripRepoPath(decode_path(file_path), self.branchPrefixes)
+
if verbose:
if 'fileSize' in self.stream_file:
size = int(self.stream_file['fileSize'])
else:
size = 0 # deleted files don't get a fileSize apparently
- sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024))
+ sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
sys.stdout.flush()
(type_base, type_mods) = split_p4_type(file["type"])
@@ -2809,7 +2829,7 @@ def streamOneP4File(self, file, contents):
# to nothing. This causes p4 errors when checking out such
# a change, and errors here too. Work around it by ignoring
# the bad symlink; hopefully a future change fixes it.
- print("\nIgnoring empty symlink in %s" % file['depotFile'])
+ print("\nIgnoring empty symlink in %s" % file_path)
return
elif data[-1] == '\n':
contents = [data[:-1]]
@@ -2828,7 +2848,7 @@ def streamOneP4File(self, file, contents):
# just the native "NT" type.
#
try:
- text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])])
+ text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (decode_path(file['depotFile']), file['change'])], raw=True)
except Exception as e:
if 'Translation of file content failed' in str(e):
type_base = 'binary'
@@ -2836,7 +2856,7 @@ def streamOneP4File(self, file, contents):
raise e
else:
if p4_version_string().find('/NT') >= 0:
- text = text.replace('\r\n', '\n')
+ text = text.replace(b'\r\n', b'\n')
contents = [ text ]
if type_base == "apple":
@@ -2867,8 +2887,7 @@ def streamOneP4File(self, file, contents):
self.writeToGitStream(git_mode, relPath, contents)
def streamOneP4Deletion(self, file):
- relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
- relPath = self.encodeWithUTF8(relPath)
+ relPath = self.stripRepoPath(decode_path(file['path']), self.branchPrefixes)
if verbose:
sys.stdout.write("delete %s\n" % relPath)
sys.stdout.flush()
@@ -3055,8 +3074,8 @@ def commit(self, details, files, branch, parent = "", allow_empty=False):
if self.clientSpecDirs:
self.clientSpecDirs.update_client_spec_path_cache(files)
- files = [f for f in files
- if self.inClientSpec(f['path']) and self.hasBranchPrefix(f['path'])]
+ files = [f for (f, path) in ((f, decode_path(f['path'])) for f in files)
+ if self.inClientSpec(path) and self.hasBranchPrefix(path)]
if gitConfigBool('git-p4.keepEmptyCommits'):
allow_empty = True
--
2.21.0.windows.1
next prev parent reply other threads:[~2019-12-07 0:33 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-07 0:33 [PATCH 00/13] git-p4: python3 compatibility Yang Zhao
2019-12-07 0:33 ` [PATCH 01/13] ci: also run linux-gcc pipeline with python-3.7 environment Yang Zhao
2019-12-10 10:30 ` SZEDER Gábor
2019-12-10 19:11 ` Yang Zhao
2019-12-12 14:13 ` SZEDER Gábor
2019-12-12 17:04 ` Yang Zhao
2019-12-12 17:15 ` SZEDER Gábor
2019-12-12 19:02 ` Yang Zhao
2019-12-07 0:33 ` [PATCH 02/13] git-p4: make python-2.7 the oldest supported version Yang Zhao
2019-12-07 0:33 ` [PATCH 03/13] git-p4: simplify python version detection Yang Zhao
2019-12-07 0:33 ` [PATCH 04/13] git-p4: decode response from p4 to str for python3 Yang Zhao
2019-12-07 0:33 ` [PATCH 05/13] git-p4: properly encode/decode communication with git for python 3 Yang Zhao
2019-12-07 0:33 ` Yang Zhao [this message]
2019-12-07 0:33 ` [PATCH 06/13] git-p4: open .gitp4-usercache.txt in text mode Yang Zhao
2019-12-07 0:33 ` [PATCH 07/13] git-p4: convert path to unicode before processing them Yang Zhao
2019-12-07 0:33 ` [PATCH 07/13] git-p4: open .gitp4-usercache.txt in text mode Yang Zhao
2019-12-07 0:33 ` [PATCH 08/13] git-p4: use marshal format version 2 when sending to p4 Yang Zhao
2019-12-07 0:33 ` [PATCH 09/13] git-p4: fix freezing while waiting for fast-import progress Yang Zhao
2019-12-07 0:33 ` [PATCH 10/13] git-p4: use functools.reduce instead of reduce Yang Zhao
2019-12-07 0:33 ` [PATCH 11/13] git-p4: use dict.items() iteration for python3 compatibility Yang Zhao
2019-12-07 0:33 ` [PATCH 12/13] git-p4: simplify regex pattern generation for parsing diff-tree Yang Zhao
2019-12-07 0:33 ` [PATCH 13/13] git-p4: use python3's input() everywhere Yang Zhao
2019-12-07 1:09 ` [PATCH 00/13] git-p4: python3 compatibility Denton Liu
2019-12-07 7:29 ` Yang Zhao
2019-12-07 16:21 ` Ben Keene
2019-12-07 19:59 ` Yang Zhao
2019-12-09 15:03 ` Ben Keene
2019-12-09 18:54 ` Ben Keene
2019-12-09 19:48 ` Johannes Schindelin
2019-12-10 14:20 ` Ben Keene
2019-12-09 20:21 ` Yang Zhao
2019-12-13 17:10 ` Ben Keene
2019-12-07 7:34 ` Yang Zhao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191207003333.3228-7-yang.zhao@skyboxlabs.com \
--to=yang.zhao@skyboxlabs.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).