All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] Transition git-p4.py to support Python 3 only
@ 2021-12-09 20:10 Joel Holdsworth
  2021-12-09 20:10 ` [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists Joel Holdsworth
                   ` (7 more replies)
  0 siblings, 8 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

The git-p4.py script currently implements code-paths for both Python 2 and
3.

Python 2 was discontinued in 2020, and there is no longer any officially
supported interpreter. Further development of git-p4.py will require
would-be developers to test their changes with all supported dialects of
the language. However, if there is no longer any supported runtime
environment available, this places an unreasonable burden on the Git
project to maintain support for an obselete dialect of the language.

This patch-set removes all Python 2-specific code-paths, and then
applies some simplifications to the code which are available given
Python 3's improve delineation between bytes and strings.

Joel Holdsworth (6):
  git-p4: Always pass cmd arguments to subprocess as a python lists
  git-p4: Don't print shell commands as python lists
  git-p4: Removed support for Python 2
  git-p4: Decode byte strings before printing
  git-p4: Eliminate decode_stream and encode_stream
  git-p4: Resolve RCS keywords in binary

 git-p4.py | 319 +++++++++++++++++++++---------------------------------
 1 file changed, 123 insertions(+), 196 deletions(-)

-- 
2.33.0


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
@ 2021-12-09 20:10 ` Joel Holdsworth
  2021-12-09 22:42   ` Junio C Hamano
  2021-12-09 20:10 ` [PATCH 2/6] git-p4: Don't print shell commands as " Joel Holdsworth
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 138 +++++++++++++++++++++++-------------------------------
 1 file changed, 58 insertions(+), 80 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index 2b4500226a..1a4b7331d2 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -93,10 +93,7 @@ def p4_build_cmd(cmd):
         # Provide a way to not pass this option by setting git-p4.retries to 0
         real_cmd += ["-r", str(retries)]
 
-    if not isinstance(cmd, list):
-        real_cmd = ' '.join(real_cmd) + ' ' + cmd
-    else:
-        real_cmd += cmd
+    real_cmd += cmd
 
     # now check that we can actually talk to the server
     global p4_access_checked
@@ -273,12 +270,11 @@ def run_hook_command(cmd, param):
     return subprocess.call(cli, shell=use_shell)
 
 
-def write_pipe(c, stdin):
+def write_pipe(c, stdin, *k, **kw):
     if verbose:
         sys.stderr.write('Writing pipe: %s\n' % str(c))
 
-    expand = not isinstance(c, list)
-    p = subprocess.Popen(c, stdin=subprocess.PIPE, shell=expand)
+    p = subprocess.Popen(c, stdin=subprocess.PIPE, *k, **kw)
     pipe = p.stdin
     val = pipe.write(stdin)
     pipe.close()
@@ -293,7 +289,7 @@ def p4_write_pipe(c, stdin):
         stdin = encode_text_stream(stdin)
     return write_pipe(real_cmd, stdin)
 
-def read_pipe_full(c):
+def read_pipe_full(c, *k, **kw):
     """ Read output from  command. Returns a tuple
         of the return status, stdout text and stderr
         text.
@@ -301,8 +297,8 @@ def read_pipe_full(c):
     if verbose:
         sys.stderr.write('Reading pipe: %s\n' % str(c))
 
-    expand = not isinstance(c, list)
-    p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand)
+    p = subprocess.Popen(
+        c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, *k, **kw)
     (out, err) = p.communicate()
     return (p.returncode, out, decode_text_stream(err))
 
@@ -337,12 +333,11 @@ def p4_read_pipe(c, ignore_error=False, raw=False):
     real_cmd = p4_build_cmd(c)
     return read_pipe(real_cmd, ignore_error, raw=raw)
 
-def read_pipe_lines(c):
+def read_pipe_lines(c, *k, **kw):
     if verbose:
         sys.stderr.write('Reading pipe: %s\n' % str(c))
 
-    expand = not isinstance(c, list)
-    p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand)
+    p = subprocess.Popen(c, stdout=subprocess.PIPE, *k, **kw)
     pipe = p.stdout
     val = [decode_text_stream(line) for line in pipe.readlines()]
     if pipe.close() or p.wait():
@@ -383,11 +378,10 @@ def p4_has_move_command():
     # assume it failed because @... was invalid changelist
     return True
 
-def system(cmd, ignore_error=False):
-    expand = not isinstance(cmd, list)
+def system(cmd, ignore_error=False, *k, **kw):
     if verbose:
         sys.stderr.write("executing %s\n" % str(cmd))
-    retcode = subprocess.call(cmd, shell=expand)
+    retcode = subprocess.call(cmd, *k, **kw)
     if retcode and not ignore_error:
         raise CalledProcessError(retcode, cmd)
 
@@ -396,8 +390,7 @@ def system(cmd, ignore_error=False):
 def p4_system(cmd):
     """Specifically invoke p4 as the system command. """
     real_cmd = p4_build_cmd(cmd)
-    expand = not isinstance(real_cmd, list)
-    retcode = subprocess.call(real_cmd, shell=expand)
+    retcode = subprocess.call(real_cmd)
     if retcode:
         raise CalledProcessError(retcode, real_cmd)
 
@@ -728,14 +721,7 @@ def isModeExecChanged(src_mode, dst_mode):
 def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
         errors_as_exceptions=False):
 
-    if not isinstance(cmd, list):
-        cmd = "-G " + cmd
-        expand = True
-    else:
-        cmd = ["-G"] + cmd
-        expand = False
-
-    cmd = p4_build_cmd(cmd)
+    cmd = p4_build_cmd(["-G"] + cmd)
     if verbose:
         sys.stderr.write("Opening pipe: %s\n" % str(cmd))
 
@@ -745,17 +731,13 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
     stdin_file = None
     if stdin is not None:
         stdin_file = tempfile.TemporaryFile(prefix='p4-stdin', mode=stdin_mode)
-        if not isinstance(stdin, list):
-            stdin_file.write(stdin)
-        else:
-            for i in stdin:
-                stdin_file.write(encode_text_stream(i))
-                stdin_file.write(b'\n')
+        for i in stdin:
+            stdin_file.write(encode_text_stream(i))
+            stdin_file.write(b'\n')
         stdin_file.flush()
         stdin_file.seek(0)
 
     p4 = subprocess.Popen(cmd,
-                          shell=expand,
                           stdin=stdin_file,
                           stdout=subprocess.PIPE)
 
@@ -860,7 +842,7 @@ def isValidGitDir(path):
     return git_dir(path) != None
 
 def parseRevision(ref):
-    return read_pipe("git rev-parse %s" % ref).strip()
+    return read_pipe(["git", "rev-parse", ref]).strip()
 
 def branchExists(ref):
     rev = read_pipe(["git", "rev-parse", "-q", "--verify", ref],
@@ -966,11 +948,11 @@ def p4BranchesInGit(branchesAreInRemotes=True):
 
     branches = {}
 
-    cmdline = "git rev-parse --symbolic "
+    cmdline = ["git", "rev-parse", "--symbolic"]
     if branchesAreInRemotes:
-        cmdline += "--remotes"
+        cmdline.append("--remotes")
     else:
-        cmdline += "--branches"
+        cmdline.append("--branches")
 
     for line in read_pipe_lines(cmdline):
         line = line.strip()
@@ -1035,7 +1017,7 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = "refs/remotes/p4/", silent
 
     originPrefix = "origin/p4/"
 
-    for line in read_pipe_lines("git rev-parse --symbolic --remotes"):
+    for line in read_pipe_lines(["git", "rev-parse", "--symbolic", "--remotes"]):
         line = line.strip()
         if (not line.startswith(originPrefix)) or line.endswith("HEAD"):
             continue
@@ -1073,7 +1055,7 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = "refs/remotes/p4/", silent
                               remoteHead, ','.join(settings['depot-paths'])))
 
         if update:
-            system("git update-ref %s %s" % (remoteHead, originHead))
+            system(["git", "update-ref", remoteHead, originHead])
 
 def originP4BranchesExist():
         return gitBranchExists("origin") or gitBranchExists("origin/p4") or gitBranchExists("origin/p4/master")
@@ -1187,7 +1169,7 @@ def getClientSpec():
     """Look at the p4 client spec, create a View() object that contains
        all the mappings, and return it."""
 
-    specList = p4CmdList("client -o")
+    specList = p4CmdList(["client", "-o"])
     if len(specList) != 1:
         die('Output from "client -o" is %d lines, expecting 1' %
             len(specList))
@@ -1216,7 +1198,7 @@ def getClientSpec():
 def getClientRoot():
     """Grab the client directory."""
 
-    output = p4CmdList("client -o")
+    output = p4CmdList(["client", "-o"])
     if len(output) != 1:
         die('Output from "client -o" is %d lines, expecting 1' % len(output))
 
@@ -1471,7 +1453,7 @@ def p4UserId(self):
         if self.myP4UserId:
             return self.myP4UserId
 
-        results = p4CmdList("user -o")
+        results = p4CmdList(["user", "-o"])
         for r in results:
             if 'User' in r:
                 self.myP4UserId = r['User']
@@ -1496,7 +1478,7 @@ def getUserMapFromPerforceServer(self):
         self.users = {}
         self.emails = {}
 
-        for output in p4CmdList("users"):
+        for output in p4CmdList(["users"]):
             if "User" not in output:
                 continue
             self.users[output["User"]] = output["FullName"] + " <" + output["Email"] + ">"
@@ -1566,10 +1548,10 @@ def run(self, args):
 
         if self.rollbackLocalBranches:
             refPrefix = "refs/heads/"
-            lines = read_pipe_lines("git rev-parse --symbolic --branches")
+            lines = read_pipe_lines(["git", "rev-parse", "--symbolic", "--branches"])
         else:
             refPrefix = "refs/remotes/"
-            lines = read_pipe_lines("git rev-parse --symbolic --remotes")
+            lines = read_pipe_lines(["git", "rev-parse", "--symbolic", "--remotes"])
 
         for line in lines:
             if self.rollbackLocalBranches or (line.startswith("p4/") and line != "p4/HEAD\n"):
@@ -1586,14 +1568,14 @@ def run(self, args):
                 if len(p4Cmd("changes -m 1 "  + ' '.join (['%s...@%s' % (p, maxChange)
                                                            for p in depotPaths]))) == 0:
                     print("Branch %s did not exist at change %s, deleting." % (ref, maxChange))
-                    system("git update-ref -d %s `git rev-parse %s`" % (ref, ref))
+                    system("git update-ref -d {ref} `git rev-parse {ref}`".format(ref=ref), shell=True)
                     continue
 
                 while change and int(change) > maxChange:
                     changed = True
                     if self.verbose:
                         print("%s is at %s ; rewinding towards %s" % (ref, change, maxChange))
-                    system("git update-ref %s \"%s^\"" % (ref, ref))
+                    system(["git", "update-ref", ref, "{}^".format(ref)])
                     log = extractLogMessageFromGitCommit(ref)
                     settings =  extractSettingsGitLog(log)
 
@@ -1694,7 +1676,7 @@ def __init__(self):
             die("Large file system not supported for git-p4 submit command. Please remove it from config.")
 
     def check(self):
-        if len(p4CmdList("opened ...")) > 0:
+        if len(p4CmdList(["opened", "..."])) > 0:
             die("You have files opened with perforce! Close them before starting the sync.")
 
     def separate_jobs_from_description(self, message):
@@ -1803,7 +1785,7 @@ def lastP4Changelist(self):
         # then gets used to patch up the username in the change. If the same
         # client spec is being used by multiple processes then this might go
         # wrong.
-        results = p4CmdList("client -o")        # find the current client
+        results = p4CmdList(["client", "-o"])        # find the current client
         client = None
         for r in results:
             if 'Client' in r:
@@ -1819,7 +1801,7 @@ def lastP4Changelist(self):
 
     def modifyChangelistUser(self, changelist, newUser):
         # fixup the user field of a changelist after it has been submitted.
-        changes = p4CmdList("change -o %s" % changelist)
+        changes = p4CmdList(["change", "-o", changelist])
         if len(changes) != 1:
             die("Bad output from p4 change modifying %s to user %s" %
                 (changelist, newUser))
@@ -1830,7 +1812,7 @@ def modifyChangelistUser(self, changelist, newUser):
         # p4 does not understand format version 3 and above
         input = marshal.dumps(c, 2)
 
-        result = p4CmdList("change -f -i", stdin=input)
+        result = p4CmdList(["change", "-f", "-i"], stdin=input)
         for r in result:
             if 'code' in r:
                 if r['code'] == 'error':
@@ -1936,7 +1918,7 @@ def edit_template(self, template_file):
         if "P4EDITOR" in os.environ and (os.environ.get("P4EDITOR") != ""):
             editor = os.environ.get("P4EDITOR")
         else:
-            editor = read_pipe("git var GIT_EDITOR").strip()
+            editor = read_pipe(["git", "var", "GIT_EDITOR"]).strip()
         system(["sh", "-c", ('%s "$@"' % editor), editor, template_file])
 
         # If the file was not saved, prompt to see if this patch should
@@ -1994,7 +1976,7 @@ def applyCommit(self, id):
 
         (p4User, gitEmail) = self.p4UserForCommit(id)
 
-        diff = read_pipe_lines("git diff-tree -r %s \"%s^\" \"%s\"" % (self.diffOpts, id, id))
+        diff = read_pipe_lines(["git", "diff-tree", "-r"] + self.diffOpts + ["{}^".format(id), id])
         filesToAdd = set()
         filesToChangeType = set()
         filesToDelete = set()
@@ -2131,7 +2113,7 @@ def applyCommit(self, id):
         #
         # Apply the patch for real, and do add/delete/+x handling.
         #
-        system(applyPatchCmd)
+        system(applyPatchCmd, shell=True)
 
         for f in filesToChangeType:
             p4_edit(f, "-t", "auto")
@@ -2481,17 +2463,17 @@ def run(self, args):
         #
         if self.detectRenames:
             # command-line -M arg
-            self.diffOpts = "-M"
+            self.diffOpts = ["-M"]
         else:
             # If not explicitly set check the config variable
             detectRenames = gitConfig("git-p4.detectRenames")
 
             if detectRenames.lower() == "false" or detectRenames == "":
-                self.diffOpts = ""
+                self.diffOpts = []
             elif detectRenames.lower() == "true":
-                self.diffOpts = "-M"
+                self.diffOpts = ["-M"]
             else:
-                self.diffOpts = "-M%s" % detectRenames
+                self.diffOpts = ["-M{}".format(detectRenames)]
 
         # no command-line arg for -C or --find-copies-harder, just
         # config variables
@@ -2499,12 +2481,12 @@ def run(self, args):
         if detectCopies.lower() == "false" or detectCopies == "":
             pass
         elif detectCopies.lower() == "true":
-            self.diffOpts += " -C"
+            self.diffOpts.append("-C")
         else:
-            self.diffOpts += " -C%s" % detectCopies
+            self.diffOpts.append("-C{}".format(detectCopies))
 
         if gitConfigBool("git-p4.detectCopiesHarder"):
-            self.diffOpts += " --find-copies-harder"
+            self.diffOpts.append("--find-copies-harder")
 
         num_shelves = len(self.update_shelve)
         if num_shelves > 0 and num_shelves != len(commits):
@@ -3453,10 +3435,9 @@ def getBranchMapping(self):
         lostAndFoundBranches = set()
 
         user = gitConfig("git-p4.branchUser")
+        command = ["branches"]
         if len(user) > 0:
-            command = "branches -u %s" % user
-        else:
-            command = "branches"
+            command += ["-u", user]
 
         for info in p4CmdList(command):
             details = p4Cmd(["branch", "-o", info["branch"]])
@@ -3549,7 +3530,7 @@ def gitCommitByP4Change(self, ref, change):
         while True:
             if self.verbose:
                 print("trying: earliest %s latest %s" % (earliestCommit, latestCommit))
-            next = read_pipe("git rev-list --bisect %s %s" % (latestCommit, earliestCommit)).strip()
+            next = read_pipe(["git", "rev-list", "--bisect", latestCommit, earliestCommit]).strip()
             if len(next) == 0:
                 if self.verbose:
                     print("argh")
@@ -3704,7 +3685,7 @@ def sync_origin_only(self):
             if self.hasOrigin:
                 if not self.silent:
                     print('Syncing with origin first, using "git fetch origin"')
-                system("git fetch origin")
+                system(["git", "fetch", "origin"])
 
     def importHeadRevision(self, revision):
         print("Doing initial import of %s from revision %s into %s" % (' '.join(self.depotPaths), revision, self.branch))
@@ -3871,8 +3852,8 @@ def run(self, args):
         if len(self.branch) == 0:
             self.branch = self.refPrefix + "master"
             if gitBranchExists("refs/heads/p4") and self.importIntoRemotes:
-                system("git update-ref %s refs/heads/p4" % self.branch)
-                system("git branch -D p4")
+                system(["git", "update-ref", self.branch, "refs/heads/p4"])
+                system(["git", "branch", "-D", "p4"])
 
         # accept either the command-line option, or the configuration variable
         if self.useClientSpec:
@@ -4075,7 +4056,7 @@ def run(self, args):
         # Cleanup temporary branches created during import
         if self.tempBranches != []:
             for branch in self.tempBranches:
-                read_pipe("git update-ref -d %s" % branch)
+                read_pipe(["git", "update-ref", "-d", branch])
             os.rmdir(os.path.join(os.environ.get("GIT_DIR", ".git"), self.tempBranchLocation))
 
         # Create a symbolic ref p4/HEAD pointing to p4/<branch> to allow
@@ -4107,7 +4088,7 @@ def run(self, args):
     def rebase(self):
         if os.system("git update-index --refresh") != 0:
             die("Some files in your working directory are modified and different than what is in your index. You can use git update-index <filename> to bring the index up to date or stash away all your changes with git stash.");
-        if len(read_pipe("git diff-index HEAD --")) > 0:
+        if len(read_pipe(["git", "diff-index", "HEAD", "--"])) > 0:
             die("You have uncommitted changes. Please commit them before rebasing or stash them away with git stash.");
 
         [upstream, settings] = findUpstreamBranchPoint()
@@ -4118,9 +4099,9 @@ def rebase(self):
         upstream = re.sub("~[0-9]+$", "", upstream)
 
         print("Rebasing the current branch onto %s" % upstream)
-        oldHead = read_pipe("git rev-parse HEAD").strip()
-        system("git rebase %s" % upstream)
-        system("git diff-tree --stat --summary -M %s HEAD --" % oldHead)
+        oldHead = read_pipe(["git", "rev-parse", "HEAD"]).strip()
+        system(["git", "rebase", upstream])
+        system(["git", "diff-tree", "--stat", "--summary", "-M", oldHead, "HEAD", "--"])
         return True
 
 class P4Clone(P4Sync):
@@ -4197,7 +4178,7 @@ def run(self, args):
 
         # auto-set this variable if invoked with --use-client-spec
         if self.useClientSpec_from_options:
-            system("git config --bool git-p4.useclientspec true")
+            system(["git", "config", "--bool", "git-p4.useclientspec", "true"])
 
         return True
 
@@ -4328,10 +4309,7 @@ def run(self, args):
         if originP4BranchesExist():
             createOrUpdateBranchesFromOrigin()
 
-        cmdline = "git rev-parse --symbolic "
-        cmdline += " --remotes"
-
-        for line in read_pipe_lines(cmdline):
+        for line in read_pipe_lines(["git", "rev-parse", "--symbolic", "--remotes"]):
             line = line.strip()
 
             if not line.startswith('p4/') or line == "p4/HEAD":
@@ -4416,9 +4394,9 @@ def main():
             cmd.gitdir = os.path.abspath(".git")
             if not isValidGitDir(cmd.gitdir):
                 # "rev-parse --git-dir" without arguments will try $PWD/.git
-                cmd.gitdir = read_pipe("git rev-parse --git-dir").strip()
+                cmd.gitdir = read_pipe(["git", "rev-parse", "--git-dir"]).strip()
                 if os.path.exists(cmd.gitdir):
-                    cdup = read_pipe("git rev-parse --show-cdup").strip()
+                    cdup = read_pipe(["git", "rev-parse", "--show-cdup"]).strip()
                     if len(cdup) > 0:
                         chdir(cdup);
 
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/6] git-p4: Don't print shell commands as python lists
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
  2021-12-09 20:10 ` [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists Joel Holdsworth
@ 2021-12-09 20:10 ` Joel Holdsworth
  2021-12-09 20:10 ` [PATCH 3/6] git-p4: Removed support for Python 2 Joel Holdsworth
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index 1a4b7331d2..32f30e5f9a 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -272,14 +272,14 @@ def run_hook_command(cmd, param):
 
 def write_pipe(c, stdin, *k, **kw):
     if verbose:
-        sys.stderr.write('Writing pipe: %s\n' % str(c))
+        sys.stderr.write('Writing pipe: {}\n'.format(' '.join(c)))
 
     p = subprocess.Popen(c, stdin=subprocess.PIPE, *k, **kw)
     pipe = p.stdin
     val = pipe.write(stdin)
     pipe.close()
     if p.wait():
-        die('Command failed: %s' % str(c))
+        die('Command failed: {}'.format(' '.join(c)))
 
     return val
 
@@ -295,7 +295,7 @@ def read_pipe_full(c, *k, **kw):
         text.
     """
     if verbose:
-        sys.stderr.write('Reading pipe: %s\n' % str(c))
+        sys.stderr.write('Reading pipe: {}\n'.format(' '.join(c)))
 
     p = subprocess.Popen(
         c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, *k, **kw)
@@ -314,7 +314,7 @@ def read_pipe(c, ignore_error=False, raw=False):
         if ignore_error:
             out = ""
         else:
-            die('Command failed: %s\nError: %s' % (str(c), err))
+            die('Command failed: {}\nError: {}'.format(' '.join(c), err))
     if not raw:
         out = decode_text_stream(out)
     return out
@@ -335,13 +335,13 @@ def p4_read_pipe(c, ignore_error=False, raw=False):
 
 def read_pipe_lines(c, *k, **kw):
     if verbose:
-        sys.stderr.write('Reading pipe: %s\n' % str(c))
+        sys.stderr.write('Reading pipe: {}\n'.format(' '.join(c)))
 
     p = subprocess.Popen(c, stdout=subprocess.PIPE, *k, **kw)
     pipe = p.stdout
     val = [decode_text_stream(line) for line in pipe.readlines()]
     if pipe.close() or p.wait():
-        die('Command failed: %s' % str(c))
+        die('Command failed: {}'.format(' '.join(c)))
     return val
 
 def p4_read_pipe_lines(c):
@@ -380,7 +380,8 @@ def p4_has_move_command():
 
 def system(cmd, ignore_error=False, *k, **kw):
     if verbose:
-        sys.stderr.write("executing %s\n" % str(cmd))
+        sys.stderr.write("executing {}\n".format(
+            ' '.join(cmd) if isinstance(cmd, list) else cmd))
     retcode = subprocess.call(cmd, *k, **kw)
     if retcode and not ignore_error:
         raise CalledProcessError(retcode, cmd)
@@ -723,7 +724,7 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
 
     cmd = p4_build_cmd(["-G"] + cmd)
     if verbose:
-        sys.stderr.write("Opening pipe: %s\n" % str(cmd))
+        sys.stderr.write("Opening pipe: {}\n".format(' '.join(cmd)))
 
     # Use a temporary file to avoid deadlocks without
     # subprocess.communicate(), which would put another copy
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 3/6] git-p4: Removed support for Python 2
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
  2021-12-09 20:10 ` [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists Joel Holdsworth
  2021-12-09 20:10 ` [PATCH 2/6] git-p4: Don't print shell commands as " Joel Holdsworth
@ 2021-12-09 20:10 ` Joel Holdsworth
  2021-12-09 22:44   ` Junio C Hamano
  2021-12-10  3:25   ` David Aguilar
  2021-12-09 20:10 ` [PATCH 4/6] git-p4: Decode byte strings before printing Joel Holdsworth
                   ` (4 subsequent siblings)
  7 siblings, 2 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 89 +++++++++++++++++--------------------------------------
 1 file changed, 28 insertions(+), 61 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index 32f30e5f9a..b5d4fc1176 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # git-p4.py -- A tool for bidirectional operation between a Perforce depot and git.
 #
@@ -16,8 +16,8 @@
 # pylint: disable=too-many-branches,too-many-nested-blocks
 #
 import sys
-if sys.version_info.major < 3 and sys.version_info.minor < 7:
-    sys.stderr.write("git-p4: requires Python 2.7 or later.\n")
+if sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor < 7):
+    sys.stderr.write("git-p4: requires Python 3.7 or later.\n")
     sys.exit(1)
 import os
 import optparse
@@ -36,16 +36,6 @@
 import errno
 import glob
 
-# On python2.7 where raw_input() and input() are both availble,
-# we want raw_input's semantics, but aliased to input for python3
-# compatibility
-# support basestring in python3
-try:
-    if raw_input and input:
-        input = raw_input
-except:
-    pass
-
 verbose = False
 
 # Only labels/tags matching this will be imported/exported
@@ -173,35 +163,16 @@ def prompt(prompt_text):
         if response in choices:
             return response
 
-# We need different encoding/decoding strategies for text data being passed
-# around in pipes depending on python version
-if bytes is not str:
-    # For python3, always encode and decode as appropriate
-    def decode_text_stream(s):
-        return s.decode() if isinstance(s, bytes) else s
-    def encode_text_stream(s):
-        return s.encode() if isinstance(s, str) else s
-else:
-    # For python2.7, pass read strings as-is, but also allow writing unicode
-    def decode_text_stream(s):
-        return s
-    def encode_text_stream(s):
-        return s.encode('utf_8') if isinstance(s, unicode) else s
+def decode_text_stream(s):
+    return s.decode() if isinstance(s, bytes) else s
+def encode_text_stream(s):
+    return s.encode() if isinstance(s, str) else s
 
 def decode_path(path):
     """Decode a given string (bytes or otherwise) using configured path encoding options
     """
     encoding = gitConfig('git-p4.pathEncoding') or 'utf_8'
-    if bytes is not str:
-        return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
-    else:
-        try:
-            path.decode('ascii')
-        except:
-            path = path.decode(encoding, errors='replace')
-            if verbose:
-                print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path))
-        return path
+    return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
 
 def run_git_hook(cmd, param=[]):
     """Execute a hook if the hook exists."""
@@ -285,8 +256,8 @@ def write_pipe(c, stdin, *k, **kw):
 
 def p4_write_pipe(c, stdin):
     real_cmd = p4_build_cmd(c)
-    if bytes is not str and isinstance(stdin, str):
-        stdin = encode_text_stream(stdin)
+    if isinstance(stdin, str):
+        stdin = stdin.encode()
     return write_pipe(real_cmd, stdin)
 
 def read_pipe_full(c, *k, **kw):
@@ -745,21 +716,18 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
     result = []
     try:
         while True:
-            entry = marshal.load(p4.stdout)
-            if bytes is not str:
-                # Decode unmarshalled dict to use str keys and values, except for:
-                #   - `data` which may contain arbitrary binary data
-                #   - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text
-                decoded_entry = {}
-                for key, value in entry.items():
-                    key = key.decode()
-                    if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')):
-                        value = value.decode()
-                    decoded_entry[key] = value
-                # Parse out data if it's an error response
-                if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
-                    decoded_entry['data'] = decoded_entry['data'].decode()
-                entry = decoded_entry
+            # Decode unmarshalled dict to use str keys and values, except for:
+            #   - `data` which may contain arbitrary binary data
+            #   - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text
+            entry = {}
+            for key, value in marshal.load(p4.stdout).items():
+                key = key.decode()
+                if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')):
+                    value = value.decode()
+                entry[key] = value
+            # Parse out data if it's an error response
+            if entry.get('code') == 'error' and 'data' in entry:
+                entry['data'] = entry['data'].decode()
             if skip_info:
                 if 'code' in entry and entry['code'] == 'info':
                     continue
@@ -3822,14 +3790,13 @@ def openStreams(self):
         self.gitStream = self.importProcess.stdin
         self.gitError = self.importProcess.stderr
 
-        if bytes is not str:
-            # Wrap gitStream.write() so that it can be called using `str` arguments
-            def make_encoded_write(write):
-                def encoded_write(s):
-                    return write(s.encode() if isinstance(s, str) else s)
-                return encoded_write
+        # Wrap gitStream.write() so that it can be called using `str` arguments
+        def make_encoded_write(write):
+            def encoded_write(s):
+                return write(s.encode() if isinstance(s, str) else s)
+            return encoded_write
 
-            self.gitStream.write = make_encoded_write(self.gitStream.write)
+        self.gitStream.write = make_encoded_write(self.gitStream.write)
 
     def closeStreams(self):
         if self.gitStream is None:
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 4/6] git-p4: Decode byte strings before printing
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
                   ` (2 preceding siblings ...)
  2021-12-09 20:10 ` [PATCH 3/6] git-p4: Removed support for Python 2 Joel Holdsworth
@ 2021-12-09 20:10 ` Joel Holdsworth
  2021-12-09 22:47   ` Junio C Hamano
  2021-12-09 20:10 ` [PATCH 5/6] git-p4: Eliminate decode_stream and encode_stream Joel Holdsworth
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index b5d4fc1176..b5945a0306 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -2917,7 +2917,8 @@ def streamOneP4File(self, file, contents):
                 size = int(self.stream_file['fileSize'])
             else:
                 size = 0 # deleted files don't get a fileSize apparently
-            sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
+            sys.stdout.write('\r{} --> {} ({} MB)\n'.format(
+                file_path.decode(), relPath, size/1024/1024))
             sys.stdout.flush()
 
         (type_base, type_mods) = split_p4_type(file["type"])
@@ -3061,7 +3062,8 @@ def streamP4FilesCb(self, marshalled):
             size = int(self.stream_file["fileSize"])
             if size > 0:
                 progress = 100*self.stream_file['streamContentSize']/size
-                sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024)))
+                sys.stdout.write('\r{} {}% ({} MB)'.format(
+                    self.stream_file['depotFile'].decode(), progress, int(size/1024/1024)))
                 sys.stdout.flush()
 
         self.stream_have_file_info = True
@@ -3803,7 +3805,7 @@ def closeStreams(self):
             return
         self.gitStream.close()
         if self.importProcess.wait() != 0:
-            die("fast-import failed: %s" % self.gitError.read())
+            die("fast-import failed: {}".format(self.gitError.read().decode()))
         self.gitOutput.close()
         self.gitError.close()
         self.gitStream = None
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 5/6] git-p4: Eliminate decode_stream and encode_stream
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
                   ` (3 preceding siblings ...)
  2021-12-09 20:10 ` [PATCH 4/6] git-p4: Decode byte strings before printing Joel Holdsworth
@ 2021-12-09 20:10 ` Joel Holdsworth
  2021-12-09 20:10 ` [PATCH 6/6] git-p4: Resolve RCS keywords in binary Joel Holdsworth
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 50 ++++++++++++++++++++------------------------------
 1 file changed, 20 insertions(+), 30 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index b5945a0306..c362a5fa38 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -163,11 +163,6 @@ def prompt(prompt_text):
         if response in choices:
             return response
 
-def decode_text_stream(s):
-    return s.decode() if isinstance(s, bytes) else s
-def encode_text_stream(s):
-    return s.encode() if isinstance(s, str) else s
-
 def decode_path(path):
     """Decode a given string (bytes or otherwise) using configured path encoding options
     """
@@ -271,7 +266,7 @@ def read_pipe_full(c, *k, **kw):
     p = subprocess.Popen(
         c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, *k, **kw)
     (out, err) = p.communicate()
-    return (p.returncode, out, decode_text_stream(err))
+    return (p.returncode, out, err.decode())
 
 def read_pipe(c, ignore_error=False, raw=False):
     """ Read output from  command. Returns the output text on
@@ -283,22 +278,17 @@ def read_pipe(c, ignore_error=False, raw=False):
     (retcode, out, err) = read_pipe_full(c)
     if retcode != 0:
         if ignore_error:
-            out = ""
+            out = b""
         else:
             die('Command failed: {}\nError: {}'.format(' '.join(c), err))
-    if not raw:
-        out = decode_text_stream(out)
-    return out
+    return out if raw else out.decode()
 
 def read_pipe_text(c):
     """ Read output from a command with trailing whitespace stripped.
         On error, returns None.
     """
     (retcode, out, err) = read_pipe_full(c)
-    if retcode != 0:
-        return None
-    else:
-        return decode_text_stream(out).rstrip()
+    return out.decode().rstrip() if retcode == 0 else None
 
 def p4_read_pipe(c, ignore_error=False, raw=False):
     real_cmd = p4_build_cmd(c)
@@ -310,7 +300,7 @@ def read_pipe_lines(c, *k, **kw):
 
     p = subprocess.Popen(c, stdout=subprocess.PIPE, *k, **kw)
     pipe = p.stdout
-    val = [decode_text_stream(line) for line in pipe.readlines()]
+    val = [line.decode() for line in pipe.readlines()]
     if pipe.close() or p.wait():
         die('Command failed: {}'.format(' '.join(c)))
     return val
@@ -340,7 +330,7 @@ def p4_has_move_command():
     cmd = p4_build_cmd(["move", "-k", "@from", "@to"])
     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     (out, err) = p.communicate()
-    err = decode_text_stream(err)
+    err = err.decode()
     # return code will be 1 in either case
     if err.find("Invalid option") >= 0:
         return False
@@ -704,7 +694,7 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
     if stdin is not None:
         stdin_file = tempfile.TemporaryFile(prefix='p4-stdin', mode=stdin_mode)
         for i in stdin:
-            stdin_file.write(encode_text_stream(i))
+            stdin_file.write(i.encode())
             stdin_file.write(b'\n')
         stdin_file.flush()
         stdin_file.seek(0)
@@ -945,8 +935,7 @@ def branch_exists(branch):
 
     cmd = [ "git", "rev-parse", "--symbolic", "--verify", branch ]
     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    out, _ = p.communicate()
-    out = decode_text_stream(out)
+    out = p.communicate()[0].decode()
     if p.returncode:
         return False
     # expect exactly one line of output: the branch name
@@ -1331,7 +1320,7 @@ def generatePointer(self, contentFile):
             ['git', 'lfs', 'pointer', '--file=' + contentFile],
             stdout=subprocess.PIPE
         )
-        pointerFile = decode_text_stream(pointerProcess.stdout.read())
+        pointerFile = pointerProcess.stdout.read().decode()
         if pointerProcess.wait():
             os.remove(contentFile)
             die('git-lfs pointer command failed. Did you install the extension?')
@@ -2130,7 +2119,7 @@ def applyCommit(self, id):
         tmpFile = os.fdopen(handle, "w+b")
         if self.isWindows:
             submitTemplate = submitTemplate.replace("\n", "\r\n")
-        tmpFile.write(encode_text_stream(submitTemplate))
+        tmpFile.write(submitTemplate.encode())
         tmpFile.close()
 
         submitted = False
@@ -2186,8 +2175,8 @@ def applyCommit(self, id):
                         return False
 
                 # read the edited message and submit
-                tmpFile = open(fileName, "rb")
-                message = decode_text_stream(tmpFile.read())
+                with open(fileName, "r") as tmpFile:
+                    message = tmpFile.read()
                 tmpFile.close()
                 if self.isWindows:
                     message = message.replace("\r\n", "\n")
@@ -2887,7 +2876,7 @@ def splitFilesIntoBranches(self, commit):
         return branches
 
     def writeToGitStream(self, gitMode, relPath, contents):
-        self.gitStream.write(encode_text_stream(u'M {} inline {}\n'.format(gitMode, relPath)))
+        self.gitStream.write('M {} inline {}\n'.format(gitMode, relPath))
         self.gitStream.write('data %d\n' % sum(len(d) for d in contents))
         for d in contents:
             self.gitStream.write(d)
@@ -2930,7 +2919,7 @@ def streamOneP4File(self, file, contents):
             git_mode = "120000"
             # p4 print on a symlink sometimes contains "target\n";
             # if it does, remove the newline
-            data = ''.join(decode_text_stream(c) for c in contents)
+            data = ''.join(c.decode() for c in contents)
             if not data:
                 # Some version of p4 allowed creating a symlink that pointed
                 # to nothing.  This causes p4 errors when checking out such
@@ -2984,9 +2973,9 @@ def streamOneP4File(self, file, contents):
         pattern = p4_keywords_regexp_for_type(type_base, type_mods)
         if pattern:
             regexp = re.compile(pattern, re.VERBOSE)
-            text = ''.join(decode_text_stream(c) for c in contents)
+            text = ''.join(c.decode() for c in contents)
             text = regexp.sub(r'$\1$', text)
-            contents = [ encode_text_stream(text) ]
+            contents = [text.encode()]
 
         if self.largeFileSystem:
             (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
@@ -2998,7 +2987,7 @@ def streamOneP4Deletion(self, file):
         if verbose:
             sys.stdout.write("delete %s\n" % relPath)
             sys.stdout.flush()
-        self.gitStream.write(encode_text_stream(u'D {}\n'.format(relPath)))
+        self.gitStream.write('D {}\n'.format(relPath))
 
         if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath):
             self.largeFileSystem.removeLargeFile(relPath)
@@ -3096,12 +3085,13 @@ def streamP4FilesCbSelf(entry):
 
             fileArgs = []
             for f in filesToRead:
+                fileArg = f['path'].decode()
                 if 'shelved_cl' in f:
                     # Handle shelved CLs using the "p4 print file@=N" syntax to print
                     # the contents
-                    fileArg = f['path'] + encode_text_stream('@={}'.format(f['shelved_cl']))
+                    fileArg += '@={}'.format(f['shelved_cl'])
                 else:
-                    fileArg = f['path'] + encode_text_stream('#{}'.format(f['rev']))
+                    fileArg += '#{}'.format(f['rev'])
 
                 fileArgs.append(fileArg)
 
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 6/6] git-p4: Resolve RCS keywords in binary
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
                   ` (4 preceding siblings ...)
  2021-12-09 20:10 ` [PATCH 5/6] git-p4: Eliminate decode_stream and encode_stream Joel Holdsworth
@ 2021-12-09 20:10 ` Joel Holdsworth
  2021-12-10  7:57   ` Luke Diamand
  2021-12-10  0:48 ` [PATCH 0/6] Transition git-p4.py to support Python 3 only Ævar Arnfjörð Bjarmason
  2021-12-10  7:53 ` Luke Diamand
  7 siblings, 1 reply; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-09 20:10 UTC (permalink / raw)
  To: git; +Cc: Joel Holdsworth

Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/git-p4.py b/git-p4.py
index c362a5fa38..87e6685eb6 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -46,6 +46,9 @@
 
 p4_access_checked = False
 
+re_ko_keywords = re.compile(rb'\$(Id|Header)(:[^$\n]+)?\$')
+re_k_keywords = re.compile(rb'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
+
 def p4_build_cmd(cmd):
     """Build a suitable p4 command line.
 
@@ -532,20 +535,12 @@ def p4_type(f):
 #
 def p4_keywords_regexp_for_type(base, type_mods):
     if base in ("text", "unicode", "binary"):
-        kwords = None
         if "ko" in type_mods:
-            kwords = 'Id|Header'
+            return re_ko_keywords
         elif "k" in type_mods:
-            kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
+            return re_k_keywords
         else:
             return None
-        pattern = r"""
-            \$              # Starts with a dollar, followed by...
-            (%s)            # one of the keywords, followed by...
-            (:[^$\n]+)?     # possibly an old expansion, followed by...
-            \$              # another dollar
-            """ % kwords
-        return pattern
     else:
         return None
 
@@ -2035,11 +2030,10 @@ def applyCommit(self, id):
                 kwfiles = {}
                 for file in editedFiles | filesToDelete:
                     # did this file's delta contain RCS keywords?
-                    pattern = p4_keywords_regexp_for_file(file)
+                    regexp = p4_keywords_regexp_for_file(file)
 
-                    if pattern:
+                    if regexp:
                         # this file is a possibility...look for RCS keywords.
-                        regexp = re.compile(pattern, re.VERBOSE)
                         for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
                             if regexp.search(line):
                                 if verbose:
@@ -2968,14 +2962,9 @@ def streamOneP4File(self, file, contents):
             print("\nIgnoring apple filetype file %s" % file['depotFile'])
             return
 
-        # Note that we do not try to de-mangle keywords on utf16 files,
-        # even though in theory somebody may want that.
-        pattern = p4_keywords_regexp_for_type(type_base, type_mods)
-        if pattern:
-            regexp = re.compile(pattern, re.VERBOSE)
-            text = ''.join(c.decode() for c in contents)
-            text = regexp.sub(r'$\1$', text)
-            contents = [text.encode()]
+        regexp = p4_keywords_regexp_for_type(type_base, type_mods)
+        if regexp:
+            contents = [regexp.sub(rb'$\1$', c) for c in contents]
 
         if self.largeFileSystem:
             (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists
  2021-12-09 20:10 ` [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists Joel Holdsworth
@ 2021-12-09 22:42   ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2021-12-09 22:42 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: git

Joel Holdsworth <jholdsworth@nvidia.com> writes:

> Subject: Re: [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists

Style: downcase "Always" or anything that comes after the initial
"area" designator, i.e. "git-p4:".

The most important bit is missing from the proposed log message.

It says what the updated code does clearly (i.e. if we find a code
that still passes cmd arguments to subprocess not as a list, the
title tells us that this patch did not do a thorough job at it),
which is good.

But it does not explain why we want to do so.  There must be some
backstory about wanting to do so (e.g. "because it is more Python3
way of doing things") that we can explain to the future readers of
"git log" output.  Being aware of the general direction these
patches are taking us early would help the readers.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/6] git-p4: Removed support for Python 2
  2021-12-09 20:10 ` [PATCH 3/6] git-p4: Removed support for Python 2 Joel Holdsworth
@ 2021-12-09 22:44   ` Junio C Hamano
  2021-12-09 23:07     ` rsbecker
  2021-12-10  3:25   ` David Aguilar
  1 sibling, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2021-12-09 22:44 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: git

Joel Holdsworth <jholdsworth@nvidia.com> writes:

> Subject: Re: [PATCH 3/6] git-p4: Removed support for Python 2

"Removed" -> "remove".

Losing unused/no longer usable code is good.

> Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
> ---
>  git-p4.py | 89 +++++++++++++++++--------------------------------------
>  1 file changed, 28 insertions(+), 61 deletions(-)

In these 28 new/replacement lines, there is nothing that deserves
any mention in the proposed log message?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 4/6] git-p4: Decode byte strings before printing
  2021-12-09 20:10 ` [PATCH 4/6] git-p4: Decode byte strings before printing Joel Holdsworth
@ 2021-12-09 22:47   ` Junio C Hamano
  2021-12-10  8:40     ` Fabian Stelzer
  2021-12-10 10:41     ` Joel Holdsworth
  0 siblings, 2 replies; 31+ messages in thread
From: Junio C Hamano @ 2021-12-09 22:47 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: git

Joel Holdsworth <jholdsworth@nvidia.com> writes:

> Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
> ---
>  git-p4.py | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)

Is the use of strings with {} placeholders and their .format() method
integral part of "decoding byte strings before printing", or it is just
a new/better/improved/subjectively-preferred/whatever style?

If the latter, such a change should be separated into its own step,
or at least needs to be mentioned and justified in the proposed log
message.

Lack of explanation on "why" is shared among all these patches, it
seems, so I won't repeat, but the patches need to explain why to
their readers.

> diff --git a/git-p4.py b/git-p4.py
> index b5d4fc1176..b5945a0306 100755
> --- a/git-p4.py
> +++ b/git-p4.py
> @@ -2917,7 +2917,8 @@ def streamOneP4File(self, file, contents):
>                  size = int(self.stream_file['fileSize'])
>              else:
>                  size = 0 # deleted files don't get a fileSize apparently
> -            sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
> +            sys.stdout.write('\r{} --> {} ({} MB)\n'.format(
> +                file_path.decode(), relPath, size/1024/1024))
>              sys.stdout.flush()
>  
>          (type_base, type_mods) = split_p4_type(file["type"])
> @@ -3061,7 +3062,8 @@ def streamP4FilesCb(self, marshalled):
>              size = int(self.stream_file["fileSize"])
>              if size > 0:
>                  progress = 100*self.stream_file['streamContentSize']/size
> -                sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024)))
> +                sys.stdout.write('\r{} {}% ({} MB)'.format(
> +                    self.stream_file['depotFile'].decode(), progress, int(size/1024/1024)))
>                  sys.stdout.flush()
>  
>          self.stream_have_file_info = True
> @@ -3803,7 +3805,7 @@ def closeStreams(self):
>              return
>          self.gitStream.close()
>          if self.importProcess.wait() != 0:
> -            die("fast-import failed: %s" % self.gitError.read())
> +            die("fast-import failed: {}".format(self.gitError.read().decode()))
>          self.gitOutput.close()
>          self.gitError.close()
>          self.gitStream = None

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 3/6] git-p4: Removed support for Python 2
  2021-12-09 22:44   ` Junio C Hamano
@ 2021-12-09 23:07     ` rsbecker
  0 siblings, 0 replies; 31+ messages in thread
From: rsbecker @ 2021-12-09 23:07 UTC (permalink / raw)
  To: 'Junio C Hamano', 'Joel Holdsworth'; +Cc: git

On December 9, 2021 5:45 PM, Junio C Hamano wrote:
> Joel Holdsworth <jholdsworth@nvidia.com> writes:
> 
> > Subject: Re: [PATCH 3/6] git-p4: Removed support for Python 2
> 
> "Removed" -> "remove".
> 
> Losing unused/no longer usable code is good.
> 
> > Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
> > ---
> >  git-p4.py | 89
> > +++++++++++++++++--------------------------------------
> >  1 file changed, 28 insertions(+), 61 deletions(-)
> 
> In these 28 new/replacement lines, there is nothing that deserves any
> mention in the proposed log message?

Just a reminder that Python 2 is the only option available on some older
(still supported) platforms for the next year or so.

Sincerely,
Randall


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
                   ` (5 preceding siblings ...)
  2021-12-09 20:10 ` [PATCH 6/6] git-p4: Resolve RCS keywords in binary Joel Holdsworth
@ 2021-12-10  0:48 ` Ævar Arnfjörð Bjarmason
  2021-12-10 10:37   ` Joel Holdsworth
  2021-12-10 21:34   ` Junio C Hamano
  2021-12-10  7:53 ` Luke Diamand
  7 siblings, 2 replies; 31+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-12-10  0:48 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: git


On Thu, Dec 09 2021, Joel Holdsworth wrote:

> Python 2 was discontinued in 2020, and there is no longer any officially
> supported interpreter. Further development of git-p4.py will require
> would-be developers to test their changes with all supported dialects of
> the language. However, if there is no longer any supported runtime
> environment available, this places an unreasonable burden on the Git
> project to maintain support for an obselete dialect of the language.

Does it? I can still install Python 2.7 on Debian, presumably other OS's
have similar ways to easily test it.

I'm not that familiar with our python integration and have never used
git-py, but I found this series hard to read through.

You've got [12]/6 which don't make it clear whether they're needed for
python3, or are some mixture of requirenments and a matter of taste (or
a newer API?). E.g. isn't the formatting you're changing in 2/6
supported in Python3?

Then for 1/6 "pass cmd arguments to subprocess as a python lists" if
it's not just a matter of taste can we lead with a narrow change to the
new API (presumably we can pass to our own function as a string, split
on whitespace, and then pass to whatever python API executes it as a
list first.

Some of these changes also just seem to be entirely unrelated
refactorings, e.g. 6/6 where you're changing a multi-line commented
regexp into something that's a dense one-liner. Does Python 3 not
support the equivalent of Perl's /x, or is something else going on here?

You then change the requirenment not to python 3.0, but 3.7, which
AFAICT was released a couple of years ago. We tend to try to capture
some of the oldest LTS OS's in common use, e.g. the last 2-3 RHEL
releases.

We still "support" Perl 5.8, which was released in 2002 (although that
could probably do with a version bump, but not to a release to 2018).

I'm not at all opposed to this Python version bump, I truly don't know
enough to know if it's a good change. Maybe we can/should also be more
aggressive with a version dependency with git-p4 than with something
more central to git like perl or curl.

The commit messages could just really use some extra hand-holding and
explanation, and a clear split-out of things related to the version bump
v.s. things not needed for that, or unrelated refactorings.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/6] git-p4: Removed support for Python 2
  2021-12-09 20:10 ` [PATCH 3/6] git-p4: Removed support for Python 2 Joel Holdsworth
  2021-12-09 22:44   ` Junio C Hamano
@ 2021-12-10  3:25   ` David Aguilar
  2021-12-10 10:44     ` Joel Holdsworth
  1 sibling, 1 reply; 31+ messages in thread
From: David Aguilar @ 2021-12-10  3:25 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: Git Mailing List

On Thu, Dec 9, 2021 at 1:13 PM Joel Holdsworth <jholdsworth@nvidia.com> wrote:
>
> Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
> ---
>  git-p4.py | 89 +++++++++++++++++--------------------------------------
>  1 file changed, 28 insertions(+), 61 deletions(-)
>
> diff --git a/git-p4.py b/git-p4.py
> index 32f30e5f9a..b5d4fc1176 100755
> --- a/git-p4.py
> +++ b/git-p4.py
> @@ -1,4 +1,4 @@
> -#!/usr/bin/env python
> +#!/usr/bin/env python3
>  #
>  # git-p4.py -- A tool for bidirectional operation between a Perforce depot and git.
>  #
> @@ -16,8 +16,8 @@
>  # pylint: disable=too-many-branches,too-many-nested-blocks
>  #
>  import sys
> -if sys.version_info.major < 3 and sys.version_info.minor < 7:
> -    sys.stderr.write("git-p4: requires Python 2.7 or later.\n")
> +if sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor < 7):
> +    sys.stderr.write("git-p4: requires Python 3.7 or later.\n")
>      sys.exit(1)
>  import os
>  import optparse


There are pretty large user bases on centos/rhel 7+8 where python3.6
is the default version.

If we don't necessarily require any features from 3.7 then it might be
worth lowering this constraint to allow 3.6 or maybe even 3.4 to
maximize the number of users that would benefit.

I realize these python versions are retired according to the python
core team, but I tend to be a little more sympathetic to users when
it doesn't have any impact on the code.

--
David

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
                   ` (6 preceding siblings ...)
  2021-12-10  0:48 ` [PATCH 0/6] Transition git-p4.py to support Python 3 only Ævar Arnfjörð Bjarmason
@ 2021-12-10  7:53 ` Luke Diamand
  2021-12-10 10:54   ` Joel Holdsworth
  7 siblings, 1 reply; 31+ messages in thread
From: Luke Diamand @ 2021-12-10  7:53 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: Git Users

On Thu, 9 Dec 2021 at 20:10, Joel Holdsworth <jholdsworth@nvidia.com> wrote:
>
> The git-p4.py script currently implements code-paths for both Python 2 and
> 3.
>
> Python 2 was discontinued in 2020, and there is no longer any officially
> supported interpreter. Further development of git-p4.py will require
> would-be developers to test their changes with all supported dialects of
> the language. However, if there is no longer any supported runtime
> environment available, this places an unreasonable burden on the Git
> project to maintain support for an obselete dialect of the language.
>
> This patch-set removes all Python 2-specific code-paths, and then
> applies some simplifications to the code which are available given
> Python 3's improve delineation between bytes and strings.

I might as well take this opportunity to say that I've stopped needing
to worry about git-p4!

Hurrah!

I'm finding that the unit tests no longer pass with this change. I'm
not exactly sure why.


Luke





>
> Joel Holdsworth (6):
>   git-p4: Always pass cmd arguments to subprocess as a python lists
>   git-p4: Don't print shell commands as python lists
>   git-p4: Removed support for Python 2
>   git-p4: Decode byte strings before printing
>   git-p4: Eliminate decode_stream and encode_stream
>   git-p4: Resolve RCS keywords in binary
>
>  git-p4.py | 319 +++++++++++++++++++++---------------------------------
>  1 file changed, 123 insertions(+), 196 deletions(-)
>
> --
> 2.33.0
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/6] git-p4: Resolve RCS keywords in binary
  2021-12-09 20:10 ` [PATCH 6/6] git-p4: Resolve RCS keywords in binary Joel Holdsworth
@ 2021-12-10  7:57   ` Luke Diamand
  2021-12-10 10:51     ` Joel Holdsworth
  0 siblings, 1 reply; 31+ messages in thread
From: Luke Diamand @ 2021-12-10  7:57 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: Git Users

On Thu, 9 Dec 2021 at 20:11, Joel Holdsworth <jholdsworth@nvidia.com> wrote:
>
> Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
> ---
>  git-p4.py | 31 ++++++++++---------------------
>  1 file changed, 10 insertions(+), 21 deletions(-)
>
> diff --git a/git-p4.py b/git-p4.py
> index c362a5fa38..87e6685eb6 100755
> --- a/git-p4.py
> +++ b/git-p4.py
> @@ -46,6 +46,9 @@
>
>  p4_access_checked = False
>
> +re_ko_keywords = re.compile(rb'\$(Id|Header)(:[^$\n]+)?\$')
> +re_k_keywords = re.compile(rb'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')

I'm not sure what's going on here, but it does not look like just
turning off support for python2.x.


> +
>  def p4_build_cmd(cmd):
>      """Build a suitable p4 command line.
>
> @@ -532,20 +535,12 @@ def p4_type(f):
>  #
>  def p4_keywords_regexp_for_type(base, type_mods):
>      if base in ("text", "unicode", "binary"):
> -        kwords = None
>          if "ko" in type_mods:
> -            kwords = 'Id|Header'
> +            return re_ko_keywords
>          elif "k" in type_mods:
> -            kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
> +            return re_k_keywords
>          else:
>              return None
> -        pattern = r"""
> -            \$              # Starts with a dollar, followed by...
> -            (%s)            # one of the keywords, followed by...
> -            (:[^$\n]+)?     # possibly an old expansion, followed by...
> -            \$              # another dollar
> -            """ % kwords
> -        return pattern
>      else:
>          return None
>
> @@ -2035,11 +2030,10 @@ def applyCommit(self, id):
>                  kwfiles = {}
>                  for file in editedFiles | filesToDelete:
>                      # did this file's delta contain RCS keywords?
> -                    pattern = p4_keywords_regexp_for_file(file)
> +                    regexp = p4_keywords_regexp_for_file(file)
>
> -                    if pattern:
> +                    if regexp:
>                          # this file is a possibility...look for RCS keywords.
> -                        regexp = re.compile(pattern, re.VERBOSE)
>                          for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
>                              if regexp.search(line):
>                                  if verbose:
> @@ -2968,14 +2962,9 @@ def streamOneP4File(self, file, contents):
>              print("\nIgnoring apple filetype file %s" % file['depotFile'])
>              return
>
> -        # Note that we do not try to de-mangle keywords on utf16 files,
> -        # even though in theory somebody may want that.

This comment appears to have been stripped out, does that mean that we
now *do* try to demangle keywords on utf16?

> -        pattern = p4_keywords_regexp_for_type(type_base, type_mods)
> -        if pattern:
> -            regexp = re.compile(pattern, re.VERBOSE)
> -            text = ''.join(c.decode() for c in contents)
> -            text = regexp.sub(r'$\1$', text)
> -            contents = [text.encode()]
> +        regexp = p4_keywords_regexp_for_type(type_base, type_mods)
> +        if regexp:
> +            contents = [regexp.sub(rb'$\1$', c) for c in contents]
>
>          if self.largeFileSystem:
>              (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
> --
> 2.33.0
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 4/6] git-p4: Decode byte strings before printing
  2021-12-09 22:47   ` Junio C Hamano
@ 2021-12-10  8:40     ` Fabian Stelzer
  2021-12-10 10:48       ` Joel Holdsworth
  2021-12-10 10:41     ` Joel Holdsworth
  1 sibling, 1 reply; 31+ messages in thread
From: Fabian Stelzer @ 2021-12-10  8:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Joel Holdsworth, git

On 09.12.2021 14:47, Junio C Hamano wrote:
>Joel Holdsworth <jholdsworth@nvidia.com> writes:
>
>> Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
>> ---
>>  git-p4.py | 8 +++++---
>>  1 file changed, 5 insertions(+), 3 deletions(-)
>
>Is the use of strings with {} placeholders and their .format() method
>integral part of "decoding byte strings before printing", or it is just
>a new/better/improved/subjectively-preferred/whatever style?
>

If the new minimum python version will be 3.6 or above I'd vote for using 
f-Strings instead of .format() which I think are more readable and are also 
supposed to be faster.

So:
sys.stdout.write(f'\r{file_path} --> {rel_path} ({size/1024/1024} MB)\n')

instead of one of these:
sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
sys.stdout.write('\r{} --> {} ({} MB)\n'.format(file_path.decode(), relPath, 
size/1024/1024))

>> diff --git a/git-p4.py b/git-p4.py
>> index b5d4fc1176..b5945a0306 100755
>> --- a/git-p4.py
>> +++ b/git-p4.py
>> @@ -2917,7 +2917,8 @@ def streamOneP4File(self, file, contents):
>>                  size = int(self.stream_file['fileSize'])
>>              else:
>>                  size = 0 # deleted files don't get a fileSize apparently
>> -            sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
>> +            sys.stdout.write('\r{} --> {} ({} MB)\n'.format(
>> +                file_path.decode(), relPath, size/1024/1024))
>>              sys.stdout.flush()
>>
>>          (type_base, type_mods) = split_p4_type(file["type"])
>> @@ -3061,7 +3062,8 @@ def streamP4FilesCb(self, marshalled):
>>              size = int(self.stream_file["fileSize"])
>>              if size > 0:
>>                  progress = 100*self.stream_file['streamContentSize']/size
>> -                sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024)))
>> +                sys.stdout.write('\r{} {}% ({} MB)'.format(
>> +                    self.stream_file['depotFile'].decode(), progress, int(size/1024/1024)))
>>                  sys.stdout.flush()
>>
>>          self.stream_have_file_info = True
>> @@ -3803,7 +3805,7 @@ def closeStreams(self):
>>              return
>>          self.gitStream.close()
>>          if self.importProcess.wait() != 0:
>> -            die("fast-import failed: %s" % self.gitError.read())
>> +            die("fast-import failed: {}".format(self.gitError.read().decode()))
>>          self.gitOutput.close()
>>          self.gitError.close()
>>          self.gitStream = None

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10  0:48 ` [PATCH 0/6] Transition git-p4.py to support Python 3 only Ævar Arnfjörð Bjarmason
@ 2021-12-10 10:37   ` Joel Holdsworth
  2021-12-10 11:30     ` Ævar Arnfjörð Bjarmason
  2021-12-10 21:34   ` Junio C Hamano
  1 sibling, 1 reply; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-10 10:37 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason; +Cc: git, Tzadik Vanderhoof

> The commit messages could just really use some extra hand-holding and
> explanation, and a clear split-out of things related to the version bump v.s.
> things not needed for that, or unrelated refactorings.

Yes, I am getting this message loud and clear. I will resubmit with more detailed commit messages today.

To explain the story here: I started using git-p4 as part of my work-flow, and I expect to need it for several years to come. As I began to use it, I found that a series of bugs - mostly related to character encoding. In fixing these, I found that some of the troubles were specific to Python 3 - or rather Python 2's less strict approach to distinguishing between byte sequences and textual strings allowed the script to proceed Python 2 even though what it was doing was in fact invalid, and was potentially corrupting data.

A common problem that users are encountering is that the script attempts to decode incoming textual byte-streams into UTF-8 strings. On Python 3 this fails with an exception if the data contains invalid UTF-8 codes. For text files created in Windows, CP1252 Smart Quote characters: 0x93 and 0x94 are seen fairly frequently. These codes are invalid in UTF-8, so if the script encounters any file or file name containing them, it will fail with an exception.

Tzadik Vanderhoof submitted a patch attempting to fix some of these issues in April 2021:
https://lore.kernel.org/git/20210429073905.837-1-tzadik.vanderhoof@gmail.com/

My two comments about this patch are that 1. It doesn't fix my issue, and 2. Even with the proposed fallbackEncoding option it still leaves git-p4 broken by default.

A fallbackEncoding option may still be necessary, but I found that most of the issues I encountered could be side-stepped by simply avoiding decoding incoming data into UTF-8 in the first place.

Keeping a clean separation between encoded and decoded text is much easier to do in Python 3. If Python 2 support must be maintained, this will require careful testing of separate code-paths both platforms which I don't regard as reasonable given that Python 2 is thoroughly deprecated. Therefore, this first patch-set focusses primarily on removing Python 2 support.

Furthermore, because I expect to be using git-p4 in my daily work-flow for some time to come, I am interested in investing some effort into improving it. There are bugs, unreliable behaviour, user-hostile behaviour, as well as code that would benefit from clean-up and modernisation. In submitting these patches, I am trying to get a read on to what extent such efforts would be accepted by the Git maintainers. 

Is it preferable that patch-sets have a tight focus on a single topic? I am already dividing up my full patch collection. I can divide it further if requested. I am happy to do this, I was just worried that it just might make longer to get all my patches through review.


> Some of these changes also just seem to be entirely unrelated refactorings,
> e.g. 6/6 where you're changing a multi-line commented regexp into
> something that's a dense one-liner. Does Python 3 not support the
> equivalent of Perl's /x, or is something else going on here?

I will improve the commit message to explain the changes being made here.

The regexp is matching RCS keywords: https://www.perforce.com/manuals/p4guide/Content/P4Guide/filetypes.rcs.html - $File$, $Author$, $Author$ etc., a very simple match. We could keep it multi-line, though this seems overkill to me.

The main significance of this change that previously git-p4 would compile one of these two regexes for every single file processed. This patch just pre-compiles the two regexes (now binary regexes, not utf-8 regexes) at the start of the script.
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 4/6] git-p4: Decode byte strings before printing
  2021-12-09 22:47   ` Junio C Hamano
  2021-12-10  8:40     ` Fabian Stelzer
@ 2021-12-10 10:41     ` Joel Holdsworth
  1 sibling, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-10 10:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

> Is the use of strings with {} placeholders and their .format() method integral
> part of "decoding byte strings before printing", or it is just a
> new/better/improved/subjectively-preferred/whatever style?
> 
> If the latter, such a change should be separated into its own step, or at least
> needs to be mentioned and justified in the proposed log message.

As I mentioned in my other message, I would like to invest some time into tidying and modernising the script - as well as fixing bugs and improving behaviour. If I submit patches that only make subjective style improvements, are these likely to be accepted?

> Lack of explanation on "why" is shared among all these patches, it seems, so I
> won't repeat, but the patches need to explain why to their readers.

Fair enough. I will resubmit.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 3/6] git-p4: Removed support for Python 2
  2021-12-10  3:25   ` David Aguilar
@ 2021-12-10 10:44     ` Joel Holdsworth
  0 siblings, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-10 10:44 UTC (permalink / raw)
  To: David Aguilar; +Cc: Git Mailing List

> There are pretty large user bases on centos/rhel 7+8 where python3.6 is the
> default version.
> 
> If we don't necessarily require any features from 3.7 then it might be worth
> lowering this constraint to allow 3.6 or maybe even 3.4 to maximize the
> number of users that would benefit.
> 
> I realize these python versions are retired according to the python core
> team, but I tend to be a little more sympathetic to users when it doesn't
> have any impact on the code.

I don't mind 3.6 - I don't need any features from 3.7 or newer.

I draw the line at Python 2, though.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 4/6] git-p4: Decode byte strings before printing
  2021-12-10  8:40     ` Fabian Stelzer
@ 2021-12-10 10:48       ` Joel Holdsworth
  0 siblings, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-10 10:48 UTC (permalink / raw)
  To: Fabian Stelzer, Junio C Hamano; +Cc: git

> If the new minimum python version will be 3.6 or above I'd vote for using f-
> Strings instead of .format() which I think are more readable and are also
> supposed to be faster.

Time passes so fast - I would prefer to use f-strings, but I didn't realise that they were universally available yet. They're still a "new thing" as far as I'm concerned.

I would prefer f-strings, I just used the str.format() method as a middle-ground.

> So:
> sys.stdout.write(f'\r{file_path} --> {rel_path} ({size/1024/1024} MB)\n')

By the way, I have a patch coming soon that can print the size in human readable units: b, kb, Mb, Gb etc. rather than always converting it to Mb.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 6/6] git-p4: Resolve RCS keywords in binary
  2021-12-10  7:57   ` Luke Diamand
@ 2021-12-10 10:51     ` Joel Holdsworth
  0 siblings, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-10 10:51 UTC (permalink / raw)
  To: Luke Diamand; +Cc: Git Users

> This comment appears to have been stripped out, does that mean that we
> now *do* try to demangle keywords on utf16?

Good point. I guess the comment should stay.

Though really... I'm not sure what we should do with utf16 files. Currently the RCS keywords just won't be resolved, with no warning for the user! I guess we could resolve them, if we had a reliable way of detecting UTF-16?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10  7:53 ` Luke Diamand
@ 2021-12-10 10:54   ` Joel Holdsworth
  2021-12-11  9:58     ` Luke Diamand
  0 siblings, 1 reply; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-10 10:54 UTC (permalink / raw)
  To: Luke Diamand; +Cc: Git Users

> I might as well take this opportunity to say that I've stopped needing to
> worry about git-p4!
> 
> Hurrah!

Lucky you. It looks like I'm going to be working with Perforce a lot in the coming years, but if I can get the bridge to git working really nicely, then I am hoping to have a happy workflow even so.

> I'm finding that the unit tests no longer pass with this change. I'm not exactly
> sure why.

What unit tests are these? I am happy to test with them.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10 10:37   ` Joel Holdsworth
@ 2021-12-10 11:30     ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 31+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-12-10 11:30 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: git, Tzadik Vanderhoof


On Fri, Dec 10 2021, Joel Holdsworth wrote:

>> The commit messages could just really use some extra hand-holding and
>> explanation, and a clear split-out of things related to the version bump v.s.
>> things not needed for that, or unrelated refactorings.
>
> Yes, I am getting this message loud and clear. I will resubmit with more detailed commit messages today.

Thanks...

> To explain the story here: I started using git-p4 as part of my
> work-flow, and I expect to need it for several years to come. As I
> began to use it, I found that a series of bugs - mostly related to
> character encoding. In fixing these, I found that some of the troubles
> were specific to Python 3 - or rather Python 2's less strict approach
> to distinguishing between byte sequences and textual strings allowed
> the script to proceed Python 2 even though what it was doing was in
> fact invalid, and was potentially corrupting data.
>
> A common problem that users are encountering is that the script
> attempts to decode incoming textual byte-streams into UTF-8
> strings. On Python 3 this fails with an exception if the data contains
> invalid UTF-8 codes. For text files created in Windows, CP1252 Smart
> Quote characters: 0x93 and 0x94 are seen fairly frequently. These
> codes are invalid in UTF-8, so if the script encounters any file or
> file name containing them, it will fail with an exception.
>
> Tzadik Vanderhoof submitted a patch attempting to fix some of these issues in April 2021:
> https://lore.kernel.org/git/20210429073905.837-1-tzadik.vanderhoof@gmail.com/
>
> My two comments about this patch are that 1. It doesn't fix my issue, and 2. Even with the proposed fallbackEncoding option it still leaves git-p4 broken by default.
>
> A fallbackEncoding option may still be necessary, but I found that most of the issues I encountered could be side-stepped by simply avoiding decoding incoming data into UTF-8 in the first place.
>
> Keeping a clean separation between encoded and decoded text is much
> easier to do in Python 3. If Python 2 support must be maintained, this
> will require careful testing of separate code-paths both platforms
> which I don't regard as reasonable given that Python 2 is thoroughly
> deprecated. Therefore, this first patch-set focusses primarily on
> removing Python 2 support.

This all makes perfect sense to me (having never used git-p4). Having
this sort of explanation be part of the relevant commit message would be
great :)

I haven't worked extensively with Python myself, but I've understood
that its Unicode support was a big pain before v3 as you describe, which
is just the sort of thing that would justify a version prereq bump, even
if it's a bit painful to some users on older systems (if even that,
maybe everyone's upgraded already...)

> Furthermore, because I expect to be using git-p4 in my daily work-flow
> for some time to come, I am interested in investing some effort into
> improving it. There are bugs, unreliable behaviour, user-hostile
> behaviour, as well as code that would benefit from clean-up and
> modernisation. In submitting these patches, I am trying to get a read
> on to what extent such efforts would be accepted by the Git
> maintainers.

I don't think there's any reason we wouldn't accept these sorts of
changes.

The comment from me (and others I see) is purely on the topic of making
them easier to review, i.e. splitting out "this is for a version
upgrade" v.s. "this is just better Python style" or whatever.

> Is it preferable that patch-sets have a tight focus on a single topic?
> I am already dividing up my full patch collection. I can divide it
> further if requested. I am happy to do this, I was just worried that
> it just might make longer to get all my patches through review.

Yeah this project really prefers to do it that way. A good example is
this recent 19-part series:
https://lore.kernel.org/git/20211210095757.gu7w4n2rqulx2dvg@fs/T/#m5d9e8180551907578d56cdd6cd6244b9df6b59d5

This would probably be 1-3 patches, or even 1 patch in some other
projects, but especially with repetitive formatting changes I think it's
fair to say that we prefer for them to be split up closer to that,
i.e. one commit with some %s -> {} formatting change explaining why
(probably just style, preferenc) etc.

There's also the option of splitting things into different patch
series. I'd say if you e.g. have one series of "we're dropping python 2
support" and another "here's nice formatting changes" it would be nice
to split those into two different patch serieses.

But that's always a matter of taste & how easy it is. If they
extensively textually conflict it's often worth it to just stack them
together, or if they changes are all small/easy enough to review some
"while we're at it..." changes are generally fine.

Finally, for a re-submission it's also nice to find people who've worked
on the relevant code (with some fuzzing for "is this person likely to
still be active in the project?") and CC them on the series, or in this
case people who've made recent changes to git-p4.py.

>> Some of these changes also just seem to be entirely unrelated refactorings,
>> e.g. 6/6 where you're changing a multi-line commented regexp into
>> something that's a dense one-liner. Does Python 3 not support the
>> equivalent of Perl's /x, or is something else going on here?
>
> I will improve the commit message to explain the changes being made here.
>
> The regexp is matching RCS keywords:
> https://www.perforce.com/manuals/p4guide/Content/P4Guide/filetypes.rcs.html
> - $File$, $Author$, $Author$ etc., a very simple match. We could keep
> it multi-line, though this seems overkill to me.

Sure, my preference in Perl would be to split it, but I'm never going to
be maintaining git-p4.py, so... :)

I.e. I think it's perfectly fair to roll it into some general "this
improves readability" changes, just as long as they're clearly labeled
as such.

> The main significance of this change that previously git-p4 would
> compile one of these two regexes for every single file processed. This
> patch just pre-compiles the two regexes (now binary regexes, not utf-8
> regexes) at the start of the script.

Makes sens, and another thing that would be perfect for pretty much
copy/pasting as-is to a re-rolled commit's message :)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10  0:48 ` [PATCH 0/6] Transition git-p4.py to support Python 3 only Ævar Arnfjörð Bjarmason
  2021-12-10 10:37   ` Joel Holdsworth
@ 2021-12-10 21:34   ` Junio C Hamano
  2021-12-10 21:53     ` rsbecker
  2021-12-11 21:00     ` Elijah Newren
  1 sibling, 2 replies; 31+ messages in thread
From: Junio C Hamano @ 2021-12-10 21:34 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason; +Cc: Joel Holdsworth, git

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> On Thu, Dec 09 2021, Joel Holdsworth wrote:
>
>> Python 2 was discontinued in 2020, and there is no longer any officially
>> supported interpreter. Further development of git-p4.py will require
>> would-be developers to test their changes with all supported dialects of
>> the language. However, if there is no longer any supported runtime
>> environment available, this places an unreasonable burden on the Git
>> project to maintain support for an obselete dialect of the language.
>
> Does it? I can still install Python 2.7 on Debian, presumably other OS's
> have similar ways to easily test it.

Yes, that is a good point to make against "we cannot maintain the
half meant to cater to Python2 of the script".  Developers should be
able to keep and test Python2 support, if it is necessary.

So the more important question is if there are end-users that have
no choice but sticking to Python2.  Is there distributions and
systems that do not offer Python3, on which end-users have happily
been using Python2?  If some users with vendor supported Python2 do
not have access to Python3, cutting them off may be premature.

As the general direction, I do not mind deprecating support for
Python2, and then eventually removing it.  I just do not know if 2
years is long enough for the latter (IIRC, the sunset happened at
the beginning of 2020, and we are about to end 2021).

Thanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10 21:34   ` Junio C Hamano
@ 2021-12-10 21:53     ` rsbecker
  2021-12-11 21:00     ` Elijah Newren
  1 sibling, 0 replies; 31+ messages in thread
From: rsbecker @ 2021-12-10 21:53 UTC (permalink / raw)
  To: 'Junio C Hamano',
	'Ævar Arnfjörð Bjarmason'
  Cc: 'Joel Holdsworth', git

On December 10, 2021 4:34 PM, Junio C Hamano wrote:
> Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:
> 
> > On Thu, Dec 09 2021, Joel Holdsworth wrote:
> >
> >> Python 2 was discontinued in 2020, and there is no longer any
> >> officially supported interpreter. Further development of git-p4.py
> >> will require would-be developers to test their changes with all
> >> supported dialects of the language. However, if there is no longer
> >> any supported runtime environment available, this places an
> >> unreasonable burden on the Git project to maintain support for an
> obselete dialect of the language.
> >
> > Does it? I can still install Python 2.7 on Debian, presumably other
> > OS's have similar ways to easily test it.
> 
> Yes, that is a good point to make against "we cannot maintain the half meant
> to cater to Python2 of the script".  Developers should be able to keep and
> test Python2 support, if it is necessary.
> 
> So the more important question is if there are end-users that have no choice
> but sticking to Python2.  Is there distributions and systems that do not offer
> Python3, on which end-users have happily been using Python2?  If some
> users with vendor supported Python2 do not have access to Python3, cutting
> them off may be premature.
> 
> As the general direction, I do not mind deprecating support for Python2, and
> then eventually removing it.  I just do not know if 2 years is long enough for
> the latter (IIRC, the sunset happened at the beginning of 2020, and we are
> about to end 2021).

The HPE NonStop Itanium platform only provides Python 2.7. That is the last version that will be available on that platform until it goes off support some time in the next few years (there are known very large US companies who are git users on that platform but I cannot share their names here). The NonStop x86 platform is currently on Python 3.6.8 but I have to take action to select the python3 object - not a big deal. Since I am continually running the git test suite with each release, the python 2 code can continue to be tested. Python 2 is also available on our x86 machine for backward compatibility reasons - it may vanish at some point but that isn’t scheduled yet.

-Randall


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10 10:54   ` Joel Holdsworth
@ 2021-12-11  9:58     ` Luke Diamand
  2021-12-13 13:47       ` Joel Holdsworth
  0 siblings, 1 reply; 31+ messages in thread
From: Luke Diamand @ 2021-12-11  9:58 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: Git Users

On Fri, 10 Dec 2021 at 10:54, Joel Holdsworth <jholdsworth@nvidia.com> wrote:
>
> > I might as well take this opportunity to say that I've stopped needing to
> > worry about git-p4!
> >
> > Hurrah!
>
> Lucky you. It looks like I'm going to be working with Perforce a lot in the coming years, but if I can get the bridge to git working really nicely, then I am hoping to have a happy workflow even so.

I cannot begin to tell you how liberating it is to just have a pure
git workflow!

But yes, git-p4 at least stops some of the pain.

>
> > I'm finding that the unit tests no longer pass with this change. I'm not exactly
> > sure why.
>
> What unit tests are these? I am happy to test with them.

    cd t
    make T=t98* -j$(nproc)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-10 21:34   ` Junio C Hamano
  2021-12-10 21:53     ` rsbecker
@ 2021-12-11 21:00     ` Elijah Newren
  2021-12-12  8:55       ` Luke Diamand
  1 sibling, 1 reply; 31+ messages in thread
From: Elijah Newren @ 2021-12-11 21:00 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Joel Holdsworth,
	Git Mailing List

Hi,

On Fri, Dec 10, 2021 at 10:07 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:
>
> > On Thu, Dec 09 2021, Joel Holdsworth wrote:
> >
> >> Python 2 was discontinued in 2020, and there is no longer any officially
> >> supported interpreter. Further development of git-p4.py will require
> >> would-be developers to test their changes with all supported dialects of
> >> the language. However, if there is no longer any supported runtime
> >> environment available, this places an unreasonable burden on the Git
> >> project to maintain support for an obselete dialect of the language.
> >
> > Does it? I can still install Python 2.7 on Debian, presumably other OS's
> > have similar ways to easily test it.
>
> Yes, that is a good point to make against "we cannot maintain the
> half meant to cater to Python2 of the script".  Developers should be
> able to keep and test Python2 support, if it is necessary.

I also disagree with the reason Joel gave in the quoted paragraph for
dropping Python2 support, but I think there are other good reasons to
drop it.

> So the more important question is if there are end-users that have
> no choice but sticking to Python2.  Is there distributions and
> systems that do not offer Python3, on which end-users have happily
> been using Python2?  If some users with vendor supported Python2 do
> not have access to Python3, cutting them off may be premature.

These are good questions, though I think there's more to it than this,
as I'll mention in just a minute...

> As the general direction, I do not mind deprecating support for
> Python2, and then eventually removing it.  I just do not know if 2
> years is long enough for the latter (IIRC, the sunset happened at
> the beginning of 2020, and we are about to end 2021).

Python2 was deprecated by the python project in 2008, with announced
plans to stop all support (including security fixes) in 2015.  They
pushed the sunset date back to Jan 1, 2020.  So it has only been
end-of-life for just under 2 years, but it's been deprecated for over
13 years.

In regards to your good questions about Python3 availability on some
platforms: If such platforms exist, they have had over a decade's
heads up...so let's ask a few extra questions.  If these platforms
still haven't made python3 available, would newer versions of Git even
be available on these platforms?  Even if newer Git versions are
available, would users on such platforms have any qualms with using an
older Git version given the platform insistence of only providing an
old Python version lacking any support (even security fixes)?


Some of my personal python2/python3 experience, if it's useful in
weighing decisions:

* There are python projects for which I still continue to support
simultaneous python2 and python3 usage, though for projects that are
smaller then git-p4.py (e.g. 1/2 to 1/3 the size).  Such multi-version
support is painful, and it causes occasional bugs that hit users that
wouldn't arise if there was only one supported python version.

* I initially wanted to also do the multi-version support for
git-filter-repo (which is approximately the same size as git-p4.py,
and obviously also interfaces with git somewhat deeply).  I gave up on
it, and didn't consider it justified, especially with the
then-soon-impending End-Of-Life for python2.  I instead just switched
from python2 -> python3 (in 2019; yes, I'm a straggler.)  Granted, I
did benefit from the fact that git-filter-repo is a
once-in-a-blue-moon usage tool (and only by one member on the team),
rather than a daily usage tool, but I may have come to the same
decision anyway even back then.

* (Slight tangent) I tried to use unicode strings everywhere in
git-filter-repo a few times, but invariably found it to be buggy and
slow.  It was a mistake, and I eventually switched over to bytestrings
everywhere, only converting to unicode (when possible) when printing
messages for the user on the console.  bytestrings are ugly to use
(IMO), but they're a better data model when dealing with file
contents, process output, filenames, etc.  I think git-p4's decision
to attempt to use unicode strings everywhere is a mistake that'll
probably result in bugs based on that experience of mine; it's not an
appropriate model of the relevant data.  It'll also make things
slower.

[I actually think the unicode vs. bytestring thing might be more
important for bug fixing than limiting to python3.  Though I think
both are worthwhile.]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-11 21:00     ` Elijah Newren
@ 2021-12-12  8:55       ` Luke Diamand
  0 siblings, 0 replies; 31+ messages in thread
From: Luke Diamand @ 2021-12-12  8:55 UTC (permalink / raw)
  To: Elijah Newren
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Joel Holdsworth, Git Mailing List

On Sat, 11 Dec 2021 at 21:00, Elijah Newren <newren@gmail.com> wrote:
>
> Hi,
>
> On Fri, Dec 10, 2021 at 10:07 PM Junio C Hamano <gitster@pobox.com> wrote:
> >
> > Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:
> >
> > > On Thu, Dec 09 2021, Joel Holdsworth wrote:
> > >
> > >> Python 2 was discontinued in 2020, and there is no longer any officially
> > >> supported interpreter. Further development of git-p4.py will require
> > >> would-be developers to test their changes with all supported dialects of
> > >> the language. However, if there is no longer any supported runtime
> > >> environment available, this places an unreasonable burden on the Git
> > >> project to maintain support for an obselete dialect of the language.
> > >
> > > Does it? I can still install Python 2.7 on Debian, presumably other OS's
> > > have similar ways to easily test it.
> >
> > Yes, that is a good point to make against "we cannot maintain the
> > half meant to cater to Python2 of the script".  Developers should be
> > able to keep and test Python2 support, if it is necessary.
>
> I also disagree with the reason Joel gave in the quoted paragraph for
> dropping Python2 support, but I think there are other good reasons to
> drop it.
>
> > So the more important question is if there are end-users that have
> > no choice but sticking to Python2.  Is there distributions and
> > systems that do not offer Python3, on which end-users have happily
> > been using Python2?  If some users with vendor supported Python2 do
> > not have access to Python3, cutting them off may be premature.
>
> These are good questions, though I think there's more to it than this,
> as I'll mention in just a minute...
>
> > As the general direction, I do not mind deprecating support for
> > Python2, and then eventually removing it.  I just do not know if 2
> > years is long enough for the latter (IIRC, the sunset happened at
> > the beginning of 2020, and we are about to end 2021).
>
> Python2 was deprecated by the python project in 2008, with announced
> plans to stop all support (including security fixes) in 2015.  They
> pushed the sunset date back to Jan 1, 2020.  So it has only been
> end-of-life for just under 2 years, but it's been deprecated for over
> 13 years.
>
> In regards to your good questions about Python3 availability on some
> platforms: If such platforms exist, they have had over a decade's
> heads up...so let's ask a few extra questions.  If these platforms
> still haven't made python3 available, would newer versions of Git even
> be available on these platforms?  Even if newer Git versions are
> available, would users on such platforms have any qualms with using an
> older Git version given the platform insistence of only providing an
> old Python version lacking any support (even security fixes)?
>
>
> Some of my personal python2/python3 experience, if it's useful in
> weighing decisions:
>
> * There are python projects for which I still continue to support
> simultaneous python2 and python3 usage, though for projects that are
> smaller then git-p4.py (e.g. 1/2 to 1/3 the size).  Such multi-version
> support is painful, and it causes occasional bugs that hit users that
> wouldn't arise if there was only one supported python version.
>
> * I initially wanted to also do the multi-version support for
> git-filter-repo (which is approximately the same size as git-p4.py,
> and obviously also interfaces with git somewhat deeply).  I gave up on
> it, and didn't consider it justified, especially with the
> then-soon-impending End-Of-Life for python2.  I instead just switched
> from python2 -> python3 (in 2019; yes, I'm a straggler.)  Granted, I
> did benefit from the fact that git-filter-repo is a
> once-in-a-blue-moon usage tool (and only by one member on the team),
> rather than a daily usage tool, but I may have come to the same
> decision anyway even back then.
>
> * (Slight tangent) I tried to use unicode strings everywhere in
> git-filter-repo a few times, but invariably found it to be buggy and
> slow.  It was a mistake, and I eventually switched over to bytestrings
> everywhere, only converting to unicode (when possible) when printing
> messages for the user on the console.  bytestrings are ugly to use
> (IMO), but they're a better data model when dealing with file
> contents, process output, filenames, etc.  I think git-p4's decision
> to attempt to use unicode strings everywhere is a mistake that'll
> probably result in bugs based on that experience of mine; it's not an
> appropriate model of the relevant data.  It'll also make things
> slower.

+1

When using python2, git-p4 just ignores all the encoding issues, and I
think this is actually exactly what we want. This means that in some
ways, the Python2 version is currently more correct than the Python3
one.

With Python3, as you say, it attempts to convert to/from unicode
strings, and probably this is the root of the problems.

Perforce has a mode where it works in unicode internally. This gets
configured in the server, and clients then do The Right Thing.

https://www.perforce.com/manuals/p4sag/Content/P4SAG/superuser.unicode.clients.html

However, many P4 shops don't bother to set this (I know we don't) so
you end up with BIG5 and CP1252 just being dumped raw into Perforce,
and then coming back out again. This causes problems for Perforce
clients just as much as for git-p4 clients.

When git-p4 is used with python2, provided your client has the same
character encoding as the author of the code you are looking at, it
will appear to work. Otherwise you get character encoding errors.

To fix this with python3, we need to make a choice:

- either work out the character encoding that Perforce is sending us
which it doesn't know and can't tell us and then convert that to/from
unicode. There was a patch series a while ago which tried to let you
configure a default encoding.
- or to just preserve everything that Perforce sends us and let the
client figure it out.

The current patch series is (I think) tending towards the first of
those options. But we're trying to recreate information that just
doesn't exist.

Here's the patch that Andrew Oakley sent a while back which attempts
to get git-p4 to just preserve what it gets:

https://lore.kernel.org/git/20210412085251.51475-1-andrew@adoakley.name/

Luke


>
> [I actually think the unicode vs. bytestring thing might be more
> important for bug fixing than limiting to python3.  Though I think
> both are worthwhile.]

+1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-11  9:58     ` Luke Diamand
@ 2021-12-13 13:47       ` Joel Holdsworth
  2021-12-13 19:29         ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-13 13:47 UTC (permalink / raw)
  To: Luke Diamand; +Cc: Git Users

> > What unit tests are these? I am happy to test with them.
> 
>     cd t
>     make T=t98* -j$(nproc)

Awesome! Just ran the tests. We got a clean sweep.

With one proviso:

When running the current upstream master git-p4 version, there are errors if /usr/bin/python is not present.

lib-git-p4.sh checks for the presence of python with "test_have_prereq PYTHON" - but if I only have /usr/bin/python3 installed, the prerequisite check passes, but git-p4.py itself fails because the shebang points at python not python3.

On Debian installing the package "python-is-python3" fixes the issue.

Perhaps it might help to have something like "test_have_prereq PYTHON3".

Regardless, if python3 is installed, this patch-set passes the tests just fine.

Joel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-13 13:47       ` Joel Holdsworth
@ 2021-12-13 19:29         ` Junio C Hamano
  2021-12-13 19:58           ` Joel Holdsworth
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2021-12-13 19:29 UTC (permalink / raw)
  To: Joel Holdsworth; +Cc: Luke Diamand, Git Users

Joel Holdsworth <jholdsworth@nvidia.com> writes:

>> > What unit tests are these? I am happy to test with them.
>> 
>>     cd t
>>     make T=t98* -j$(nproc)
>
> Awesome! Just ran the tests. We got a clean sweep.
>
> With one proviso:
>
> When running the current upstream master git-p4 version, there are errors if /usr/bin/python is not present.
>
> lib-git-p4.sh checks for the presence of python with "test_have_prereq PYTHON" - but if I only have /usr/bin/python3 installed, the prerequisite check passes, but git-p4.py itself fails because the shebang points at python not python3.
>
> On Debian installing the package "python-is-python3" fixes the issue.

Can I take that a distro allowing an installation without
python-is-python3 (or just having only python2) as a sign that
"transtion to 3 only" is a bit premature?

> Perhaps it might help to have something like "test_have_prereq PYTHON3".

Sure, but that defeats the whole notion of "python3 is everywhere,
python2 is dead, and nobody should be using the 2-year dead
version".  "test_have_prereq PYTHON" should be sufficient in such a
world, no?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH 0/6] Transition git-p4.py to support Python 3 only
  2021-12-13 19:29         ` Junio C Hamano
@ 2021-12-13 19:58           ` Joel Holdsworth
  0 siblings, 0 replies; 31+ messages in thread
From: Joel Holdsworth @ 2021-12-13 19:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Luke Diamand, Git Users

> Sure, but that defeats the whole notion of "python3 is everywhere,
> python2 is dead, and nobody should be using the 2-year dead version".
> "test_have_prereq PYTHON" should be sufficient in such a world, no?

That's a bit of a stretch.

As Elijah said:

> Python2 was deprecated by the python project in 2008, with announced
> plans to stop all support (including security fixes) in 2015.  They pushed the
> sunset date back to Jan 1, 2020.  So it has only been end-of-life for just under
> 2 years, but it's been deprecated for over
> 13 years.

Python 3 *is* everywhere. During the transitionary period, Debian allowed python2 and python3 to coexist on a system by giving them the names /usr/bin/python and /usr/bin/python3 respectively, because they are effectively different languages. This allowed legacy code to continue to function in view that it would eventually get ported over to python 3, opting into it by changing the shebang to point at /usr/bin/python3

"test_have_prereq PYTHON" lumps all python versions together as if they were one thing, which they are not. It's as meaningful as lumping together all the Perl versions, or lumping C++98 together with modern C++. If a system has Python 1 installed, strictly speaking the configuration script should indicate that Python is present! - but there's a bit more

I am quite sure the Python 2 will linger on in some form or other - maybe forever, but that doesn't mean the Git project should be developing, maintaining, testing or releasing Python 2 code in 2021.

Python 3 is so well established, that even the minimum version requirement I want to bump to: Python 3.6, is end-of-life.

Joel

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2021-12-13 19:58 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-09 20:10 [PATCH 0/6] Transition git-p4.py to support Python 3 only Joel Holdsworth
2021-12-09 20:10 ` [PATCH 1/6] git-p4: Always pass cmd arguments to subprocess as a python lists Joel Holdsworth
2021-12-09 22:42   ` Junio C Hamano
2021-12-09 20:10 ` [PATCH 2/6] git-p4: Don't print shell commands as " Joel Holdsworth
2021-12-09 20:10 ` [PATCH 3/6] git-p4: Removed support for Python 2 Joel Holdsworth
2021-12-09 22:44   ` Junio C Hamano
2021-12-09 23:07     ` rsbecker
2021-12-10  3:25   ` David Aguilar
2021-12-10 10:44     ` Joel Holdsworth
2021-12-09 20:10 ` [PATCH 4/6] git-p4: Decode byte strings before printing Joel Holdsworth
2021-12-09 22:47   ` Junio C Hamano
2021-12-10  8:40     ` Fabian Stelzer
2021-12-10 10:48       ` Joel Holdsworth
2021-12-10 10:41     ` Joel Holdsworth
2021-12-09 20:10 ` [PATCH 5/6] git-p4: Eliminate decode_stream and encode_stream Joel Holdsworth
2021-12-09 20:10 ` [PATCH 6/6] git-p4: Resolve RCS keywords in binary Joel Holdsworth
2021-12-10  7:57   ` Luke Diamand
2021-12-10 10:51     ` Joel Holdsworth
2021-12-10  0:48 ` [PATCH 0/6] Transition git-p4.py to support Python 3 only Ævar Arnfjörð Bjarmason
2021-12-10 10:37   ` Joel Holdsworth
2021-12-10 11:30     ` Ævar Arnfjörð Bjarmason
2021-12-10 21:34   ` Junio C Hamano
2021-12-10 21:53     ` rsbecker
2021-12-11 21:00     ` Elijah Newren
2021-12-12  8:55       ` Luke Diamand
2021-12-10  7:53 ` Luke Diamand
2021-12-10 10:54   ` Joel Holdsworth
2021-12-11  9:58     ` Luke Diamand
2021-12-13 13:47       ` Joel Holdsworth
2021-12-13 19:29         ` Junio C Hamano
2021-12-13 19:58           ` Joel Holdsworth

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.