All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH/RFC v2] git-p4: stream from perforce to speed up clones
@ 2009-07-11  8:31 Luke Diamand
  2009-07-25 14:23 ` Pete Wyckoff
  0 siblings, 1 reply; 7+ messages in thread
From: Luke Diamand @ 2009-07-11  8:31 UTC (permalink / raw)
  To: git; +Cc: Luke Diamand

Change commit() to stream data from Perforce and into fast-import
rather than reading into memory first, and then writing out. This
hugely reduces the memory requirements when cloning non-incrementally.

Signed-off-by: Luke Diamand <luke@diamand.org>
---
I've modified git-p4 so that it streams/pipes data into fast-import rather
than reading everything into memory first. The old scheme meant that
for a large repository (mine is around 2G) my PC just grinds to a
halt and never actually finishes. With this change it takes around ten
minutes.

This is a resend of a patch I sent earlier, which my MUA helpfully
managed to word-wrap.

 contrib/fast-import/git-p4 |  164 ++++++++++++++++++++++++++++++++++++--------
 1 files changed, 136 insertions(+), 28 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 342529d..f415ad0 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -1008,6 +1008,141 @@ class P4Sync(Command):
 
         return filesForCommit
 
+    # output one file from the P4 stream
+    # - helper for streamP4Files
+
+    def streamOneP4File(self, file, contents, branchPrefixes):
+        if verbose:
+            sys.stderr.write("%s\n" %  file["depotFile"])
+
+        relPath = self.stripRepoPath(file['depotFile'], branchPrefixes)
+
+        mode = "644"
+        if isP4Exec(file["type"]):
+            mode = "755"
+        elif file["type"] == "symlink":
+            mode = "120000"
+            # p4 print on a symlink contains "target\n", so strip it off
+            last = contents.pop()
+            last = last[:-1]
+            contents.append(last)
+
+        if self.isWindows and file["type"].endswith("text"):
+            mangled = []
+            for data in contents:
+                data = data.replace("\r\n", "\n")
+                mangled.append(data)
+            contents = mangled
+
+        if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
+            contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents)
+        elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
+            contents = map(lambda text: re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text), contents)
+
+        self.gitStream.write("M %s inline %s\n" % (mode, relPath))
+
+        # total length...
+        length = 0
+        for d in contents:
+            length = length + len(d)
+
+        self.gitStream.write("data %d\n" % length)
+        for d in contents:
+            self.gitStream.write(d)
+        self.gitStream.write("\n")
+
+    def streamOneP4Deletion(self, file, branchPrefixes):
+        if verbose:
+            sys.stderr.write("delete %s\n" %  file["path"])
+
+        relPath = self.stripRepoPath(file['path'], branchPrefixes)
+
+        self.gitStream.write("D %s\n" % relPath)
+
+    # Stream directly from "p4 files" into "git fast-import"
+    def streamP4Files(self, files, branchPrefixes):
+        filesForCommit = []
+        filesToRead = []
+        filesToDelete = []
+
+        for f in files:
+            includeFile = True
+            for val in self.clientSpecDirs:
+                if f['path'].startswith(val[0]):
+                    if val[1] <= 0:
+                        includeFile = False
+                    break
+
+            if includeFile:
+                filesForCommit.append(f)
+                if f['action'] not in ('delete', 'purge'):
+                    filesToRead.append(f)
+                else:
+                    filesToDelete.append(f)
+
+        filedata = []
+
+        # deleted files...
+        for f in filesToDelete:
+            self.streamOneP4Deletion(f, branchPrefixes)
+
+        if len(filesToRead) > 0:
+            stdin_file = tempfile.TemporaryFile(prefix='p4-stdin', mode='w+b')
+            stdin_file.write('\n'.join(['%s#%s' % (f['path'], f['rev'])
+                                                  for f in filesToRead]))
+            stdin_file.flush()
+            stdin_file.seek(0)
+            try:
+                p4 = subprocess.Popen('p4 -G -x - print',
+                                        shell=True,
+                                        stdin=stdin_file,
+                                        stdout=subprocess.PIPE);
+            except OSError,e:
+                print >> sys.stderr, "p4 print failed:", e
+
+            file = {}
+            contents = []
+            have_file_info = False
+
+            try:
+                while True:
+                    marshalled = marshal.load(p4.stdout)
+
+                    if marshalled.has_key('depotFile') and have_file_info:
+                        # start of a new file - output the old one first
+
+                        if file["type"] == "apple":
+                            print "\nfile %s is a strange apple file that forks. Ignoring" % file['path']
+                            continue
+
+
+                        self.streamOneP4File(file,contents,branchPrefixes)
+                        file = {}
+                        contents = []
+                        have_file_info = False
+
+                    # pick up the new file information... for the
+                    # 'data' field we need to append to our array
+                    for k in marshalled.keys():
+                        if k == 'data':
+                            contents.append(marshalled['data'])
+                        else:
+                            file[k] = marshalled[k]
+
+                    have_file_info = True
+            except EOFError:
+                pass
+
+            # do the last chunk
+
+            if file.has_key('depotFile'):
+                self.streamOneP4File(file,contents,branchPrefixes)
+
+            exitCode = p4.wait()
+            if exitCode != 0:
+                sys.stderr.write("p4 subshell failed getting file data\n")
+                sys.exit(1)
+
     def commit(self, details, files, branch, branchPrefixes, parent = ""):
         epoch = details["time"]
         author = details["user"]
@@ -1023,7 +1158,6 @@ class P4Sync(Command):
                 new_files.append (f)
             else:
                 sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
-        files = self.readP4Files(new_files)
 
         self.gitStream.write("commit %s\n" % branch)
 #        gitStream.write("mark :%s\n" % details["change"])
@@ -1051,33 +1185,7 @@ class P4Sync(Command):
                 print "parent %s" % parent
             self.gitStream.write("from %s\n" % parent)
 
-        for file in files:
-            if file["type"] == "apple":
-                print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
-                continue
-
-            relPath = self.stripRepoPath(file['path'], branchPrefixes)
-            if file["action"] in ("delete", "purge"):
-                self.gitStream.write("D %s\n" % relPath)
-            else:
-                data = file['data']
-
-                mode = "644"
-                if isP4Exec(file["type"]):
-                    mode = "755"
-                elif file["type"] == "symlink":
-                    mode = "120000"
-                    # p4 print on a symlink contains "target\n", so strip it off
-                    data = data[:-1]
-
-                if self.isWindows and file["type"].endswith("text"):
-                    data = data.replace("\r\n", "\n")
-
-                self.gitStream.write("M %s inline %s\n" % (mode, relPath))
-                self.gitStream.write("data %s\n" % len(data))
-                self.gitStream.write(data)
-                self.gitStream.write("\n")
-
+        self.streamP4Files(new_files,branchPrefixes)
         self.gitStream.write("\n")
 
         change = int(details["change"])
-- 
1.6.3.GIT

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH/RFC v2] git-p4: stream from perforce to speed up clones
  2009-07-11  8:31 [PATCH/RFC v2] git-p4: stream from perforce to speed up clones Luke Diamand
@ 2009-07-25 14:23 ` Pete Wyckoff
  2009-07-25 14:23   ` [PATCH 1/5] git-p4 stream: remove unused function Pete Wyckoff
                     ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Pete Wyckoff @ 2009-07-25 14:23 UTC (permalink / raw)
  To: Luke Diamand; +Cc: git

luke@diamand.org wrote on Sat, 11 Jul 2009 09:31 +0100:
> Change commit() to stream data from Perforce and into fast-import
> rather than reading into memory first, and then writing out. This
> hugely reduces the memory requirements when cloning non-incrementally.
> 
> Signed-off-by: Luke Diamand <luke@diamand.org>
> ---
> I've modified git-p4 so that it streams/pipes data into fast-import rather
> than reading everything into memory first. The old scheme meant that
> for a large repository (mine is around 2G) my PC just grinds to a
> halt and never actually finishes. With this change it takes around ten
> minutes.
> 
> This is a resend of a patch I sent earlier, which my MUA helpfully
> managed to word-wrap.

This is definitely good stuff.  While I'd like to get Daniel's
native transport code working in my environment, I had a need to
import some big trees quickly from p4 to git.  Usually I resort to a
rare big-memory machine to do the git-p4 import, but with your
change, I can use my laptop without going into swap or
out-of-memory.

For the content, I would recommend some changes.  Here are some
patches on top of yours:

    [PATCH 1/5] git-p4 stream: remove unused function
    [PATCH 2/5] git-p4 stream: do not pass branchPrefixes so much
    [PATCH 3/5] git-p4 stream: show relative path in debug messages
    [PATCH 4/5] git-p4 stream: check apple file type
    [PATCH 5/5] git-p4 stream: use existing p4CmdList with callback

They clean up the code and change your main iterator loop to reuse
the existing p4CmdList.

I'd suggest you merge these into your patch and send it back out.
It's a worthwhile change and we should get Simon to review it.

		-- Pete

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/5] git-p4 stream: remove unused function
  2009-07-25 14:23 ` Pete Wyckoff
@ 2009-07-25 14:23   ` Pete Wyckoff
  2009-07-25 14:24   ` [PATCH 2/5] git-p4 stream: do not pass branchPrefixes so much Pete Wyckoff
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Pete Wyckoff @ 2009-07-25 14:23 UTC (permalink / raw)
  To: Luke Diamand; +Cc: git

This functionality was replaced.

Signed-off-by: Pete Wyckoff <pw@padd.com>
---
 contrib/fast-import/git-p4 |   58 --------------------------------------------
 1 files changed, 0 insertions(+), 58 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index f415ad0..70fa403 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -950,64 +950,6 @@ class P4Sync(Command):
 
         return branches
 
-    ## Should move this out, doesn't use SELF.
-    def readP4Files(self, files):
-        filesForCommit = []
-        filesToRead = []
-
-        for f in files:
-            includeFile = True
-            for val in self.clientSpecDirs:
-                if f['path'].startswith(val[0]):
-                    if val[1] <= 0:
-                        includeFile = False
-                    break
-
-            if includeFile:
-                filesForCommit.append(f)
-                if f['action'] not in ('delete', 'purge'):
-                    filesToRead.append(f)
-
-        filedata = []
-        if len(filesToRead) > 0:
-            filedata = p4CmdList('-x - print',
-                                 stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
-                                                  for f in filesToRead]),
-                                 stdin_mode='w+')
-
-            if "p4ExitCode" in filedata[0]:
-                die("Problems executing p4. Error: [%d]."
-                    % (filedata[0]['p4ExitCode']));
-
-        j = 0;
-        contents = {}
-        while j < len(filedata):
-            stat = filedata[j]
-            j += 1
-            text = ''
-            while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'):
-                text += filedata[j]['data']
-                del filedata[j]['data']
-                j += 1
-
-            if not stat.has_key('depotFile'):
-                sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
-                continue
-
-            if stat['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
-                text = re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text)
-            elif stat['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
-                text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text)
-
-            contents[stat['depotFile']] = text
-
-        for f in filesForCommit:
-            path = f['path']
-            if contents.has_key(path):
-                f['data'] = contents[path]
-
-        return filesForCommit
-
     # output one file from the P4 stream
     # - helper for streamP4Files
 
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/5] git-p4 stream: do not pass branchPrefixes so much
  2009-07-25 14:23 ` Pete Wyckoff
  2009-07-25 14:23   ` [PATCH 1/5] git-p4 stream: remove unused function Pete Wyckoff
@ 2009-07-25 14:24   ` Pete Wyckoff
  2009-07-25 14:24   ` [PATCH 3/5] git-p4 stream: show relative path in debug messages Pete Wyckoff
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Pete Wyckoff @ 2009-07-25 14:24 UTC (permalink / raw)
  To: Luke Diamand; +Cc: git

Just set branchPrefixes in self when commit starts and use it everywhere.
This makes the code a bit cleaner, with no functional change.

Signed-off-by: Pete Wyckoff <pw@padd.com>
---
 contrib/fast-import/git-p4 |   19 ++++++++++---------
 1 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 70fa403..3a52254 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -953,11 +953,11 @@ class P4Sync(Command):
     # output one file from the P4 stream
     # - helper for streamP4Files
 
-    def streamOneP4File(self, file, contents, branchPrefixes):
+    def streamOneP4File(self, file, contents):
         if verbose:
             sys.stderr.write("%s\n" %  file["depotFile"])
 
-        relPath = self.stripRepoPath(file['depotFile'], branchPrefixes)
+        relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
 
         mode = "644"
         if isP4Exec(file["type"]):
@@ -993,16 +993,16 @@ class P4Sync(Command):
             self.gitStream.write(d)
         self.gitStream.write("\n")
 
-    def streamOneP4Deletion(self, file, branchPrefixes):
+    def streamOneP4Deletion(self, file):
         if verbose:
             sys.stderr.write("delete %s\n" %  file["path"])
 
-        relPath = self.stripRepoPath(file['path'], branchPrefixes)
+        relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
 
         self.gitStream.write("D %s\n" % relPath)
 
     # Stream directly from "p4 files" into "git fast-import"
-    def streamP4Files(self, files, branchPrefixes):
+    def streamP4Files(self, files):
         filesForCommit = []
         filesToRead = []
         filesToDelete = []
@@ -1026,7 +1026,7 @@ class P4Sync(Command):
 
         # deleted files...
         for f in filesToDelete:
-            self.streamOneP4Deletion(f, branchPrefixes)
+            self.streamOneP4Deletion(f)
 
         if len(filesToRead) > 0:
             stdin_file = tempfile.TemporaryFile(prefix='p4-stdin', mode='w+b')
@@ -1058,7 +1058,7 @@ class P4Sync(Command):
                             continue
 
 
-                        self.streamOneP4File(file,contents,branchPrefixes)
+                        self.streamOneP4File(file, contents)
                         file = {}
                         contents = []
                         have_file_info = False
@@ -1078,7 +1078,7 @@ class P4Sync(Command):
             # do the last chunk
 
             if file.has_key('depotFile'):
-                self.streamOneP4File(file,contents,branchPrefixes)
+                self.streamOneP4File(file,contents)
 
             exitCode = p4.wait()
             if exitCode != 0:
@@ -1088,6 +1088,7 @@ class P4Sync(Command):
     def commit(self, details, files, branch, branchPrefixes, parent = ""):
         epoch = details["time"]
         author = details["user"]
+	self.branchPrefixes = branchPrefixes
 
         if self.verbose:
             print "commit into %s" % branch
@@ -1127,7 +1128,7 @@ class P4Sync(Command):
                 print "parent %s" % parent
             self.gitStream.write("from %s\n" % parent)
 
-        self.streamP4Files(new_files,branchPrefixes)
+        self.streamP4Files(new_files)
         self.gitStream.write("\n")
 
         change = int(details["change"])
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/5] git-p4 stream: show relative path in debug messages
  2009-07-25 14:23 ` Pete Wyckoff
  2009-07-25 14:23   ` [PATCH 1/5] git-p4 stream: remove unused function Pete Wyckoff
  2009-07-25 14:24   ` [PATCH 2/5] git-p4 stream: do not pass branchPrefixes so much Pete Wyckoff
@ 2009-07-25 14:24   ` Pete Wyckoff
  2009-07-25 14:24   ` [PATCH 4/5] git-p4 stream: check apple file type Pete Wyckoff
  2009-07-25 14:25   ` [PATCH 5/5] git-p4 stream: use existing p4CmdList with callback Pete Wyckoff
  4 siblings, 0 replies; 7+ messages in thread
From: Pete Wyckoff @ 2009-07-25 14:24 UTC (permalink / raw)
  To: Luke Diamand; +Cc: git

Show not the repo path, but the one where git will put the file.  These
paths are shorter and allow verifying branchPrefixes too.

Signed-off-by: Pete Wyckoff <pw@padd.com>
---
 contrib/fast-import/git-p4 |   11 ++++-------
 1 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 3a52254..3ab016b 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -954,10 +954,9 @@ class P4Sync(Command):
     # - helper for streamP4Files
 
     def streamOneP4File(self, file, contents):
-        if verbose:
-            sys.stderr.write("%s\n" %  file["depotFile"])
-
         relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
+        if verbose:
+            sys.stderr.write("%s\n" % relPath)
 
         mode = "644"
         if isP4Exec(file["type"]):
@@ -994,11 +993,9 @@ class P4Sync(Command):
         self.gitStream.write("\n")
 
     def streamOneP4Deletion(self, file):
-        if verbose:
-            sys.stderr.write("delete %s\n" %  file["path"])
-
         relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
-
+        if verbose:
+            sys.stderr.write("delete %s\n" % relPath)
         self.gitStream.write("D %s\n" % relPath)
 
     # Stream directly from "p4 files" into "git fast-import"
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/5] git-p4 stream: check apple file type
  2009-07-25 14:23 ` Pete Wyckoff
                     ` (2 preceding siblings ...)
  2009-07-25 14:24   ` [PATCH 3/5] git-p4 stream: show relative path in debug messages Pete Wyckoff
@ 2009-07-25 14:24   ` Pete Wyckoff
  2009-07-25 14:25   ` [PATCH 5/5] git-p4 stream: use existing p4CmdList with callback Pete Wyckoff
  4 siblings, 0 replies; 7+ messages in thread
From: Pete Wyckoff @ 2009-07-25 14:24 UTC (permalink / raw)
  To: Luke Diamand; +Cc: git

Move this check into the function so both callers do it.
And always reset the file contents in the main loop, as
we just ignored this apple file.

Signed-off-by: Pete Wyckoff <pw@padd.com>
---
 contrib/fast-import/git-p4 |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 3ab016b..af66026 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -954,6 +954,11 @@ class P4Sync(Command):
     # - helper for streamP4Files
 
     def streamOneP4File(self, file, contents):
+	if file["type"] == "apple":
+	    print "\nfile %s is a strange apple file that forks. Ignoring" % \
+	    	  file['depotFile']
+	    return
+
         relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
         if verbose:
             sys.stderr.write("%s\n" % relPath)
@@ -1049,12 +1054,6 @@ class P4Sync(Command):
 
                     if marshalled.has_key('depotFile') and have_file_info:
                         # start of a new file - output the old one first
-
-                        if file["type"] == "apple":
-                            print "\nfile %s is a strange apple file that forks. Ignoring" % file['path']
-                            continue
-
-
                         self.streamOneP4File(file, contents)
                         file = {}
                         contents = []
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/5] git-p4 stream: use existing p4CmdList with callback
  2009-07-25 14:23 ` Pete Wyckoff
                     ` (3 preceding siblings ...)
  2009-07-25 14:24   ` [PATCH 4/5] git-p4 stream: check apple file type Pete Wyckoff
@ 2009-07-25 14:25   ` Pete Wyckoff
  4 siblings, 0 replies; 7+ messages in thread
From: Pete Wyckoff @ 2009-07-25 14:25 UTC (permalink / raw)
  To: Luke Diamand; +Cc: git

Add a callback argument to iterate over returned contents
rather than replicate the entire function just to do that.

Signed-off-by: Pete Wyckoff <pw@padd.com>
---
 contrib/fast-import/git-p4 |   88 +++++++++++++++++++-------------------------
 1 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index af66026..eece984 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -201,7 +201,7 @@ def isModeExec(mode):
 def isModeExecChanged(src_mode, dst_mode):
     return isModeExec(src_mode) != isModeExec(dst_mode)
 
-def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None):
     cmd = p4_build_cmd("-G %s" % (cmd))
     if verbose:
         sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -224,7 +224,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
     try:
         while True:
             entry = marshal.load(p4.stdout)
-            result.append(entry)
+	    if cb is not None:
+		cb(entry)
+	    else:
+		result.append(entry)
     except EOFError:
         pass
     exitCode = p4.wait()
@@ -1003,6 +1006,26 @@ class P4Sync(Command):
             sys.stderr.write("delete %s\n" % relPath)
         self.gitStream.write("D %s\n" % relPath)
 
+    # handle another chunk of streaming data
+    def streamP4FilesCb(self, marshalled):
+
+	if marshalled.has_key('depotFile') and self.stream_have_file_info:
+	    # start of a new file - output the old one first
+	    self.streamOneP4File(self.stream_file, self.stream_contents)
+	    self.stream_file = {}
+	    self.stream_contents = []
+	    self.stream_have_file_info = False
+
+	# pick up the new file information... for the
+	# 'data' field we need to append to our array
+	for k in marshalled.keys():
+	    if k == 'data':
+		self.stream_contents.append(marshalled['data'])
+	    else:
+		self.stream_file[k] = marshalled[k]
+
+	self.stream_have_file_info = True
+
     # Stream directly from "p4 files" into "git fast-import"
     def streamP4Files(self, files):
         filesForCommit = []
@@ -1024,62 +1047,27 @@ class P4Sync(Command):
                 else:
                     filesToDelete.append(f)
 
-        filedata = []
-
         # deleted files...
         for f in filesToDelete:
             self.streamOneP4Deletion(f)
 
         if len(filesToRead) > 0:
-            stdin_file = tempfile.TemporaryFile(prefix='p4-stdin', mode='w+b')
-            stdin_file.write('\n'.join(['%s#%s' % (f['path'], f['rev'])
-                                                  for f in filesToRead]))
-            stdin_file.flush()
-            stdin_file.seek(0)
-            try:
-                p4 = subprocess.Popen('p4 -G -x - print',
-                                        shell=True,
-                                        stdin=stdin_file,
-                                        stdout=subprocess.PIPE);
-            except OSError,e:
-                print >> sys.stderr, "p4 print failed:", e
+            self.stream_file = {}
+            self.stream_contents = []
+            self.stream_have_file_info = False
 
-            file = {}
-            contents = []
-            have_file_info = False
+	    # curry self argument
+	    def streamP4FilesCbSelf(entry):
+		self.streamP4FilesCb(entry)
 
-            try:
-                while True:
-                    marshalled = marshal.load(p4.stdout)
-
-                    if marshalled.has_key('depotFile') and have_file_info:
-                        # start of a new file - output the old one first
-                        self.streamOneP4File(file, contents)
-                        file = {}
-                        contents = []
-                        have_file_info = False
-
-                    # pick up the new file information... for the
-                    # 'data' field we need to append to our array
-                    for k in marshalled.keys():
-                        if k == 'data':
-                            contents.append(marshalled['data'])
-                        else:
-                            file[k] = marshalled[k]
-
-                    have_file_info = True
-            except EOFError:
-                pass
+	    p4CmdList("-x - print",
+		'\n'.join(['%s#%s' % (f['path'], f['rev'])
+                                                  for f in filesToRead]),
+	        cb=streamP4FilesCbSelf)
 
             # do the last chunk
-
-            if file.has_key('depotFile'):
-                self.streamOneP4File(file,contents)
-
-            exitCode = p4.wait()
-            if exitCode != 0:
-                sys.stderr.write("p4 subshell failed getting file data\n")
-                sys.exit(1)
+            if self.stream_file.has_key('depotFile'):
+                self.streamOneP4File(self.stream_file, self.stream_contents)
 
     def commit(self, details, files, branch, branchPrefixes, parent = ""):
         epoch = details["time"]
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-07-25 14:33 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-11  8:31 [PATCH/RFC v2] git-p4: stream from perforce to speed up clones Luke Diamand
2009-07-25 14:23 ` Pete Wyckoff
2009-07-25 14:23   ` [PATCH 1/5] git-p4 stream: remove unused function Pete Wyckoff
2009-07-25 14:24   ` [PATCH 2/5] git-p4 stream: do not pass branchPrefixes so much Pete Wyckoff
2009-07-25 14:24   ` [PATCH 3/5] git-p4 stream: show relative path in debug messages Pete Wyckoff
2009-07-25 14:24   ` [PATCH 4/5] git-p4 stream: check apple file type Pete Wyckoff
2009-07-25 14:25   ` [PATCH 5/5] git-p4 stream: use existing p4CmdList with callback Pete Wyckoff

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.