git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git-svn: Make it scream by minimizing temp files
@ 2008-08-08 22:41 Marcus Griep
  2008-08-08 22:59 ` Junio C Hamano
  2008-08-09  6:25 ` Eric Wong
  0 siblings, 2 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-08 22:41 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

Currently, git-svn would create a temp file on four occasions:
1. Reading a blob out of the object db
2. Creating a delta from svn
3. Hashing and writing a blob into the object db
4. Reading a blob out of the object db (in another place in code)

Any time git-svn did the above, it would dutifully create and then
delete said temp file.  Unfortunately, this means that between 2-4
temporary files are created/deleted per file 'add/modify'-ed in
svn (O(n)).  This causes significant overhead and helps the inode
counter to spin beautifully.

By its nature, git-svn is a serial beast.  Thus, reusing a temp file
does not pose significant problems.  "truncate and seek" takes much
less time than "unlink and create".  This patch centralizes the
tempfile creation and holds onto the tempfile until they are deleted
on exit.  This significantly reduces file overhead, now requiring
at most three (3) temp files per run (O(1)).

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 git-svn.perl |   48 ++++++++++++++++++++++++++++++------------------
 1 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 5099c1f..02ae207 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -1257,7 +1257,7 @@ sub md5sum {
 	my $arg = shift;
 	my $ref = ref $arg;
 	my $md5 = Digest::MD5->new();
-        if ($ref eq 'GLOB' || $ref eq 'IO::File') {
+        if ($ref eq 'GLOB' || $ref eq 'IO::File' || $ref eq 'File::Temp') {
 		$md5->addfile($arg) or croak $!;
 	} elsif ($ref eq 'SCALAR') {
 		$md5->add($$arg) or croak $!;
@@ -1282,6 +1282,8 @@ use Carp qw/croak/;
 use File::Path qw/mkpath/;
 use File::Copy qw/copy/;
 use IPC::Open3;
+use File::Temp qw/ :seekable /;
+use File::Spec;
 
 my ($_gc_nr, $_gc_period);
 
@@ -1320,10 +1322,11 @@ BEGIN {
 	}
 }
 
-my (%LOCKFILES, %INDEX_FILES);
+my (%LOCKFILES, %INDEX_FILES, %TEMP_FILES);
 END {
 	unlink keys %LOCKFILES if %LOCKFILES;
 	unlink keys %INDEX_FILES if %INDEX_FILES;
+	unlink values %TEMP_FILES if %TEMP_FILES;
 }
 
 sub resolve_local_globs {
@@ -2932,6 +2935,23 @@ sub remove_username {
 	$_[0] =~ s{^([^:]*://)[^@]+@}{$1};
 }
 
+sub _temp_file {
+	my ($self, $fd, $autoflush) = @_;
+	if (defined $TEMP_FILES{$fd}) {
+		truncate $TEMP_FILES{$fd}, 0 or croak $!;
+		seek $TEMP_FILES{$fd}, 0, 0 or croak $!;
+	} else {
+		$TEMP_FILES{$fd} = File::Temp->new(
+									TEMPLATE => 'GitSvn_XXXXXX',
+									DIR => File::Spec->tmpdir
+									) or croak $!;
+		if (defined $autoflush) {
+			$TEMP_FILES{$fd}->autoflush($autoflush);
+		}
+	}
+	$TEMP_FILES{$fd};
+}
+
 package Git::SVN::Prompt;
 use strict;
 use warnings;
@@ -3222,13 +3242,11 @@ sub change_file_prop {
 
 sub apply_textdelta {
 	my ($self, $fb, $exp) = @_;
-	my $fh = IO::File->new_tmpfile;
-	$fh->autoflush(1);
+	my $fh = Git::SVN->_temp_file('delta_temp', 1);
 	# $fh gets auto-closed() by SVN::TxDelta::apply(),
 	# (but $base does not,) so dup() it for reading in close_file
 	open my $dup, '<&', $fh or croak $!;
-	my $base = IO::File->new_tmpfile;
-	$base->autoflush(1);
+	my $base = Git::SVN->_temp_file('git_blob_temp', 1);
 	if ($fb->{blob}) {
 		print $base 'link ' if ($fb->{mode_a} == 120000);
 		my $size = $::_repository->cat_blob($fb->{blob}, $base);
@@ -3243,9 +3261,9 @@ sub apply_textdelta {
 		}
 	}
 	seek $base, 0, 0 or croak $!;
-	$fb->{fh} = $dup;
+	$fb->{fh} = $fh;
 	$fb->{base} = $base;
-	[ SVN::TxDelta::apply($base, $fh, undef, $fb->{path}, $fb->{pool}) ];
+	[ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
 }
 
 sub close_file {
@@ -3274,22 +3292,18 @@ sub close_file {
 			}
 		}
 
-		my ($tmp_fh, $tmp_filename) = File::Temp::tempfile(UNLINK => 1);
+		my $tmp_fh = Git::SVN->_temp_file('hash_temp');
 		my $result;
 		while ($result = sysread($fh, my $string, 1024)) {
 			my $wrote = syswrite($tmp_fh, $string, $result);
 			defined($wrote) && $wrote == $result
-				or croak("write $tmp_filename: $!\n");
+				or croak("write $tmp_fh->filename: $!\n");
 		}
 		defined $result or croak $!;
-		close $tmp_fh or croak $!;
 
-		close $fh or croak $!;
 
-		$hash = $::_repository->hash_and_insert_object($tmp_filename);
-		unlink($tmp_filename);
+		$hash = $::_repository->hash_and_insert_object($tmp_fh->filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
-		close $fb->{base} or croak $!;
 	} else {
 		$hash = $fb->{blob} or die "no blob information\n";
 	}
@@ -3659,7 +3673,7 @@ sub chg_file {
 	} elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 		$self->change_file_prop($fbat,'svn:executable',undef);
 	}
-	my $fh = IO::File->new_tmpfile or croak $!;
+	my $fh = Git::SVN->_temp_file('git_blob_temp');
 	if ($m->{mode_b} =~ /^120/) {
 		print $fh 'link ' or croak $!;
 		$self->change_file_prop($fbat,'svn:special','*');
@@ -3679,8 +3693,6 @@ sub chg_file {
 	my $got = SVN::TxDelta::send_stream($fh, @$atd, $pool);
 	die "Checksum mismatch\nexpected: $exp\ngot: $got\n" if ($got ne $exp);
 	$pool->clear;
-
-	close $fh or croak $!;
 }
 
 sub D {
-- 
1.6.0.rc2.4.g39f8

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-08 22:41 [PATCH] git-svn: Make it scream by minimizing temp files Marcus Griep
@ 2008-08-08 22:59 ` Junio C Hamano
  2008-08-09  1:12   ` Marcus Griep
  2008-08-09  6:25 ` Eric Wong
  1 sibling, 1 reply; 43+ messages in thread
From: Junio C Hamano @ 2008-08-08 22:59 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Eric Wong

Marcus Griep <marcus@griep.us> writes:

> Currently, git-svn would create a temp file on four occasions:
> 1. Reading a blob out of the object db
> 2. Creating a delta from svn
> 3. Hashing and writing a blob into the object db
> 4. Reading a blob out of the object db (in another place in code)
>
> Any time git-svn did the above, it would dutifully create and then
> delete said temp file.  Unfortunately, this means that between 2-4
> temporary files are created/deleted per file 'add/modify'-ed in
> svn (O(n)).  This causes significant overhead and helps the inode
> counter to spin beautifully.
>
> By its nature, git-svn is a serial beast.  Thus, reusing a temp file
> does not pose significant problems.  "truncate and seek" takes much
> less time than "unlink and create".  This patch centralizes the
> tempfile creation and holds onto the tempfile until they are deleted
> on exit.  This significantly reduces file overhead, now requiring
> at most three (3) temp files per run (O(1)).

Beautifully written analysis of the issue being tackled.

But optimization patch should be backed by numbers --- do you have a
benchmark result of some sort that you would want to include here?

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-08 22:59 ` Junio C Hamano
@ 2008-08-09  1:12   ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-09  1:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Eric Wong

I am working on that right now; however, against master I am getting
checksum mismatches with my svn repository, so generating benchmarks
against that requires committing a revert of ffe256f9, which makes
things even slower. My work comp is running cygwin, and that could be 
why ffe256f9 is a problem.

I am, however using a smaller repository, namely that of the Boo
Programming Language, to run some benchmarks.  I'm running it on a 
Linux box, and I'll publish the results as soon as they are ready.  

I'll include:

ffe256f9 and my patch
ffe256f9 and no patch
revert ffe256f9 and my patch
revert ffe256f9 and no patch

Marcus

Junio C Hamano wrote:
> Marcus Griep <marcus@griep.us> writes:
> 
>> Currently, git-svn would create a temp file on four occasions:
>> 1. Reading a blob out of the object db
>> 2. Creating a delta from svn
>> 3. Hashing and writing a blob into the object db
>> 4. Reading a blob out of the object db (in another place in code)
>>
>> Any time git-svn did the above, it would dutifully create and then
>> delete said temp file.  Unfortunately, this means that between 2-4
>> temporary files are created/deleted per file 'add/modify'-ed in
>> svn (O(n)).  This causes significant overhead and helps the inode
>> counter to spin beautifully.
>>
>> By its nature, git-svn is a serial beast.  Thus, reusing a temp file
>> does not pose significant problems.  "truncate and seek" takes much
>> less time than "unlink and create".  This patch centralizes the
>> tempfile creation and holds onto the tempfile until they are deleted
>> on exit.  This significantly reduces file overhead, now requiring
>> at most three (3) temp files per run (O(1)).
> 
> Beautifully written analysis of the issue being tackled.
> 
> But optimization patch should be backed by numbers --- do you have a
> benchmark result of some sort that you would want to include here?
> 
> 

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-08 22:41 [PATCH] git-svn: Make it scream by minimizing temp files Marcus Griep
  2008-08-08 22:59 ` Junio C Hamano
@ 2008-08-09  6:25 ` Eric Wong
  2008-08-09 15:45   ` Marcus Griep
  1 sibling, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-09  6:25 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Junio C Hamano

Marcus Griep <marcus@griep.us> wrote:
> Currently, git-svn would create a temp file on four occasions:
> 1. Reading a blob out of the object db
> 2. Creating a delta from svn
> 3. Hashing and writing a blob into the object db
> 4. Reading a blob out of the object db (in another place in code)
> 
> Any time git-svn did the above, it would dutifully create and then
> delete said temp file.  Unfortunately, this means that between 2-4
> temporary files are created/deleted per file 'add/modify'-ed in
> svn (O(n)).  This causes significant overhead and helps the inode
> counter to spin beautifully.
> 
> By its nature, git-svn is a serial beast.  Thus, reusing a temp file
> does not pose significant problems.  "truncate and seek" takes much
> less time than "unlink and create".  This patch centralizes the
> tempfile creation and holds onto the tempfile until they are deleted
> on exit.  This significantly reduces file overhead, now requiring
> at most three (3) temp files per run (O(1)).

Wow.  I've considered this in the past didn't think there would be a
significant difference (of course I'm always network I/O bound).  Which
platform and filesystem are you using are you using for tests?

I don't notice any difference running the test suite on Linux + ext3
here, but the test suite is not a good benchmark :)

> --- a/git-svn.perl
> +++ b/git-svn.perl
> @@ -1282,6 +1282,8 @@ use Carp qw/croak/;
>  use File::Path qw/mkpath/;
>  use File::Copy qw/copy/;
>  use IPC::Open3;
> +use File::Temp qw/ :seekable /;

qw/ :seekable / does not appear in my version of Perl (5.8.8-7etch3 from
Debian stable)  Just having "use File::Temp;" there works for me.

>  sub resolve_local_globs {
> @@ -2932,6 +2935,23 @@ sub remove_username {
>  	$_[0] =~ s{^([^:]*://)[^@]+@}{$1};
>  }
>  
> +sub _temp_file {
> +	my ($self, $fd, $autoflush) = @_;
> +	if (defined $TEMP_FILES{$fd}) {
> +		truncate $TEMP_FILES{$fd}, 0 or croak $!;
> +		seek $TEMP_FILES{$fd}, 0, 0 or croak $!;

Perhaps a sysseek in addition to the seek above would help
with the problems you mentioned in the other email.

		sysseek $TEMP_FILES{$fd}, 0, 0 or croak $!;

(It doesn't seem to affect me when running the test suite, though).

> +	} else {
> +		$TEMP_FILES{$fd} = File::Temp->new(
> +									TEMPLATE => 'GitSvn_XXXXXX',
> +									DIR => File::Spec->tmpdir
> +									) or croak $!;

Way too much indentation :x

> +		if (defined $autoflush) {
> +			$TEMP_FILES{$fd}->autoflush($autoflush);
> +		}

Given how much we interact with external programs, I'd rather force
every autoflush on every file handle to avoid subtle bugs on
different platforms.  It's faster in some (most?) cases, too.


Also, this seems generic enough that other programs (git-cvsimport
perhaps) can probably use it, too.  So maybe it could go into Git.pm or
a new module, Git/Tempfile.pm?

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-09  6:25 ` Eric Wong
@ 2008-08-09 15:45   ` Marcus Griep
  2008-08-10  1:46     ` Eric Wong
                       ` (2 more replies)
  0 siblings, 3 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-09 15:45 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git Mailing List, Junio C Hamano

Eric Wong wrote:
> Wow.  I've considered this in the past didn't think there would be a
> significant difference (of course I'm always network I/O bound).  Which
> platform and filesystem are you using are you using for tests?
> 
> I don't notice any difference running the test suite on Linux + ext3
> here, but the test suite is not a good benchmark :)

Yeah, much of the test suite uses small repositories without much history.
Where you see the benefit is with large repositories with many files.
In such cases, even a small speedup can reduce the total import time 
significantly.

My benchmark against a large repository uses the svn we have at work, but
there is currently a planned power outage, so I'll have to wait until 
tonight to run my benchmarks there (and they'll take significant time).

Nonetheless, my tests against the smaller Boo repository showed almost no
change in user time, but a 10% reduction in system time used.  There was
also a small (1%) drop in minor page faults.  I'm confident in these
results, but won't certify them until I'm able to run the tests on a much
larger repository.

>> +use File::Temp qw/ :seekable /;
> 
> qw/ :seekable / does not appear in my version of Perl (5.8.8-7etch3 from
> Debian stable)  Just having "use File::Temp;" there works for me.

My newbishness in perl shows.  I'll change it to a simple 'use'.

>> +		seek $TEMP_FILES{$fd}, 0, 0 or croak $!;
> 
> Perhaps a sysseek in addition to the seek above would help
> with the problems you mentioned in the other email.
> 
> 		sysseek $TEMP_FILES{$fd}, 0, 0 or croak $!;
> 
> (It doesn't seem to affect me when running the test suite, though).

Sounds like a good idea, but I found the source of my cygwin issue,
namely that /tmp (which perl uses for its temp files) was mounted
in textmode.  I fixed that by remounting that folder in binmode.

Nonetheless, if consumers may use sysread, after getting the file handle
then we'll want to use sysseek.

>> +	} else {
>> +		$TEMP_FILES{$fd} = File::Temp->new(
>> +									TEMPLATE => 'GitSvn_XXXXXX',
>> +									DIR => File::Spec->tmpdir
>> +									) or croak $!;
> 
> Way too much indentation :x

That's what I get for assuming a tab width of 4.  I'll redo it with
about half as many tabs.

>> +		if (defined $autoflush) {
>> +			$TEMP_FILES{$fd}->autoflush($autoflush);
>> +		}
> 
> Given how much we interact with external programs, I'd rather force
> every autoflush on every file handle to avoid subtle bugs on
> different platforms.  It's faster in some (most?) cases, too.

That sounds good to me.

> Also, this seems generic enough that other programs (git-cvsimport
> perhaps) can probably use it, too.  So maybe it could go into Git.pm or
> a new module, Git/Tempfile.pm?

I'd advocate the latter since it's not really Git functionality, but
rather a support, so a submodule would perhaps be the better placement.

Also, I came up with one more optimization inside 'sub close_file', so
I'll roll that in too.  Tell me where you/the community would prefer 
the tempfile functionality, and I'll submit a new patch series with 
one patch for the module and one patch for git-svn.

By then, I should have some better benchmark results.

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-09 15:45   ` Marcus Griep
@ 2008-08-10  1:46     ` Eric Wong
  2008-08-10  3:53       ` Junio C Hamano
  2008-08-10  8:09     ` Eric Wong
  2008-08-11 15:53     ` [PATCH 0/3] git-svn and temporary file improvements Marcus Griep
  2 siblings, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-10  1:46 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Junio C Hamano

Marcus Griep <marcus@griep.us> wrote:
> Eric Wong wrote:
> > Perhaps a sysseek in addition to the seek above would help
> > with the problems you mentioned in the other email.
> > 
> > 		sysseek $TEMP_FILES{$fd}, 0, 0 or croak $!;
> > 
> > (It doesn't seem to affect me when running the test suite, though).
> 
> Sounds like a good idea, but I found the source of my cygwin issue,
> namely that /tmp (which perl uses for its temp files) was mounted
> in textmode.  I fixed that by remounting that folder in binmode.

Hmm.. Instead of relying on users on weird platforms to change their
mount options, git-svn should also set binmode on all filehandles
regardless.  Will that get around the problem you had with cygwin?

git-svn already sets binmode for all the rev_map files.

> Nonetheless, if consumers may use sysread, after getting the file handle
> then we'll want to use sysseek.
> 
> >> +	} else {
> >> +		$TEMP_FILES{$fd} = File::Temp->new(
> >> +									TEMPLATE => 'GitSvn_XXXXXX',
> >> +									DIR => File::Spec->tmpdir
> >> +									) or croak $!;
> > 
> > Way too much indentation :x
> 
> That's what I get for assuming a tab width of 4.  I'll redo it with
> about half as many tabs.

Tabwidth is 8 characters by default, and that is what git uses.

> > Also, this seems generic enough that other programs (git-cvsimport
> > perhaps) can probably use it, too.  So maybe it could go into Git.pm or
> > a new module, Git/Tempfile.pm?
> 
> I'd advocate the latter since it's not really Git functionality, but
> rather a support, so a submodule would perhaps be the better placement.
> 
> Also, I came up with one more optimization inside 'sub close_file', so
> I'll roll that in too.  Tell me where you/the community would prefer 
> the tempfile functionality, and I'll submit a new patch series with 
> one patch for the module and one patch for git-svn.
> 
> By then, I should have some better benchmark results.

Junio (or anybody else), any thoughts on what the submodule should be
named?  I'm not good at naming things :x

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-10  1:46     ` Eric Wong
@ 2008-08-10  3:53       ` Junio C Hamano
  2008-08-10  7:47         ` Eric Wong
  0 siblings, 1 reply; 43+ messages in thread
From: Junio C Hamano @ 2008-08-10  3:53 UTC (permalink / raw)
  To: Eric Wong; +Cc: Marcus Griep, Git Mailing List

Eric Wong <normalperson@yhbt.net> writes:

> Junio (or anybody else), any thoughts on what the submodule should be
> named?  I'm not good at naming things :x

I'd say putting it in Git.pm itself is fine.  Git.pm is to give common
services to Porcelains, and we already have things like command_*_pipe()
family of functions that do not have to be git specific.

I'd be a bit surprised if there isn't any existing CPAN module for things
like this, though...

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-10  3:53       ` Junio C Hamano
@ 2008-08-10  7:47         ` Eric Wong
  2008-08-10  8:26           ` Junio C Hamano
  0 siblings, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-10  7:47 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Marcus Griep, Git Mailing List

Junio C Hamano <gitster@pobox.com> wrote:
> Eric Wong <normalperson@yhbt.net> writes:
> 
> > Junio (or anybody else), any thoughts on what the submodule should be
> > named?  I'm not good at naming things :x
> 
> I'd say putting it in Git.pm itself is fine.  Git.pm is to give common
> services to Porcelains, and we already have things like command_*_pipe()
> family of functions that do not have to be git specific.

OK.

> I'd be a bit surprised if there isn't any existing CPAN module for things
> like this, though...

Wow, I am surprised.  I couldn't find anything in a few minutes of
searching...

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-09 15:45   ` Marcus Griep
  2008-08-10  1:46     ` Eric Wong
@ 2008-08-10  8:09     ` Eric Wong
  2008-08-11 15:53     ` [PATCH 0/3] git-svn and temporary file improvements Marcus Griep
  2 siblings, 0 replies; 43+ messages in thread
From: Eric Wong @ 2008-08-10  8:09 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Junio C Hamano

Marcus Griep <marcus@griep.us> wrote:
> Also, I came up with one more optimization inside 'sub close_file',

Would that be truncating the file immediately after we're done using it?

Previously IO->new_tmpfile would unlink the file immediately after it
got created; so closing the file descriptor immediately after use would
allow the OS it to bypass the actual writeout to disk on an asynchronous
filesystem.

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] git-svn: Make it scream by minimizing temp files
  2008-08-10  7:47         ` Eric Wong
@ 2008-08-10  8:26           ` Junio C Hamano
  0 siblings, 0 replies; 43+ messages in thread
From: Junio C Hamano @ 2008-08-10  8:26 UTC (permalink / raw)
  To: Eric Wong; +Cc: Marcus Griep, Git Mailing List

Eric Wong <normalperson@yhbt.net> writes:

> Junio C Hamano <gitster@pobox.com> wrote:
> ...
>> I'd be a bit surprised if there isn't any existing CPAN module for things
>> like this, though...
>
> Wow, I am surprised.  I couldn't find anything in a few minutes of
> searching...

That's Ok.  Even CPAN has something, if it is not widely used and/or if it
comes with a lot of unnecessary baggage, we would be better off having the
single function in Git.pm.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 0/3] git-svn and temporary file improvements
  2008-08-09 15:45   ` Marcus Griep
  2008-08-10  1:46     ` Eric Wong
  2008-08-10  8:09     ` Eric Wong
@ 2008-08-11 15:53     ` Marcus Griep
  2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
  2 siblings, 1 reply; 43+ messages in thread
From: Marcus Griep @ 2008-08-11 15:53 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano


This series of patches relates to temp file usage within git-svn and possible
extensions applicable to other perl auxiliary functions.

The first patch allows for a central "registry" of temp files to be maintained.
It offers both locking and non-locking constructs depending upon the user's
complexity concern. The functions provided are also documented for perldoc.

The second patch changes git-svn to utilize the central registry in the first
patch to help reduce the amount of temp files created and destroyed during a
normal run of git-svn. The asymptotic limit on the number of temp files needed
is decreased from O(n+m) to O(1) where n is the number of files imported and
m is the number of file deltas. In real terms, this change does not
significantly reduce the time required for an import as other concerns, such as
network and disk i/o dominate over inode/MFT changes, however an incremental
reduction of ~10% system time was found on large change sets, though in a large
repository of small changesets, this incremental reduction reduced to 
approximately 3%.

The third patch modifies the way git-svn handles symlinks versus normal files
imported from svn. Currently, git-svn is very inefficient in this respect,
duplicating entire files solely for the sake of eliminating the first five
bytes of the file if it is a symlink. This causes a large amount of unnecessary
disk i/o, even when considering most of it takes place in in-memory buffers.
By eliminating the unnecessary duplication for normal files, a significant 48%
reduction in system time and a 33% reduction in user time was realized on
large changesets. Over many commits with small changesets, other operations
dominate, but an incremental 6% reduction was still noted. In addition, in both
cases a 15-25% reduction in maximum resident set size was found.

Logs and results of the benchmarks along with the procedure used are available
at http://blog.xpdm.us/2008/08/git-svn-and-temporary-files.html.

Marcus Griep (3):
      Git.pm: Add faculties to allow temp files to be cached
      git-svn: Make it scream by minimizing temp files
      git-svn: Reduce temp file usage when dealing with non-links

 git-svn.perl |   84 ++++++++++++++++++++--------------
 perl/Git.pm  |  145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 192 insertions(+), 37 deletions(-)

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-11 15:53     ` [PATCH 0/3] git-svn and temporary file improvements Marcus Griep
@ 2008-08-11 15:53       ` Marcus Griep
  2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
                           ` (4 more replies)
  0 siblings, 5 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-11 15:53 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

This patch offers a generic interface to allow temp files to be
cached while using an instance of the 'Git' package. If many
temp files are created and destroyed during the execution of a
program, this caching mechanism can help reduce the amount of
files created and destroyed by the filesystem.

There are two methods offered for creating a new file: a no-lock and
a acquire-lock version. The no-lock version provides no
guarantee that a file is not in use or that the temp file may be
stolen by a subsequent request. The acquire-lock version provides a
weak guarantee that a temp file will not be stolen by subsequent
requests even from a no-lock request. If a file is locked when
another acquire request is made, a simple error is thrown.

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 perl/Git.pm |  145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/perl/Git.pm b/perl/Git.pm
index e1ca5b4..fc24f55 100644
--- a/perl/Git.pm
+++ b/perl/Git.pm
@@ -57,7 +57,8 @@ require Exporter;
                 command_output_pipe command_input_pipe command_close_pipe
                 command_bidi_pipe command_close_bidi_pipe
                 version exec_path hash_object git_cmd_try
-                remote_refs);
+                remote_refs
+                temp_acquire temp_release temp_unsafe temp_reset);
 
 
 =head1 DESCRIPTION
@@ -99,7 +100,9 @@ use Carp qw(carp croak); # but croak is bad - throw instead
 use Error qw(:try);
 use Cwd qw(abs_path);
 use IPC::Open2 qw(open2);
-
+use File::Temp ();
+require File::Spec;
+use Fcntl qw(SEEK_SET);
 }
 
 
@@ -933,6 +936,143 @@ sub _close_cat_blob {
 	delete @$self{@vars};
 }
 
+
+{ # %TEMP_* Lexical Context
+
+my (%TEMP_LOCKS, %TEMP_FILES);
+
+=item temp_acquire ( NAME )
+
+Attempts to retreive the temporary file mapped to the string C<NAME>. If an
+associated temp file has not been created this session or was closed, it is
+created, cached, and set for autoflush and binmode.
+
+Internally locks the file mapped to C<NAME>. This lock must be released with
+C<temp_release()> when the temp file is no longer needed. Subsequent attempts
+to retrieve temporary files mapped to the same C<NAME> while still locked will
+cause an error. This locking mechanism provides a weak guarantee and is not
+threadsafe. It does provide some error checking to help prevent temp file refs
+writing over one another.
+
+The L<File::Handle> returned is truncated and seeked to position 0.
+
+In general, the L<File::Handle> returned should not be closed by consumers as
+it defeats the purpose of this caching mechanism. If you need to close the temp
+file handle, then you should use L<File::Temp> or another temp file faculty
+directly. If a handle is closed and then requested again, then a warning will
+issue.
+
+=cut
+
+sub temp_acquire {
+	my ($self, $name) = _maybe_self(@_);
+
+	my $temp_fd = _temp_cache($name);
+
+	$TEMP_LOCKS{$temp_fd} = 1;
+	$temp_fd;
+}
+
+=item temp_release ( NAME [, BOOL] )
+
+=item temp_release ( FILEHANDLE [, BOOL] )
+
+Releases a lock acquired through C<temp_acquire()>. Can be called either with
+the C<NAME> mapping used when acquiring the temp file or with the C<FILEHANDLE>
+referencing a locked temp file.
+
+Warns if an attempt is made to release a file that is not locked.
+
+If called with C<BOOL> true, then the temp file will be truncated before being
+released. This can help to reduce disk I/O where the system is smart enough to
+detect the truncation while data is in the output buffers.
+
+=cut
+
+sub temp_release {
+	my ($self, $temp_fd, $trunc) = _maybe_self(@_);
+
+	if (ref($temp_fd) ne 'File::Temp') {
+		$temp_fd = $TEMP_FILES{$temp_fd};
+	}
+	unless ($TEMP_LOCKS{$temp_fd}) {
+		carp "Attempt to release temp file '$temp_fd' that has not been locked";
+	}
+	temp_reset($temp_fd) if $trunc and $temp_fd->opened;
+
+	$TEMP_LOCKS{$temp_fd} = 0;
+	undef;
+}
+
+=item temp_unsafe ( NAME )
+
+Attempts to retreive the temporary file mapped to the string C<NAME>. If an
+associated temp file has not been created this session or was closed, it is
+created, cached, and set for autoflush and binmode.
+
+If the file mapped to C<NAME> has been locked using C<temp_acquire()>, then
+this method will throw an L<Error::Simple>.
+
+The L<File::Handle> returned is truncated and seeked to position 0.
+
+In general, the L<File::Handle> returned should not be closed by consumers as
+it defeats the purpose of this caching mechanism. If you need to close the temp
+file handle, then you should use L<File::Temp> or another temp file faculty
+directly. If a handle is closed and then requested again, then a warning will
+issue.
+
+=cut
+
+sub temp_unsafe {
+	my ($self, $name) = _maybe_self(@_);
+
+	_temp_cache($name);
+}
+
+sub _temp_cache {
+	my ($name) = @_;
+
+	my $temp_fd = \$TEMP_FILES{$name};
+	if (defined $$temp_fd and $$temp_fd->opened) {
+		if ($TEMP_LOCKS{$$temp_fd}) {
+			throw Error::Simple("Temp file with moniker '$name' already in use");
+		}
+		temp_reset($$temp_fd);
+	} else {
+		if (defined $$temp_fd) { # then we're here because of a closed handle.
+			carp "Temp file '$name' was closed. Opening replacement.";
+		}
+		$$temp_fd = File::Temp->new(
+			TEMPLATE => 'Git_XXXXXX',
+			DIR => File::Spec->tmpdir
+			) or throw Error::Simple("couldn't open new temp file");
+		$$temp_fd->autoflush;
+		binmode $$temp_fd;
+	}
+	$$temp_fd;
+}
+
+=item temp_reset ( FILEHANDLE )
+
+Truncates and resets the position of the C<FILEHANDLE>.  Uses C<sysseek>.
+
+=cut
+
+sub temp_reset {
+	my ($self, $temp_fd) = _maybe_self(@_);
+
+	truncate $temp_fd, 0
+		or throw Error::Simple("couldn't truncate file");
+	sysseek $temp_fd, 0, SEEK_SET
+		or throw Error::Simple("couldn't seek to beginning of file");
+}
+
+sub END {
+	unlink values %TEMP_FILES if %TEMP_FILES;
+}
+
+} # %TEMP_* Lexical Context
+
 =back
 
 =head1 ERROR HANDLING
@@ -1153,6 +1293,7 @@ sub DESTROY {
 	my ($self) = @_;
 	$self->_close_hash_and_insert_object();
 	$self->_close_cat_blob();
+	unlink values %{$self->{temp_files}} if $self->{temp_files};
 }
 
 
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 2/3] git-svn: Make it scream by minimizing temp files
  2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
@ 2008-08-11 15:53         ` Marcus Griep
  2008-08-11 15:53           ` [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links Marcus Griep
                             ` (2 more replies)
  2008-08-12  3:08         ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
                           ` (3 subsequent siblings)
  4 siblings, 3 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-11 15:53 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

Currently, git-svn would create a temp file on four occasions:
1. Reading a blob out of the object db
2. Creating a delta from svn
3. Hashing and writing a blob into the object db
4. Reading a blob out of the object db (in another place in code)

Any time git-svn did the above, it would dutifully create and then
delete said temp file.  Unfortunately, this means that between 2-4
temporary files are created/deleted per file 'add/modify'-ed in
svn (O(n)).  This causes significant overhead and helps the inode
counter to spin beautifully.

By its nature, git-svn is a serial beast.  Thus, reusing a temp file
does not pose significant problems.  "truncate and seek" takes much
less time than "unlink and create".  This patch centralizes the
tempfile creation and holds onto the tempfile until they are deleted
on exit.  This significantly reduces file overhead, now requiring
at most three (3) temp files per run (O(1)).

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 git-svn.perl |   53 +++++++++++++++++++++++++++++++++++------------------
 1 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index fe78461..0937918 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -1260,7 +1260,7 @@ sub md5sum {
 	my $arg = shift;
 	my $ref = ref $arg;
 	my $md5 = Digest::MD5->new();
-        if ($ref eq 'GLOB' || $ref eq 'IO::File') {
+        if ($ref eq 'GLOB' || $ref eq 'IO::File' || $ref eq 'File::Temp') {
 		$md5->addfile($arg) or croak $!;
 	} elsif ($ref eq 'SCALAR') {
 		$md5->add($$arg) or croak $!;
@@ -1285,6 +1285,8 @@ use Carp qw/croak/;
 use File::Path qw/mkpath/;
 use File::Copy qw/copy/;
 use IPC::Open3;
+use File::Temp ();
+use File::Spec;
 
 my ($_gc_nr, $_gc_period);
 
@@ -1323,10 +1325,11 @@ BEGIN {
 	}
 }
 
-my (%LOCKFILES, %INDEX_FILES);
+my (%LOCKFILES, %INDEX_FILES, %TEMP_FILES);
 END {
 	unlink keys %LOCKFILES if %LOCKFILES;
 	unlink keys %INDEX_FILES if %INDEX_FILES;
+	unlink values %TEMP_FILES if %TEMP_FILES;
 }
 
 sub resolve_local_globs {
@@ -2935,6 +2938,21 @@ sub remove_username {
 	$_[0] =~ s{^([^:]*://)[^@]+@}{$1};
 }
 
+sub _temp_file {
+	my ($self, $fd) = @_;
+	if (defined $TEMP_FILES{$fd}) {
+		truncate $TEMP_FILES{$fd}, 0 or croak $!;
+		sysseek $TEMP_FILES{$fd}, 0, 0 or croak $!;
+	} else {
+		$TEMP_FILES{$fd} = File::Temp->new(
+			TEMPLATE => 'GitSvn_XXXXXX',
+			DIR => File::Spec->tmpdir
+			) or croak $!;
+		$TEMP_FILES{$fd}->autoflush(1);
+	}
+	$TEMP_FILES{$fd};
+}
+
 package Git::SVN::Prompt;
 use strict;
 use warnings;
@@ -3225,13 +3243,11 @@ sub change_file_prop {
 
 sub apply_textdelta {
 	my ($self, $fb, $exp) = @_;
-	my $fh = IO::File->new_tmpfile;
-	$fh->autoflush(1);
+	my $fh = Git::temp_acquire('svn_delta');
 	# $fh gets auto-closed() by SVN::TxDelta::apply(),
 	# (but $base does not,) so dup() it for reading in close_file
 	open my $dup, '<&', $fh or croak $!;
-	my $base = IO::File->new_tmpfile;
-	$base->autoflush(1);
+	my $base = Git::temp_acquire('git_blob');
 	if ($fb->{blob}) {
 		print $base 'link ' if ($fb->{mode_a} == 120000);
 		my $size = $::_repository->cat_blob($fb->{blob}, $base);
@@ -3246,9 +3262,10 @@ sub apply_textdelta {
 		}
 	}
 	seek $base, 0, 0 or croak $!;
-	$fb->{fh} = $dup;
+	$fb->{fh} = $fh;
 	$fb->{base} = $base;
-	[ SVN::TxDelta::apply($base, $fh, undef, $fb->{path}, $fb->{pool}) ];
+	my $return = [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
+	$return;
 }
 
 sub close_file {
@@ -3277,22 +3294,23 @@ sub close_file {
 			}
 		}
 
-		my ($tmp_fh, $tmp_filename) = File::Temp::tempfile(UNLINK => 1);
+		my $tmp_fh = Git::temp_acquire('svn_hash');
 		my $result;
 		while ($result = sysread($fh, my $string, 1024)) {
 			my $wrote = syswrite($tmp_fh, $string, $result);
 			defined($wrote) && $wrote == $result
-				or croak("write $tmp_filename: $!\n");
+				or croak("write $tmp_fh->filename: $!\n");
 		}
 		defined $result or croak $!;
-		close $tmp_fh or croak $!;
 
-		close $fh or croak $!;
 
-		$hash = $::_repository->hash_and_insert_object($tmp_filename);
-		unlink($tmp_filename);
+		Git::temp_release($fh, 1);
+
+		$hash = $::_repository->hash_and_insert_object($tmp_fh->filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
-		close $fb->{base} or croak $!;
+
+		Git::temp_release($fb->{base}, 1);
+		Git::temp_release($tmp_fh, 1);
 	} else {
 		$hash = $fb->{blob} or die "no blob information\n";
 	}
@@ -3662,7 +3680,7 @@ sub chg_file {
 	} elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 		$self->change_file_prop($fbat,'svn:executable',undef);
 	}
-	my $fh = IO::File->new_tmpfile or croak $!;
+	my $fh = Git::temp_acquire('git_blob');
 	if ($m->{mode_b} =~ /^120/) {
 		print $fh 'link ' or croak $!;
 		$self->change_file_prop($fbat,'svn:special','*');
@@ -3681,9 +3699,8 @@ sub chg_file {
 	my $atd = $self->apply_textdelta($fbat, undef, $pool);
 	my $got = SVN::TxDelta::send_stream($fh, @$atd, $pool);
 	die "Checksum mismatch\nexpected: $exp\ngot: $got\n" if ($got ne $exp);
+	Git::temp_release($fh, 1);
 	$pool->clear;
-
-	close $fh or croak $!;
 }
 
 sub D {
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
@ 2008-08-11 15:53           ` Marcus Griep
  2008-08-12  3:37             ` Eric Wong
  2008-08-12 16:01             ` Marcus Griep
  2008-08-12  3:14           ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Eric Wong
  2008-08-12 16:00           ` [PATCH 2/3] git-svn: Make it incrementally faster " Marcus Griep
  2 siblings, 2 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-11 15:53 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

Currently, in sub 'close_file', git-svn creates a temporary file and
copies the contents of the blob to be written into it. This is useful
for symlinks because svn stores symlinks in the form:

link $FILE_PATH

Git creates a blob only out of '$FILE_PATH' and uses file mode to
indicate that the blob should be interpreted as a symlink.

As git-hash-object is invoked with --stdin-paths, a duplicate of the
link from svn must be created that leaves off the first five bytes,
i.e. 'link '. However, this is wholly unnecessary for normal blobs,
though, as we already have a temp file with their contents. Copying
the entire file gains nothing, and effectively requires a file to be
written twice before making it into the object db.

This patch corrects that issue, holding onto the substr-like
duplication for symlinks, but skipping it altogether for normal blobs
by reusing the existing temp file.

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 git-svn.perl |   43 ++++++++++++++++++++-----------------------
 1 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 0937918..f53afaa 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -3281,36 +3281,33 @@ sub close_file {
 				    "expected: $exp\n    got: $got\n";
 			}
 		}
-		sysseek($fh, 0, 0) or croak $!;
 		if ($fb->{mode_b} == 120000) {
-			eval {
-				sysread($fh, my $buf, 5) == 5 or croak $!;
-				$buf eq 'link ' or die "$path has mode 120000",
-						       " but is not a link";
-			};
-			if ($@) {
-				warn "$@\n";
-				sysseek($fh, 0, 0) or croak $!;
-			}
-		}
-
-		my $tmp_fh = Git::temp_acquire('svn_hash');
-		my $result;
-		while ($result = sysread($fh, my $string, 1024)) {
-			my $wrote = syswrite($tmp_fh, $string, $result);
-			defined($wrote) && $wrote == $result
-				or croak("write $tmp_fh->filename: $!\n");
-		}
-		defined $result or croak $!;
+			sysseek($fh, 0, 0) or croak $!;
+			sysread($fh, my $buf, 5) == 5 or croak $!;
 
+			unless ($buf eq 'link ') {
+				warn "$path has mode 120000",
+						" but is not a link\n";
+			} else {
+				my $tmp_fh = Git::temp_acquire('svn_hash');
+				my $result;
+				while ($result = sysread($fh, my $string, 1024)) {
+					my $wrote = syswrite($tmp_fh, $string, $result);
+					defined($wrote) && $wrote == $result
+						or croak("write $tmp_fh->filename: $!\n");
+				}
+				defined $result or croak $!;
 
-		Git::temp_release($fh, 1);
+				($fh, $tmp_fh) = ($tmp_fh, $fh);
+				Git::temp_release($tmp_fh, 1);
+			}
+		}
 
-		$hash = $::_repository->hash_and_insert_object($tmp_fh->filename);
+		$hash = $::_repository->hash_and_insert_object($fh->filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 
 		Git::temp_release($fb->{base}, 1);
-		Git::temp_release($tmp_fh, 1);
+		Git::temp_release($fh, 1);
 	} else {
 		$hash = $fb->{blob} or die "no blob information\n";
 	}
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
  2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
@ 2008-08-12  3:08         ` Eric Wong
  2008-08-12 15:41           ` Marcus Griep
  2008-08-12 16:00         ` Marcus Griep
                           ` (2 subsequent siblings)
  4 siblings, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-12  3:08 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Junio C Hamano

Marcus Griep <marcus@griep.us> wrote:
> This patch offers a generic interface to allow temp files to be
> cached while using an instance of the 'Git' package. If many
> temp files are created and destroyed during the execution of a
> program, this caching mechanism can help reduce the amount of
> files created and destroyed by the filesystem.
> 
> There are two methods offered for creating a new file: a no-lock and
> a acquire-lock version. The no-lock version provides no
> guarantee that a file is not in use or that the temp file may be
> stolen by a subsequent request. The acquire-lock version provides a
> weak guarantee that a temp file will not be stolen by subsequent
> requests even from a no-lock request. If a file is locked when
> another acquire request is made, a simple error is thrown.

I'm not sure if the no-lock version is worth the potential for
buggy or dangerous code.  I like this new idea of locking the
files to prevent bugs.

> +=item temp_release ( NAME [, BOOL] )
> +
> +=item temp_release ( FILEHANDLE [, BOOL] )
> +
> +Releases a lock acquired through C<temp_acquire()>. Can be called either with
> +the C<NAME> mapping used when acquiring the temp file or with the C<FILEHANDLE>
> +referencing a locked temp file.
> +
> +Warns if an attempt is made to release a file that is not locked.
> +
> +If called with C<BOOL> true, then the temp file will be truncated before being
> +released. This can help to reduce disk I/O where the system is smart enough to
> +detect the truncation while data is in the output buffers.

Always truncating on release makes the interface simpler.  With locking,
we can probably *only* truncate on release if you're that worried about
the extra overhead :)

> +=item temp_reset ( FILEHANDLE )
> +
> +Truncates and resets the position of the C<FILEHANDLE>.  Uses C<sysseek>.
> +
> +=cut
> +
> +sub temp_reset {
> +	my ($self, $temp_fd) = _maybe_self(@_);
> +
> +	truncate $temp_fd, 0
> +		or throw Error::Simple("couldn't truncate file");

I would do a regular seek() here in addition to the sysseek() below. I
am not certain one of the many userspace buffering layers Perl can
potentially use doesn't do anything funky with its offset accounting.

> +	sysseek $temp_fd, 0, SEEK_SET
> +		or throw Error::Simple("couldn't seek to beginning of file");

I would also put a tell() here after the sysseek and throw an error if
it returns a non-zero value just in case.  Yes, I'm really paranoid
about this stuff and have a huge distrust of userspace I/O layers :)

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/3] git-svn: Make it scream by minimizing temp files
  2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
  2008-08-11 15:53           ` [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links Marcus Griep
@ 2008-08-12  3:14           ` Eric Wong
  2008-08-12 15:50             ` Marcus Griep
  2008-08-12 16:00           ` [PATCH 2/3] git-svn: Make it incrementally faster " Marcus Griep
  2 siblings, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-12  3:14 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Junio C Hamano

Marcus Griep <marcus@griep.us> wrote:
> ---
>  git-svn.perl |   53 +++++++++++++++++++++++++++++++++++------------------
>  1 files changed, 35 insertions(+), 18 deletions(-)
> 
> diff --git a/git-svn.perl b/git-svn.perl
> index fe78461..0937918 100755
> --- a/git-svn.perl
> +++ b/git-svn.perl
> @@ -1285,6 +1285,8 @@ use Carp qw/croak/;
>  use File::Path qw/mkpath/;
>  use File::Copy qw/copy/;
>  use IPC::Open3;
> +use File::Temp ();
> +use File::Spec;
>  
>  my ($_gc_nr, $_gc_period);
>  
> @@ -1323,10 +1325,11 @@ BEGIN {
>  	}
>  }
>  
> -my (%LOCKFILES, %INDEX_FILES);
> +my (%LOCKFILES, %INDEX_FILES, %TEMP_FILES);
>  END {
>  	unlink keys %LOCKFILES if %LOCKFILES;
>  	unlink keys %INDEX_FILES if %INDEX_FILES;
> +	unlink values %TEMP_FILES if %TEMP_FILES;
>  }
  
>  sub resolve_local_globs {
> @@ -2935,6 +2938,21 @@ sub remove_username {
>  	$_[0] =~ s{^([^:]*://)[^@]+@}{$1};
>  }
>  
> +sub _temp_file {
> +	my ($self, $fd) = @_;
> +	if (defined $TEMP_FILES{$fd}) {
> +		truncate $TEMP_FILES{$fd}, 0 or croak $!;
> +		sysseek $TEMP_FILES{$fd}, 0, 0 or croak $!;
> +	} else {
> +		$TEMP_FILES{$fd} = File::Temp->new(
> +			TEMPLATE => 'GitSvn_XXXXXX',
> +			DIR => File::Spec->tmpdir
> +			) or croak $!;
> +		$TEMP_FILES{$fd}->autoflush(1);
> +	}
> +	$TEMP_FILES{$fd};
> +}
> +

The above is dead code now that we're using the versions in Git::,
right?

> @@ -3246,9 +3262,10 @@ sub apply_textdelta {
> -	[ SVN::TxDelta::apply($base, $fh, undef, $fb->{path}, $fb->{pool}) ];
> +	my $return = [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
> +	$return;

Why create a temporary variable? (and break the sacred 80-column limit).

> @@ -3277,22 +3294,23 @@ sub close_file {
> -				or croak("write $tmp_filename: $!\n");
> +				or croak("write ", $tmp_fh->filename, ": $!\n");

I missed this before, but $tmp_fh->filename doesn't interpolate correctly.

("write ${\$tmp_fh->filename}: $!\n")
or
("write ", $tmp_fh->filename, ": $!\n") works.

I believe the latter form is more idiomatic so we should probably use
that...

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-11 15:53           ` [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links Marcus Griep
@ 2008-08-12  3:37             ` Eric Wong
  2008-08-12 15:53               ` Marcus Griep
  2008-08-12 16:01             ` Marcus Griep
  1 sibling, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-12  3:37 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Junio C Hamano

Marcus Griep <marcus@griep.us> wrote:
> Currently, in sub 'close_file', git-svn creates a temporary file and
> copies the contents of the blob to be written into it. This is useful
> for symlinks because svn stores symlinks in the form:
> 
> link $FILE_PATH
> 
> Git creates a blob only out of '$FILE_PATH' and uses file mode to
> indicate that the blob should be interpreted as a symlink.
> 
> As git-hash-object is invoked with --stdin-paths, a duplicate of the
> link from svn must be created that leaves off the first five bytes,
> i.e. 'link '. However, this is wholly unnecessary for normal blobs,
> though, as we already have a temp file with their contents. Copying
> the entire file gains nothing, and effectively requires a file to be
> written twice before making it into the object db.
> 
> This patch corrects that issue, holding onto the substr-like
> duplication for symlinks, but skipping it altogether for normal blobs
> by reusing the existing temp file.

Sweet optimization!  Thanks!


One thing, again, can you please make sure things don't exceed
80-columns when using 8 character-wide tabs?

I'm not sure how much it matters to the guys maintaining Git.pm, but
that's the standard for here and the Linux kernel (although it
unfortunately seems to have gotten more lax in recent years...).

Larger monitors can't help me because I use big fonts that would require
me to move my neck or eyes to see across the screen, leading to more eye
and neck strain (I have a bad neck).  I very much wish ANSI had
standardized on something even smaller, perhaps 64-char wide terminals
:)

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-12  3:08         ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
@ 2008-08-12 15:41           ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 15:41 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git Mailing List, Junio C Hamano

Eric Wong wrote:
> I'm not sure if the no-lock version is worth the potential for
> buggy or dangerous code.  I like this new idea of locking the
> files to prevent bugs.

I can agree with that, and the "unsafe" version was just a front to the
common function.  I've removed the unsafe version from @EXPORT_OK and
removed temp_unsafe, but _temp_cached is still available for those
that _really_ want the unsafe version.

> Always truncating on release makes the interface simpler.  With locking,
> we can probably *only* truncate on release if you're that worried about
> the extra overhead :)

I agree with this.  I introduced a nice bug on myself when just starting
with it though, which is why I made it optional.  Careful conversion and
testing should be good enough protection.

> I would do a regular seek() here in addition to the sysseek() below. I
> am not certain one of the many userspace buffering layers Perl can
> potentially use doesn't do anything funky with its offset accounting.
> 
> I would also put a tell() here after the sysseek and throw an error if
> it returns a non-zero value just in case.  Yes, I'm really paranoid
> about this stuff and have a huge distrust of userspace I/O layers :)

I went ahead and threw in a sysseek(,,SEEK_CUR) with the tell and added
a seek to the sysseek(,,SEEK_SET), so we should be protected on the
buffered and unbuffered sides.

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/3] git-svn: Make it scream by minimizing temp files
  2008-08-12  3:14           ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Eric Wong
@ 2008-08-12 15:50             ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 15:50 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git Mailing List, Junio C Hamano

Eric Wong wrote:
> The above is dead code now that we're using the versions in Git::,
> right?
> 
> Why create a temporary variable? (and break the sacred 80-column limit).

My cherry-picking & squashing skills are not yet up to snuff.  The dead
code and unnecessary variable have now been removed.

 > I missed this before, but $tmp_fh->filename doesn't interpolate correctly.
> 
> ("write ${\$tmp_fh->filename}: $!\n")
> or
> ("write ", $tmp_fh->filename, ": $!\n") works.
> 
> I believe the latter form is more idiomatic so we should probably use
> that...

Done, in the latter form, and fixed in a couple other places.

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-12  3:37             ` Eric Wong
@ 2008-08-12 15:53               ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 15:53 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git Mailing List, Junio C Hamano

Eric Wong wrote:
> Sweet optimization!  Thanks!

I'm glad to have found something significant.

> One thing, again, can you please make sure things don't exceed
> 80-columns when using 8 character-wide tabs?
> 
> I'm not sure how much it matters to the guys maintaining Git.pm, but
> that's the standard for here and the Linux kernel (although it
> unfortunately seems to have gotten more lax in recent years...).
> 
> Larger monitors can't help me because I use big fonts that would require
> me to move my neck or eyes to see across the screen, leading to more eye
> and neck strain (I have a bad neck).  I very much wish ANSI had
> standardized on something even smaller, perhaps 64-char wide terminals
> :)

I added a regex to the standard pre-commit hook to check line length, and
now it is working properly, even with tabs.  If people are interested, I
could submit a patch for the standard pre-commit hook that includes a line
length check.

Overall, expect new patches to be sent in reply to each email as version 2's.

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
  2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
  2008-08-12  3:08         ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
@ 2008-08-12 16:00         ` Marcus Griep
  2008-08-13  3:28           ` Eric Wong
  2008-08-13 20:05         ` Lea Wiemann
  2008-08-14  6:29         ` Junio C Hamano
  4 siblings, 1 reply; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 16:00 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

This patch offers a generic interface to allow temp files to be
cached while using an instance of the 'Git' package. If many
temp files are created and destroyed during the execution of a
program, this caching mechanism can help reduce the amount of
files created and destroyed by the filesystem.

The temp_acquire method provides a weak guarantee that a temp
file will not be stolen by subsequent requests. If a file is
locked when another acquire request is made, a simple error is
thrown.

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 perl/Git.pm |  125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/perl/Git.pm b/perl/Git.pm
index e1ca5b4..405f68f 100644
--- a/perl/Git.pm
+++ b/perl/Git.pm
@@ -57,7 +57,8 @@ require Exporter;
                 command_output_pipe command_input_pipe command_close_pipe
                 command_bidi_pipe command_close_bidi_pipe
                 version exec_path hash_object git_cmd_try
-                remote_refs);
+                remote_refs
+                temp_acquire temp_release temp_reset);
 
 
 =head1 DESCRIPTION
@@ -99,7 +100,9 @@ use Carp qw(carp croak); # but croak is bad - throw instead
 use Error qw(:try);
 use Cwd qw(abs_path);
 use IPC::Open2 qw(open2);
-
+use File::Temp ();
+require File::Spec;
+use Fcntl qw(SEEK_SET SEEK_CUR);
 }
 
 
@@ -933,6 +936,124 @@ sub _close_cat_blob {
 	delete @$self{@vars};
 }
 
+
+{ # %TEMP_* Lexical Context
+
+my (%TEMP_LOCKS, %TEMP_FILES);
+
+=item temp_acquire ( NAME )
+
+Attempts to retreive the temporary file mapped to the string C<NAME>. If an
+associated temp file has not been created this session or was closed, it is
+created, cached, and set for autoflush and binmode.
+
+Internally locks the file mapped to C<NAME>. This lock must be released with
+C<temp_release()> when the temp file is no longer needed. Subsequent attempts
+to retrieve temporary files mapped to the same C<NAME> while still locked will
+cause an error. This locking mechanism provides a weak guarantee and is not
+threadsafe. It does provide some error checking to help prevent temp file refs
+writing over one another.
+
+In general, the L<File::Handle> returned should not be closed by consumers as
+it defeats the purpose of this caching mechanism. If you need to close the temp
+file handle, then you should use L<File::Temp> or another temp file faculty
+directly. If a handle is closed and then requested again, then a warning will
+issue.
+
+=cut
+
+sub temp_acquire {
+	my ($self, $name) = _maybe_self(@_);
+
+	my $temp_fd = _temp_cache($name);
+
+	$TEMP_LOCKS{$temp_fd} = 1;
+	$temp_fd;
+}
+
+=item temp_release ( NAME )
+
+=item temp_release ( FILEHANDLE )
+
+Releases a lock acquired through C<temp_acquire()>. Can be called either with
+the C<NAME> mapping used when acquiring the temp file or with the C<FILEHANDLE>
+referencing a locked temp file.
+
+Warns if an attempt is made to release a file that is not locked.
+
+The temp file will be truncated before being released. This can help to reduce
+disk I/O where the system is smart enough to detect the truncation while data
+is in the output buffers. Beware that after the temp file is released and
+truncated, any operations on that file may fail miserably until it is
+re-acquired. All contents are lost between each release and acquire mapped to
+the same string.
+
+=cut
+
+sub temp_release {
+	my ($self, $temp_fd, $trunc) = _maybe_self(@_);
+
+	if (ref($temp_fd) ne 'File::Temp') {
+		$temp_fd = $TEMP_FILES{$temp_fd};
+	}
+	unless ($TEMP_LOCKS{$temp_fd}) {
+		carp "Attempt to release temp file '",
+			$temp_fd, "' that has not been locked";
+	}
+	temp_reset($temp_fd) if $trunc and $temp_fd->opened;
+
+	$TEMP_LOCKS{$temp_fd} = 0;
+	undef;
+}
+
+sub _temp_cache {
+	my ($name) = @_;
+
+	my $temp_fd = \$TEMP_FILES{$name};
+	if (defined $$temp_fd and $$temp_fd->opened) {
+		if ($TEMP_LOCKS{$$temp_fd}) {
+			throw Error::Simple("Temp file with moniker '",
+				$name, "' already in use");
+		}
+	} else {
+		if (defined $$temp_fd) {
+			# then we're here because of a closed handle.
+			carp "Temp file '", $name,
+				"' was closed. Opening replacement.";
+		}
+		$$temp_fd = File::Temp->new(
+			TEMPLATE => 'Git_XXXXXX',
+			DIR => File::Spec->tmpdir
+			) or throw Error::Simple("couldn't open new temp file");
+		$$temp_fd->autoflush;
+		binmode $$temp_fd;
+	}
+	$$temp_fd;
+}
+
+=item temp_reset ( FILEHANDLE )
+
+Truncates and resets the position of the C<FILEHANDLE>.
+
+=cut
+
+sub temp_reset {
+	my ($self, $temp_fd) = _maybe_self(@_);
+
+	truncate $temp_fd, 0
+		or throw Error::Simple("couldn't truncate file");
+	sysseek($temp_fd, 0, SEEK_SET) and seek($temp_fd, 0, SEEK_SET)
+		or throw Error::Simple("couldn't seek to beginning of file");
+	sysseek($temp_fd, 0, SEEK_CUR) == 0 and tell($temp_fd) == 0
+		or throw Error::Simple("expected file position to be reset");
+}
+
+sub END {
+	unlink values %TEMP_FILES if %TEMP_FILES;
+}
+
+} # %TEMP_* Lexical Context
+
 =back
 
 =head1 ERROR HANDLING
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 2/3] git-svn: Make it incrementally faster by minimizing temp files
  2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
  2008-08-11 15:53           ` [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links Marcus Griep
  2008-08-12  3:14           ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Eric Wong
@ 2008-08-12 16:00           ` Marcus Griep
  2008-08-13  3:29             ` Eric Wong
  2 siblings, 1 reply; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 16:00 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

Currently, git-svn would create a temp file on four occasions:
1. Reading a blob out of the object db
2. Creating a delta from svn
3. Hashing and writing a blob into the object db
4. Reading a blob out of the object db (in another place in code)

Any time git-svn did the above, it would dutifully create and then
delete said temp file.  Unfortunately, this means that between 2-4
temporary files are created/deleted per file 'add/modify'-ed in
svn (O(n)).  This causes significant overhead and helps the inode
counter to spin beautifully.

By its nature, git-svn is a serial beast.  Thus, reusing a temp file
does not pose significant problems.  "truncate and seek" takes much
less time than "unlink and create".  This patch centralizes the
tempfile creation and holds onto the tempfile until they are deleted
on exit.  This significantly reduces file overhead, now requiring
at most three (3) temp files per run (O(1)).

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 git-svn.perl |   35 ++++++++++++++++++-----------------
 1 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 4dc3380..9eae5e8 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -1265,7 +1265,7 @@ sub md5sum {
 	my $arg = shift;
 	my $ref = ref $arg;
 	my $md5 = Digest::MD5->new();
-        if ($ref eq 'GLOB' || $ref eq 'IO::File') {
+        if ($ref eq 'GLOB' || $ref eq 'IO::File' || $ref eq 'File::Temp') {
 		$md5->addfile($arg) or croak $!;
 	} elsif ($ref eq 'SCALAR') {
 		$md5->add($$arg) or croak $!;
@@ -1328,6 +1328,7 @@ BEGIN {
 	}
 }
 
+
 my (%LOCKFILES, %INDEX_FILES);
 END {
 	unlink keys %LOCKFILES if %LOCKFILES;
@@ -3230,13 +3231,11 @@ sub change_file_prop {
 
 sub apply_textdelta {
 	my ($self, $fb, $exp) = @_;
-	my $fh = IO::File->new_tmpfile;
-	$fh->autoflush(1);
+	my $fh = Git::temp_acquire('svn_delta');
 	# $fh gets auto-closed() by SVN::TxDelta::apply(),
 	# (but $base does not,) so dup() it for reading in close_file
 	open my $dup, '<&', $fh or croak $!;
-	my $base = IO::File->new_tmpfile;
-	$base->autoflush(1);
+	my $base = Git::temp_acquire('git_blob');
 	if ($fb->{blob}) {
 		print $base 'link ' if ($fb->{mode_a} == 120000);
 		my $size = $::_repository->cat_blob($fb->{blob}, $base);
@@ -3251,9 +3250,9 @@ sub apply_textdelta {
 		}
 	}
 	seek $base, 0, 0 or croak $!;
-	$fb->{fh} = $dup;
+	$fb->{fh} = $fh;
 	$fb->{base} = $base;
-	[ SVN::TxDelta::apply($base, $fh, undef, $fb->{path}, $fb->{pool}) ];
+	[ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
 }
 
 sub close_file {
@@ -3282,22 +3281,25 @@ sub close_file {
 			}
 		}
 
-		my ($tmp_fh, $tmp_filename) = File::Temp::tempfile(UNLINK => 1);
+		my $tmp_fh = Git::temp_acquire('svn_hash');
 		my $result;
 		while ($result = sysread($fh, my $string, 1024)) {
 			my $wrote = syswrite($tmp_fh, $string, $result);
 			defined($wrote) && $wrote == $result
-				or croak("write $tmp_filename: $!\n");
+				or croak("write ",
+					$tmp_fh->filename, ": $!\n");
 		}
 		defined $result or croak $!;
-		close $tmp_fh or croak $!;
 
-		close $fh or croak $!;
 
-		$hash = $::_repository->hash_and_insert_object($tmp_filename);
-		unlink($tmp_filename);
+		Git::temp_release($fh, 1);
+
+		$hash = $::_repository->hash_and_insert_object(
+				$tmp_fh->filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
-		close $fb->{base} or croak $!;
+
+		Git::temp_release($fb->{base}, 1);
+		Git::temp_release($tmp_fh, 1);
 	} else {
 		$hash = $fb->{blob} or die "no blob information\n";
 	}
@@ -3667,7 +3669,7 @@ sub chg_file {
 	} elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 		$self->change_file_prop($fbat,'svn:executable',undef);
 	}
-	my $fh = IO::File->new_tmpfile or croak $!;
+	my $fh = Git::temp_acquire('git_blob');
 	if ($m->{mode_b} =~ /^120/) {
 		print $fh 'link ' or croak $!;
 		$self->change_file_prop($fbat,'svn:special','*');
@@ -3686,9 +3688,8 @@ sub chg_file {
 	my $atd = $self->apply_textdelta($fbat, undef, $pool);
 	my $got = SVN::TxDelta::send_stream($fh, @$atd, $pool);
 	die "Checksum mismatch\nexpected: $exp\ngot: $got\n" if ($got ne $exp);
+	Git::temp_release($fh, 1);
 	$pool->clear;
-
-	close $fh or croak $!;
 }
 
 sub D {
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-11 15:53           ` [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links Marcus Griep
  2008-08-12  3:37             ` Eric Wong
@ 2008-08-12 16:01             ` Marcus Griep
  2008-08-12 16:45               ` [PATCH v2 " Marcus Griep
  2008-08-13  3:29               ` [PATCH " Eric Wong
  1 sibling, 2 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 16:01 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

Currently, in sub 'close_file', git-svn creates a temporary file and
copies the contents of the blob to be written into it. This is useful
for symlinks because svn stores symlinks in the form:

link $FILE_PATH

Git creates a blob only out of '$FILE_PATH' and uses file mode to
indicate that the blob should be interpreted as a symlink.

As git-hash-object is invoked with --stdin-paths, a duplicate of the
link from svn must be created that leaves off the first five bytes,
i.e. 'link '. However, this is wholly unnecessary for normal blobs,
though, as we already have a temp file with their contents. Copying
the entire file gains nothing, and effectively requires a file to be
written twice before making it into the object db.

This patch corrects that issue, holding onto the substr-like
duplication for symlinks, but skipping it altogether for normal blobs
by reusing the existing temp file.

Signed-off-by: Marcus Griep <marcus@griep.us>
---
 git-svn.perl |   46 ++++++++++++++++++++++------------------------
 1 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 9eae5e8..95d1510 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -3268,38 +3268,36 @@ sub close_file {
 				    "expected: $exp\n    got: $got\n";
 			}
 		}
-		sysseek($fh, 0, 0) or croak $!;
 		if ($fb->{mode_b} == 120000) {
-			eval {
-				sysread($fh, my $buf, 5) == 5 or croak $!;
-				$buf eq 'link ' or die "$path has mode 120000",
-						       " but is not a link";
-			};
-			if ($@) {
-				warn "$@\n";
-				sysseek($fh, 0, 0) or croak $!;
-			}
-		}
-
-		my $tmp_fh = Git::temp_acquire('svn_hash');
-		my $result;
-		while ($result = sysread($fh, my $string, 1024)) {
-			my $wrote = syswrite($tmp_fh, $string, $result);
-			defined($wrote) && $wrote == $result
-				or croak("write ",
-					$tmp_fh->filename, ": $!\n");
-		}
-		defined $result or croak $!;
+			sysseek($fh, 0, 0) or croak $!;
+			sysread($fh, my $buf, 5) == 5 or croak $!;
 
+			unless ($buf eq 'link ') {
+				warn "$path has mode 120000",
+						" but is not a link\n";
+			} else {
+				my $tmp_fh = Git::temp_acquire('svn_hash');
+				my $res;
+				while ($res = sysread($fh, my $str, 1024)) {
+					my $out = syswrite($tmp_fh, $str, $res);
+					defined($out) && $out == $res
+						or croak("write ",
+							$tmp_fh->filename,
+							": $!\n");
+				}
+				defined $result or croak $!;
 
-		Git::temp_release($fh, 1);
+				($fh, $tmp_fh) = ($tmp_fh, $fh);
+				Git::temp_release($tmp_fh, 1);
+			}
+		}
 
 		$hash = $::_repository->hash_and_insert_object(
-				$tmp_fh->filename);
+				$fh->filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 
 		Git::temp_release($fb->{base}, 1);
-		Git::temp_release($tmp_fh, 1);
+		Git::temp_release($fh, 1);
 	} else {
 		$hash = $fb->{blob} or die "no blob information\n";
 	}
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH v2 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-12 16:01             ` Marcus Griep
@ 2008-08-12 16:45               ` Marcus Griep
  2008-08-13  3:29               ` [PATCH " Eric Wong
  1 sibling, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-12 16:45 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

Currently, in sub 'close_file', git-svn creates a temporary file and
copies the contents of the blob to be written into it. This is useful
for symlinks because svn stores symlinks in the form:

link $FILE_PATH

Git creates a blob only out of '$FILE_PATH' and uses file mode to
indicate that the blob should be interpreted as a symlink.

As git-hash-object is invoked with --stdin-paths, a duplicate of the
link from svn must be created that leaves off the first five bytes,
i.e. 'link '. However, this is wholly unnecessary for normal blobs,
though, as we already have a temp file with their contents. Copying
the entire file gains nothing, and effectively requires a file to be
written twice before making it into the object db.

This patch corrects that issue, holding onto the substr-like
duplication for symlinks, but skipping it altogether for normal blobs
by reusing the existing temp file.

Signed-off-by: Marcus Griep <marcus@griep.us>
---

Sorry for the second version.  I was silly and didn't run the
"perl typo checker".  This is corrected and tested via "full-svn-test".

 git-svn.perl |   46 ++++++++++++++++++++++------------------------
 1 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 9eae5e8..099fd02 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -3268,38 +3268,36 @@ sub close_file {
 				    "expected: $exp\n    got: $got\n";
 			}
 		}
-		sysseek($fh, 0, 0) or croak $!;
 		if ($fb->{mode_b} == 120000) {
-			eval {
-				sysread($fh, my $buf, 5) == 5 or croak $!;
-				$buf eq 'link ' or die "$path has mode 120000",
-						       " but is not a link";
-			};
-			if ($@) {
-				warn "$@\n";
-				sysseek($fh, 0, 0) or croak $!;
-			}
-		}
-
-		my $tmp_fh = Git::temp_acquire('svn_hash');
-		my $result;
-		while ($result = sysread($fh, my $string, 1024)) {
-			my $wrote = syswrite($tmp_fh, $string, $result);
-			defined($wrote) && $wrote == $result
-				or croak("write ",
-					$tmp_fh->filename, ": $!\n");
-		}
-		defined $result or croak $!;
+			sysseek($fh, 0, 0) or croak $!;
+			sysread($fh, my $buf, 5) == 5 or croak $!;
 
+			unless ($buf eq 'link ') {
+				warn "$path has mode 120000",
+						" but is not a link\n";
+			} else {
+				my $tmp_fh = Git::temp_acquire('svn_hash');
+				my $res;
+				while ($res = sysread($fh, my $str, 1024)) {
+					my $out = syswrite($tmp_fh, $str, $res);
+					defined($out) && $out == $res
+						or croak("write ",
+							$tmp_fh->filename,
+							": $!\n");
+				}
+				defined $res or croak $!;
 
-		Git::temp_release($fh, 1);
+				($fh, $tmp_fh) = ($tmp_fh, $fh);
+				Git::temp_release($tmp_fh, 1);
+			}
+		}
 
 		$hash = $::_repository->hash_and_insert_object(
-				$tmp_fh->filename);
+				$fh->filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 
 		Git::temp_release($fb->{base}, 1);
-		Git::temp_release($tmp_fh, 1);
+		Git::temp_release($fh, 1);
 	} else {
 		$hash = $fb->{blob} or die "no blob information\n";
 	}
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-12 16:00         ` Marcus Griep
@ 2008-08-13  3:28           ` Eric Wong
  0 siblings, 0 replies; 43+ messages in thread
From: Eric Wong @ 2008-08-13  3:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Marcus Griep

Marcus Griep <marcus@griep.us> wrote:
> This patch offers a generic interface to allow temp files to be
> cached while using an instance of the 'Git' package. If many
> temp files are created and destroyed during the execution of a
> program, this caching mechanism can help reduce the amount of
> files created and destroyed by the filesystem.
> 
> The temp_acquire method provides a weak guarantee that a temp
> file will not be stolen by subsequent requests. If a file is
> locked when another acquire request is made, a simple error is
> thrown.
> 
> Signed-off-by: Marcus Griep <marcus@griep.us>

Acked-by: Eric Wong <normalperson@yhbt.net>

> ---
>  perl/Git.pm |  125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 123 insertions(+), 2 deletions(-)
> 
> diff --git a/perl/Git.pm b/perl/Git.pm
> index e1ca5b4..405f68f 100644
> --- a/perl/Git.pm
> +++ b/perl/Git.pm
> @@ -57,7 +57,8 @@ require Exporter;
>                  command_output_pipe command_input_pipe command_close_pipe
>                  command_bidi_pipe command_close_bidi_pipe
>                  version exec_path hash_object git_cmd_try
> -                remote_refs);
> +                remote_refs
> +                temp_acquire temp_release temp_reset);
>  
>  
>  =head1 DESCRIPTION
> @@ -99,7 +100,9 @@ use Carp qw(carp croak); # but croak is bad - throw instead
>  use Error qw(:try);
>  use Cwd qw(abs_path);
>  use IPC::Open2 qw(open2);
> -
> +use File::Temp ();
> +require File::Spec;
> +use Fcntl qw(SEEK_SET SEEK_CUR);
>  }
>  
>  
> @@ -933,6 +936,124 @@ sub _close_cat_blob {
>  	delete @$self{@vars};
>  }
>  
> +
> +{ # %TEMP_* Lexical Context
> +
> +my (%TEMP_LOCKS, %TEMP_FILES);
> +
> +=item temp_acquire ( NAME )
> +
> +Attempts to retreive the temporary file mapped to the string C<NAME>. If an
> +associated temp file has not been created this session or was closed, it is
> +created, cached, and set for autoflush and binmode.
> +
> +Internally locks the file mapped to C<NAME>. This lock must be released with
> +C<temp_release()> when the temp file is no longer needed. Subsequent attempts
> +to retrieve temporary files mapped to the same C<NAME> while still locked will
> +cause an error. This locking mechanism provides a weak guarantee and is not
> +threadsafe. It does provide some error checking to help prevent temp file refs
> +writing over one another.
> +
> +In general, the L<File::Handle> returned should not be closed by consumers as
> +it defeats the purpose of this caching mechanism. If you need to close the temp
> +file handle, then you should use L<File::Temp> or another temp file faculty
> +directly. If a handle is closed and then requested again, then a warning will
> +issue.
> +
> +=cut
> +
> +sub temp_acquire {
> +	my ($self, $name) = _maybe_self(@_);
> +
> +	my $temp_fd = _temp_cache($name);
> +
> +	$TEMP_LOCKS{$temp_fd} = 1;
> +	$temp_fd;
> +}
> +
> +=item temp_release ( NAME )
> +
> +=item temp_release ( FILEHANDLE )
> +
> +Releases a lock acquired through C<temp_acquire()>. Can be called either with
> +the C<NAME> mapping used when acquiring the temp file or with the C<FILEHANDLE>
> +referencing a locked temp file.
> +
> +Warns if an attempt is made to release a file that is not locked.
> +
> +The temp file will be truncated before being released. This can help to reduce
> +disk I/O where the system is smart enough to detect the truncation while data
> +is in the output buffers. Beware that after the temp file is released and
> +truncated, any operations on that file may fail miserably until it is
> +re-acquired. All contents are lost between each release and acquire mapped to
> +the same string.
> +
> +=cut
> +
> +sub temp_release {
> +	my ($self, $temp_fd, $trunc) = _maybe_self(@_);
> +
> +	if (ref($temp_fd) ne 'File::Temp') {
> +		$temp_fd = $TEMP_FILES{$temp_fd};
> +	}
> +	unless ($TEMP_LOCKS{$temp_fd}) {
> +		carp "Attempt to release temp file '",
> +			$temp_fd, "' that has not been locked";
> +	}
> +	temp_reset($temp_fd) if $trunc and $temp_fd->opened;
> +
> +	$TEMP_LOCKS{$temp_fd} = 0;
> +	undef;
> +}
> +
> +sub _temp_cache {
> +	my ($name) = @_;
> +
> +	my $temp_fd = \$TEMP_FILES{$name};
> +	if (defined $$temp_fd and $$temp_fd->opened) {
> +		if ($TEMP_LOCKS{$$temp_fd}) {
> +			throw Error::Simple("Temp file with moniker '",
> +				$name, "' already in use");
> +		}
> +	} else {
> +		if (defined $$temp_fd) {
> +			# then we're here because of a closed handle.
> +			carp "Temp file '", $name,
> +				"' was closed. Opening replacement.";
> +		}
> +		$$temp_fd = File::Temp->new(
> +			TEMPLATE => 'Git_XXXXXX',
> +			DIR => File::Spec->tmpdir
> +			) or throw Error::Simple("couldn't open new temp file");
> +		$$temp_fd->autoflush;
> +		binmode $$temp_fd;
> +	}
> +	$$temp_fd;
> +}
> +
> +=item temp_reset ( FILEHANDLE )
> +
> +Truncates and resets the position of the C<FILEHANDLE>.
> +
> +=cut
> +
> +sub temp_reset {
> +	my ($self, $temp_fd) = _maybe_self(@_);
> +
> +	truncate $temp_fd, 0
> +		or throw Error::Simple("couldn't truncate file");
> +	sysseek($temp_fd, 0, SEEK_SET) and seek($temp_fd, 0, SEEK_SET)
> +		or throw Error::Simple("couldn't seek to beginning of file");
> +	sysseek($temp_fd, 0, SEEK_CUR) == 0 and tell($temp_fd) == 0
> +		or throw Error::Simple("expected file position to be reset");
> +}
> +
> +sub END {
> +	unlink values %TEMP_FILES if %TEMP_FILES;
> +}
> +
> +} # %TEMP_* Lexical Context
> +
>  =back
>  
>  =head1 ERROR HANDLING
> -- 
> 1.6.0.rc2.6.g8eda3

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/3] git-svn: Make it incrementally faster by minimizing temp files
  2008-08-12 16:00           ` [PATCH 2/3] git-svn: Make it incrementally faster " Marcus Griep
@ 2008-08-13  3:29             ` Eric Wong
  0 siblings, 0 replies; 43+ messages in thread
From: Eric Wong @ 2008-08-13  3:29 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Marcus Griep

Marcus Griep <marcus@griep.us> wrote:
> Currently, git-svn would create a temp file on four occasions:
> 1. Reading a blob out of the object db
> 2. Creating a delta from svn
> 3. Hashing and writing a blob into the object db
> 4. Reading a blob out of the object db (in another place in code)
> 
> Any time git-svn did the above, it would dutifully create and then
> delete said temp file.  Unfortunately, this means that between 2-4
> temporary files are created/deleted per file 'add/modify'-ed in
> svn (O(n)).  This causes significant overhead and helps the inode
> counter to spin beautifully.
> 
> By its nature, git-svn is a serial beast.  Thus, reusing a temp file
> does not pose significant problems.  "truncate and seek" takes much
> less time than "unlink and create".  This patch centralizes the
> tempfile creation and holds onto the tempfile until they are deleted
> on exit.  This significantly reduces file overhead, now requiring
> at most three (3) temp files per run (O(1)).
> 
> Signed-off-by: Marcus Griep <marcus@griep.us>

Acked-by: Eric Wong <normalperson@yhbt.net>

> ---
>  git-svn.perl |   35 ++++++++++++++++++-----------------
>  1 files changed, 18 insertions(+), 17 deletions(-)
> 
> diff --git a/git-svn.perl b/git-svn.perl
> index 4dc3380..9eae5e8 100755
> --- a/git-svn.perl
> +++ b/git-svn.perl
> @@ -1265,7 +1265,7 @@ sub md5sum {
>  	my $arg = shift;
>  	my $ref = ref $arg;
>  	my $md5 = Digest::MD5->new();
> -        if ($ref eq 'GLOB' || $ref eq 'IO::File') {
> +        if ($ref eq 'GLOB' || $ref eq 'IO::File' || $ref eq 'File::Temp') {
>  		$md5->addfile($arg) or croak $!;
>  	} elsif ($ref eq 'SCALAR') {
>  		$md5->add($$arg) or croak $!;
> @@ -1328,6 +1328,7 @@ BEGIN {
>  	}
>  }
>  
> +
>  my (%LOCKFILES, %INDEX_FILES);
>  END {
>  	unlink keys %LOCKFILES if %LOCKFILES;
> @@ -3230,13 +3231,11 @@ sub change_file_prop {
>  
>  sub apply_textdelta {
>  	my ($self, $fb, $exp) = @_;
> -	my $fh = IO::File->new_tmpfile;
> -	$fh->autoflush(1);
> +	my $fh = Git::temp_acquire('svn_delta');
>  	# $fh gets auto-closed() by SVN::TxDelta::apply(),
>  	# (but $base does not,) so dup() it for reading in close_file
>  	open my $dup, '<&', $fh or croak $!;
> -	my $base = IO::File->new_tmpfile;
> -	$base->autoflush(1);
> +	my $base = Git::temp_acquire('git_blob');
>  	if ($fb->{blob}) {
>  		print $base 'link ' if ($fb->{mode_a} == 120000);
>  		my $size = $::_repository->cat_blob($fb->{blob}, $base);
> @@ -3251,9 +3250,9 @@ sub apply_textdelta {
>  		}
>  	}
>  	seek $base, 0, 0 or croak $!;
> -	$fb->{fh} = $dup;
> +	$fb->{fh} = $fh;
>  	$fb->{base} = $base;
> -	[ SVN::TxDelta::apply($base, $fh, undef, $fb->{path}, $fb->{pool}) ];
> +	[ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
>  }
>  
>  sub close_file {
> @@ -3282,22 +3281,25 @@ sub close_file {
>  			}
>  		}
>  
> -		my ($tmp_fh, $tmp_filename) = File::Temp::tempfile(UNLINK => 1);
> +		my $tmp_fh = Git::temp_acquire('svn_hash');
>  		my $result;
>  		while ($result = sysread($fh, my $string, 1024)) {
>  			my $wrote = syswrite($tmp_fh, $string, $result);
>  			defined($wrote) && $wrote == $result
> -				or croak("write $tmp_filename: $!\n");
> +				or croak("write ",
> +					$tmp_fh->filename, ": $!\n");
>  		}
>  		defined $result or croak $!;
> -		close $tmp_fh or croak $!;
>  
> -		close $fh or croak $!;
>  
> -		$hash = $::_repository->hash_and_insert_object($tmp_filename);
> -		unlink($tmp_filename);
> +		Git::temp_release($fh, 1);
> +
> +		$hash = $::_repository->hash_and_insert_object(
> +				$tmp_fh->filename);
>  		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
> -		close $fb->{base} or croak $!;
> +
> +		Git::temp_release($fb->{base}, 1);
> +		Git::temp_release($tmp_fh, 1);
>  	} else {
>  		$hash = $fb->{blob} or die "no blob information\n";
>  	}
> @@ -3667,7 +3669,7 @@ sub chg_file {
>  	} elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
>  		$self->change_file_prop($fbat,'svn:executable',undef);
>  	}
> -	my $fh = IO::File->new_tmpfile or croak $!;
> +	my $fh = Git::temp_acquire('git_blob');
>  	if ($m->{mode_b} =~ /^120/) {
>  		print $fh 'link ' or croak $!;
>  		$self->change_file_prop($fbat,'svn:special','*');
> @@ -3686,9 +3688,8 @@ sub chg_file {
>  	my $atd = $self->apply_textdelta($fbat, undef, $pool);
>  	my $got = SVN::TxDelta::send_stream($fh, @$atd, $pool);
>  	die "Checksum mismatch\nexpected: $exp\ngot: $got\n" if ($got ne $exp);
> +	Git::temp_release($fh, 1);
>  	$pool->clear;
> -
> -	close $fh or croak $!;
>  }
>  
>  sub D {
> -- 
> 1.6.0.rc2.6.g8eda3

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-12 16:01             ` Marcus Griep
  2008-08-12 16:45               ` [PATCH v2 " Marcus Griep
@ 2008-08-13  3:29               ` Eric Wong
  2008-08-13  3:42                 ` Marcus Griep
  1 sibling, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-13  3:29 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Marcus Griep

Marcus Griep <marcus@griep.us> wrote:
> Currently, in sub 'close_file', git-svn creates a temporary file and
> copies the contents of the blob to be written into it. This is useful
> for symlinks because svn stores symlinks in the form:
> 
> link $FILE_PATH
> 
> Git creates a blob only out of '$FILE_PATH' and uses file mode to
> indicate that the blob should be interpreted as a symlink.
> 
> As git-hash-object is invoked with --stdin-paths, a duplicate of the
> link from svn must be created that leaves off the first five bytes,
> i.e. 'link '. However, this is wholly unnecessary for normal blobs,
> though, as we already have a temp file with their contents. Copying
> the entire file gains nothing, and effectively requires a file to be
> written twice before making it into the object db.
> 
> This patch corrects that issue, holding onto the substr-like
> duplication for symlinks, but skipping it altogether for normal blobs
> by reusing the existing temp file.
> 
> Signed-off-by: Marcus Griep <marcus@griep.us>

Thank you Marcus!

Acked-by: Eric Wong <normalperson@yhbt.net>

> ---
>  git-svn.perl |   46 ++++++++++++++++++++++------------------------
>  1 files changed, 22 insertions(+), 24 deletions(-)
> 
> diff --git a/git-svn.perl b/git-svn.perl
> index 9eae5e8..95d1510 100755
> --- a/git-svn.perl
> +++ b/git-svn.perl
> @@ -3268,38 +3268,36 @@ sub close_file {
>  				    "expected: $exp\n    got: $got\n";
>  			}
>  		}
> -		sysseek($fh, 0, 0) or croak $!;
>  		if ($fb->{mode_b} == 120000) {
> -			eval {
> -				sysread($fh, my $buf, 5) == 5 or croak $!;
> -				$buf eq 'link ' or die "$path has mode 120000",
> -						       " but is not a link";
> -			};
> -			if ($@) {
> -				warn "$@\n";
> -				sysseek($fh, 0, 0) or croak $!;
> -			}
> -		}
> -
> -		my $tmp_fh = Git::temp_acquire('svn_hash');
> -		my $result;
> -		while ($result = sysread($fh, my $string, 1024)) {
> -			my $wrote = syswrite($tmp_fh, $string, $result);
> -			defined($wrote) && $wrote == $result
> -				or croak("write ",
> -					$tmp_fh->filename, ": $!\n");
> -		}
> -		defined $result or croak $!;
> +			sysseek($fh, 0, 0) or croak $!;
> +			sysread($fh, my $buf, 5) == 5 or croak $!;
>  
> +			unless ($buf eq 'link ') {
> +				warn "$path has mode 120000",
> +						" but is not a link\n";
> +			} else {
> +				my $tmp_fh = Git::temp_acquire('svn_hash');
> +				my $res;
> +				while ($res = sysread($fh, my $str, 1024)) {
> +					my $out = syswrite($tmp_fh, $str, $res);
> +					defined($out) && $out == $res
> +						or croak("write ",
> +							$tmp_fh->filename,
> +							": $!\n");
> +				}
> +				defined $result or croak $!;
>  
> -		Git::temp_release($fh, 1);
> +				($fh, $tmp_fh) = ($tmp_fh, $fh);
> +				Git::temp_release($tmp_fh, 1);
> +			}
> +		}
>  
>  		$hash = $::_repository->hash_and_insert_object(
> -				$tmp_fh->filename);
> +				$fh->filename);
>  		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
>  
>  		Git::temp_release($fb->{base}, 1);
> -		Git::temp_release($tmp_fh, 1);
> +		Git::temp_release($fh, 1);
>  	} else {
>  		$hash = $fb->{blob} or die "no blob information\n";
>  	}
> -- 
> 1.6.0.rc2.6.g8eda3

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-13  3:29               ` [PATCH " Eric Wong
@ 2008-08-13  3:42                 ` Marcus Griep
  2008-08-13  3:52                   ` Eric Wong
  0 siblings, 1 reply; 43+ messages in thread
From: Marcus Griep @ 2008-08-13  3:42 UTC (permalink / raw)
  To: Eric Wong; +Cc: Junio C Hamano, Git Mailing List

Eric Wong wrote:
> Thank you Marcus!
> 
> Acked-by: Eric Wong <normalperson@yhbt.net>

Errr, you want to Ack [PATCH v2 3/3]; not this one; there's one minor
typo when I shortened the variable $result to $res.

>> +				my $res;
>> +				while ($res = sysread($fh, my $str, 1024)) {
>> +					my $out = syswrite($tmp_fh, $str, $res);
>> +					defined($out) && $out == $res
>> +						or croak("write ",
>> +							$tmp_fh->filename,
>> +							": $!\n");
>> +				}
>> +				defined $result or croak $!;

That last line causes compilation errors with 'use strict'.  It is fixed
in the alternate version.

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links
  2008-08-13  3:42                 ` Marcus Griep
@ 2008-08-13  3:52                   ` Eric Wong
  0 siblings, 0 replies; 43+ messages in thread
From: Eric Wong @ 2008-08-13  3:52 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Junio C Hamano, Git Mailing List

Marcus Griep <marcus@griep.us> wrote:
> Eric Wong wrote:
> > Thank you Marcus!
> > 
> > Acked-by: Eric Wong <normalperson@yhbt.net>
> 
> Errr, you want to Ack [PATCH v2 3/3]; not this one; there's one minor
> typo when I shortened the variable $result to $res.
> 
> >> +				my $res;
> >> +				while ($res = sysread($fh, my $str, 1024)) {
> >> +					my $out = syswrite($tmp_fh, $str, $res);
> >> +					defined($out) && $out == $res
> >> +						or croak("write ",
> >> +							$tmp_fh->filename,
> >> +							": $!\n");
> >> +				}
> >> +				defined $result or croak $!;
> 
> That last line causes compilation errors with 'use strict'.  It is fixed
> in the alternate version.

Oops, I applied the right patch but managed to reopen the wrong one when
I acked it.

I've just pushed out my repository with acks to
  git://bogomips.org/git-svn.git

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
                           ` (2 preceding siblings ...)
  2008-08-12 16:00         ` Marcus Griep
@ 2008-08-13 20:05         ` Lea Wiemann
  2008-08-13 20:13           ` Marcus Griep
                             ` (2 more replies)
  2008-08-14  6:29         ` Junio C Hamano
  4 siblings, 3 replies; 43+ messages in thread
From: Lea Wiemann @ 2008-08-13 20:05 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Eric Wong, Junio C Hamano

Marcus Griep wrote:
> diff --git a/perl/Git.pm b/perl/Git.pm
>
> +require File::Spec;

This makes Git.pm dependent on Perl 5.6.1.  Some tests (like
t3701-add-interactive.sh) seem to work with pretty much any Perl version
out there, and requiring File::Spec changes this.  Hence to avoid
complaints about failing tests, I suggest that you add a check for
File::Spec availability at the beginning of any test that (indirectly)
uses Git.pm.

(All my statements are untested... ;-))

-- Lea

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-13 20:05         ` Lea Wiemann
@ 2008-08-13 20:13           ` Marcus Griep
  2008-08-13 20:31             ` Marcus Griep
  2008-08-13 20:38           ` Junio C Hamano
  2008-08-13 20:52           ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Miklos Vajna
  2 siblings, 1 reply; 43+ messages in thread
From: Marcus Griep @ 2008-08-13 20:13 UTC (permalink / raw)
  To: Lea Wiemann; +Cc: Git Mailing List, Eric Wong, Junio C Hamano

Yeesh; didn't realize it would create that heavy of a dependency.  Perhaps this should be split into a submodule so that Git.pm doesn't require the newer dependency.  Eric/Junio?

Lea Wiemann wrote:
> Marcus Griep wrote:
>> diff --git a/perl/Git.pm b/perl/Git.pm
>>
>> +require File::Spec;
> 
> This makes Git.pm dependent on Perl 5.6.1.  Some tests (like
> t3701-add-interactive.sh) seem to work with pretty much any Perl version
> out there, and requiring File::Spec changes this.  Hence to avoid
> complaints about failing tests, I suggest that you add a check for
> File::Spec availability at the beginning of any test that (indirectly)
> uses Git.pm.
> 
> (All my statements are untested... ;-))
> 
> -- Lea

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-13 20:13           ` Marcus Griep
@ 2008-08-13 20:31             ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-13 20:31 UTC (permalink / raw)
  To: Lea Wiemann; +Cc: Git Mailing List, Eric Wong, Junio C Hamano

Hrmmm...  From what I see in CPAN, File::Spec has been around 
perl since 1998 (around v5.4.7).  Based on this is it safe-ish 
to assume availability of File::Spec?

Or, as I said earlier, should we kick out a submodule for the tempfile
functions?

Marcus Griep wrote:
> Yeesh; didn't realize it would create that heavy of a dependency.
> Perhaps this should be split into a submodule so that Git.pm doesn't
> require the newer dependency.  Eric/Junio?
>
> Lea Wiemann wrote:
>> This makes Git.pm dependent on Perl 5.6.1.  Some tests (like
>> t3701-add-interactive.sh) seem to work with pretty much any Perl version
>> out there, and requiring File::Spec changes this.  Hence to avoid
>> complaints about failing tests, I suggest that you add a check for
>> File::Spec availability at the beginning of any test that (indirectly)
>> uses Git.pm.
>>
>> (All my statements are untested... ;-))
>>
>> -- Lea
> 

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-13 20:05         ` Lea Wiemann
  2008-08-13 20:13           ` Marcus Griep
@ 2008-08-13 20:38           ` Junio C Hamano
  2008-08-13 22:28             ` Lea Wiemann
  2008-08-14  6:58             ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
  2008-08-13 20:52           ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Miklos Vajna
  2 siblings, 2 replies; 43+ messages in thread
From: Junio C Hamano @ 2008-08-13 20:38 UTC (permalink / raw)
  To: Lea Wiemann; +Cc: Marcus Griep, Git Mailing List, Eric Wong

Lea Wiemann <lewiemann@gmail.com> writes:

> Marcus Griep wrote:
>> diff --git a/perl/Git.pm b/perl/Git.pm
>>
>> +require File::Spec;
>
> This makes Git.pm dependent on Perl 5.6.1.

Ouch.  Thanks for being extra careful.

Unfortunately I've already pulled these changes via Eric.

Among the existing Perl scripts, cvsexportcommit and cvsimport already do
use it, so do svnimport and cidaemon in contrib.

> ...  Hence to avoid
> complaints about failing tests, I suggest that you add a check for
> File::Spec availability at the beginning of any test that (indirectly)
> uses Git.pm.

Hmm, wouldn't something like this (untested) be more contained?

---
 perl/Git.pm |   16 ++++++++++++++--
 1 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/perl/Git.pm b/perl/Git.pm
index 405f68f..2a92945 100644
--- a/perl/Git.pm
+++ b/perl/Git.pm
@@ -95,14 +95,26 @@ increase notwithstanding).
 
 =cut
 
+my $tmpdir;
 
 use Carp qw(carp croak); # but croak is bad - throw instead
 use Error qw(:try);
 use Cwd qw(abs_path);
 use IPC::Open2 qw(open2);
 use File::Temp ();
-require File::Spec;
 use Fcntl qw(SEEK_SET SEEK_CUR);
+
+	eval { require File::Spec; };
+	if ($@) {
+		for (@ENV{qw(TMPDIR TEMP TMP)}, "/tmp") {
+			if (test -d $_) {
+				$tmpdir = $_;
+				last;
+			}
+		}
+	} else {
+		$tmpdir = File::Spec->tmpdir;
+	}
 }
 
 
@@ -1023,7 +1035,7 @@ sub _temp_cache {
 		}
 		$$temp_fd = File::Temp->new(
 			TEMPLATE => 'Git_XXXXXX',
-			DIR => File::Spec->tmpdir
+			DIR => $tmpdir,
 			) or throw Error::Simple("couldn't open new temp file");
 		$$temp_fd->autoflush;
 		binmode $$temp_fd;

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-13 20:05         ` Lea Wiemann
  2008-08-13 20:13           ` Marcus Griep
  2008-08-13 20:38           ` Junio C Hamano
@ 2008-08-13 20:52           ` Miklos Vajna
  2 siblings, 0 replies; 43+ messages in thread
From: Miklos Vajna @ 2008-08-13 20:52 UTC (permalink / raw)
  To: Lea Wiemann; +Cc: Marcus Griep, Git Mailing List, Eric Wong, Junio C Hamano

[-- Attachment #1: Type: text/plain, Size: 288 bytes --]

On Wed, Aug 13, 2008 at 10:05:04PM +0200, Lea Wiemann <lewiemann@gmail.com> wrote:
> This makes Git.pm dependent on Perl 5.6.1.

Huh, am I right about it was released on 2001.04.09?
(http://use.perl.org/article.pl?sid=01/04/09/123230) Sounds like
depending on it is really not a problem.

[-- Attachment #2: Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-13 20:38           ` Junio C Hamano
@ 2008-08-13 22:28             ` Lea Wiemann
  2008-08-13 22:30               ` [PATCH] Git.pm: require Perl 5.6.1 Lea Wiemann
  2008-08-14  6:58             ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
  1 sibling, 1 reply; 43+ messages in thread
From: Lea Wiemann @ 2008-08-13 22:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Marcus Griep, Git Mailing List, Eric Wong

Junio C Hamano wrote:
> Lea Wiemann <lewiemann@gmail.com> writes:
> 
>> Marcus Griep wrote:
>>> +require File::Spec;
>> This makes Git.pm dependent on Perl 5.6.1.

Ouch, I misquoted.  It's File::Temp that was introduced in Perl 5.6.1,
not File::Spec.  (I think it's probably save to assume that File::Spec
[added in 5.4.5] is available everywhere.)

> Hmm, wouldn't something like this (untested) be more contained?

Uh, sorry for making you write unnecessary code.  Replicating File::Temp
functionality is probably a bit too tricky because of temp-file safety,
though I haven't checked the code.  It's probably not worth the effort
anyway; I was really just concerned about not having the test suite fail
in the 0.1% of cases where someone doesn't have Perl >5.6.1.

Also, adding "use 5.006001" may help with erroring out with a proper
error message for older perl versions.  I'll send a follow-up to this
message.

-- Lea

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH] Git.pm: require Perl 5.6.1
  2008-08-13 22:28             ` Lea Wiemann
@ 2008-08-13 22:30               ` Lea Wiemann
  0 siblings, 0 replies; 43+ messages in thread
From: Lea Wiemann @ 2008-08-13 22:30 UTC (permalink / raw)
  To: git

File::Temp is only in Perl 5.6.1, so Git.pm won't usually work with
older Perl versions; this gives a more descriptive error message
('Perl version too old'), rather than erroring out with a 'File::Temp
not found' message.  (File::Temp is probably not installable on older
Perl version anyway.)

Signed-off-by: Lea Wiemann <LeWiemann@gmail.com>
---
 perl/Git.pm |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/perl/Git.pm b/perl/Git.pm
index 405f68f..91c3fc6 100644
--- a/perl/Git.pm
+++ b/perl/Git.pm
@@ -96,6 +96,9 @@ increase notwithstanding).
 =cut
 
 
+# File::Temp is only in core as of Perl 5.6.1.
+use 5.006001;
+
 use Carp qw(carp croak); # but croak is bad - throw instead
 use Error qw(:try);
 use Cwd qw(abs_path);
-- 
1.5.6.3.539.g5ceae

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
                           ` (3 preceding siblings ...)
  2008-08-13 20:05         ` Lea Wiemann
@ 2008-08-14  6:29         ` Junio C Hamano
  2008-08-14 14:35           ` Marcus Griep
  4 siblings, 1 reply; 43+ messages in thread
From: Junio C Hamano @ 2008-08-14  6:29 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Eric Wong

Marcus Griep <marcus@griep.us> writes:

> This patch offers a generic interface to allow temp files to be
> cached while using an instance of the 'Git' package....

By the way, I think your commit title has a typo: s/cul/cili/.  I've
already pulled this via Eric, so it will stay in the history forever,
though...

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-13 20:38           ` Junio C Hamano
  2008-08-13 22:28             ` Lea Wiemann
@ 2008-08-14  6:58             ` Eric Wong
  2008-08-15 15:10               ` [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy Marcus Griep
  1 sibling, 1 reply; 43+ messages in thread
From: Eric Wong @ 2008-08-14  6:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Lea Wiemann, Marcus Griep, Git Mailing List

Junio C Hamano <gitster@pobox.com> wrote:
> Lea Wiemann <lewiemann@gmail.com> writes:
> 
> > Marcus Griep wrote:
> >> diff --git a/perl/Git.pm b/perl/Git.pm
> >>
> >> +require File::Spec;
> >
> > This makes Git.pm dependent on Perl 5.6.1.
> 
> Ouch.  Thanks for being extra careful.
> 
> Unfortunately I've already pulled these changes via Eric.
> 
> Among the existing Perl scripts, cvsexportcommit and cvsimport already do
> use it, so do svnimport and cidaemon in contrib.
> 
> > ...  Hence to avoid
> > complaints about failing tests, I suggest that you add a check for
> > File::Spec availability at the beginning of any test that (indirectly)
> > uses Git.pm.
> 
> Hmm, wouldn't something like this (untested) be more contained?
> 
> ---
>  perl/Git.pm |   16 ++++++++++++++--
>  1 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/perl/Git.pm b/perl/Git.pm
> index 405f68f..2a92945 100644
> --- a/perl/Git.pm
> +++ b/perl/Git.pm
> @@ -1023,7 +1035,7 @@ sub _temp_cache {
>  		}

What about just lazy requiring inside _temp_cache() so it
won't get loaded by folks that don't need it? (completely untested):

		eval { require File::Temp };
		if ($@) {
			throw Error::Simple("couldn't require File::Temp: $@");
		}
		eval { require File::Spec };
		if ($@) {
			throw Error::Simple("couldn't require File::Spec: $@");
		}

It'll also remove the minor performance hit CGI/gitweb users got since
we won't load these extra modules during startup.

>  		$$temp_fd = File::Temp->new(
>  			TEMPLATE => 'Git_XXXXXX',
> -			DIR => File::Spec->tmpdir
> +			DIR => $tmpdir,
>  			) or throw Error::Simple("couldn't open new temp file");
>  		$$temp_fd->autoflush;
>  		binmode $$temp_fd;

Fwiw, git-svn has been a File::Temp user for a few months since Adam's
cat-file optimization; but it also has lower visibility since boxes
with <5.6.1 probably don't have SVN.  git-svn previously used
IO::File->new_tmpfile exclusively (and very heavily).

-- 
Eric Wong

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached
  2008-08-14  6:29         ` Junio C Hamano
@ 2008-08-14 14:35           ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-14 14:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Eric Wong

Junio C Hamano wrote:
> By the way, I think your commit title has a typo: s/cul/cili/.  I've
> already pulled this via Eric, so it will stay in the history forever,
> though...

Actually, I meant faculties.  While in common use faculty is
usually meant as a body of teachers, the primary definition is
"An inherent power or ability".  Adding the caching of temp files gave
Git.pm another power or ability, hence a new faculty.

The caching mechanism itself is also a facility that can be used by
others, and, of course, would also apply. :-P

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy
  2008-08-14  6:58             ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
@ 2008-08-15 15:10               ` Marcus Griep
  2008-08-15 19:31                 ` Bryan Donlan
  2008-08-15 19:53                 ` [PATCH v2] " Marcus Griep
  0 siblings, 2 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-15 15:10 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Marcus Griep

This will ensure that the API at large is accessible to nearly
all Perl versions, while only the temp file caching API is tied to
the File::Temp and File::Spec modules being available.

Signed-off-by: Marcus Griep <marcus@griep.us>
---

 Eric Wong wrote:
 > What about just lazy requiring inside _temp_cache() so it
 > won't get loaded by folks that don't need it? (completely untested):
 > 
 > 		eval { require File::Temp };
 > 		if ($@) {
 > 			throw Error::Simple("couldn't require File::Temp: $@");
 > 		}
 > 		eval { require File::Spec };
 > 		if ($@) {
 > 			throw Error::Simple("couldn't require File::Spec: $@");
 > 		}
 > 
 > It'll also remove the minor performance hit CGI/gitweb users got since
 > we won't load these extra modules during startup.

 This recommendation is implemented with this patch, but in such a way that only
 the first test will be used, and that result cached.  That way we aren't doing
 a compile _every_ time we want a temporary file, just the first time.

 perl/Git.pm |   14 ++++++++++++--
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/perl/Git.pm b/perl/Git.pm
index 405f68f..9b6b637 100644
--- a/perl/Git.pm
+++ b/perl/Git.pm
@@ -100,8 +100,6 @@ use Carp qw(carp croak); # but croak is bad - throw instead
 use Error qw(:try);
 use Cwd qw(abs_path);
 use IPC::Open2 qw(open2);
-use File::Temp ();
-require File::Spec;
 use Fcntl qw(SEEK_SET SEEK_CUR);
 }
 
@@ -940,6 +938,7 @@ sub _close_cat_blob {
 { # %TEMP_* Lexical Context
 
 my (%TEMP_LOCKS, %TEMP_FILES);
+my $require_test;
 
 =item temp_acquire ( NAME )
 
@@ -1009,6 +1008,8 @@ sub temp_release {
 sub _temp_cache {
 	my ($name) = @_;
 
+	_verify_require();
+
 	my $temp_fd = \$TEMP_FILES{$name};
 	if (defined $$temp_fd and $$temp_fd->opened) {
 		if ($TEMP_LOCKS{$$temp_fd}) {
@@ -1031,6 +1032,15 @@ sub _temp_cache {
 	$$temp_fd;
 }
 
+sub _verify_require {
+	unless (defined $require_test) {
+		$require_test = "";
+		eval { require File::Temp; require File::Spec; };
+		$require_test .= "$@";
+	}
+	$require_test and throw Error::Simple($require_test);
+}
+
 =item temp_reset ( FILEHANDLE )
 
 Truncates and resets the position of the C<FILEHANDLE>.
-- 
1.6.0.rc2.6.g8eda3

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy
  2008-08-15 15:10               ` [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy Marcus Griep
@ 2008-08-15 19:31                 ` Bryan Donlan
  2008-08-15 19:46                   ` Marcus Griep
  2008-08-15 19:53                 ` [PATCH v2] " Marcus Griep
  1 sibling, 1 reply; 43+ messages in thread
From: Bryan Donlan @ 2008-08-15 19:31 UTC (permalink / raw)
  To: Marcus Griep; +Cc: Git Mailing List, Eric Wong, Junio C Hamano

On Fri, Aug 15, 2008 at 11:10 AM, Marcus Griep <marcus@griep.us> wrote:
> This will ensure that the API at large is accessible to nearly
> all Perl versions, while only the temp file caching API is tied to
> the File::Temp and File::Spec modules being available.
>
> Signed-off-by: Marcus Griep <marcus@griep.us>
> ---
>
>  Eric Wong wrote:
>  > What about just lazy requiring inside _temp_cache() so it
>  > won't get loaded by folks that don't need it? (completely untested):
>  >
>  >              eval { require File::Temp };
>  >              if ($@) {
>  >                      throw Error::Simple("couldn't require File::Temp: $@");
>  >              }
>  >              eval { require File::Spec };
>  >              if ($@) {
>  >                      throw Error::Simple("couldn't require File::Spec: $@");
>  >              }
>  >
>  > It'll also remove the minor performance hit CGI/gitweb users got since
>  > we won't load these extra modules during startup.
>
>  This recommendation is implemented with this patch, but in such a way that only
>  the first test will be used, and that result cached.  That way we aren't doing
>  a compile _every_ time we want a temporary file, just the first time.

perl's 'require' will only attempt to load a module the first time it
is used; subsequent attempts will result in a quick no-op, so there's
no need to further cache. Unless you mean to cache a negative result?
In which case, the require should be quite fast, as it'll quickly get
a 'file not found' error without needing to do any compilation.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy
  2008-08-15 19:31                 ` Bryan Donlan
@ 2008-08-15 19:46                   ` Marcus Griep
  0 siblings, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-15 19:46 UTC (permalink / raw)
  To: Bryan Donlan; +Cc: Git Mailing List, Eric Wong, Junio C Hamano

Bryan Donlan wrote:
> perl's 'require' will only attempt to load a module the first time it
> is used; subsequent attempts will result in a quick no-op, so there's
> no need to further cache. Unless you mean to cache a negative result?
> In which case, the require should be quite fast, as it'll quickly get
> a 'file not found' error without needing to do any compilation.

Aha.  Point taken.  I was under the mistaken impression that an entirely
new context was set up in an 'eval', but it is much more lightweight
than that.  A simplified patch is forthcoming.

-- 
Marcus Griep
GPG Key ID: 0x5E968152
——
http://www.boohaunt.net
את.ψο´

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2] Git.pm: Make File::Spec and File::Temp requirement lazy
  2008-08-15 15:10               ` [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy Marcus Griep
  2008-08-15 19:31                 ` Bryan Donlan
@ 2008-08-15 19:53                 ` Marcus Griep
  1 sibling, 0 replies; 43+ messages in thread
From: Marcus Griep @ 2008-08-15 19:53 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Eric Wong, Junio C Hamano, Bryan Donlan, Marcus Griep

This will ensure that the API at large is accessible to nearly
all Perl versions, while only the temp file caching API is tied to
the File::Temp and File::Spec modules being available.

Signed-off-by: Marcus Griep <marcus@griep.us>
---

 Even shorter and sweeter now that I understand a bit more about Perl's
 exec functionality. This patch no longer has unnecessary caching and is
 about as short and sweet as it gets. Thanks for helping me learn more
 about Perl, Bryan.

 perl/Git.pm |    9 +++++++--
 1 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/perl/Git.pm b/perl/Git.pm
index 405f68f..102e6a4 100644
--- a/perl/Git.pm
+++ b/perl/Git.pm
@@ -100,8 +100,6 @@ use Carp qw(carp croak); # but croak is bad - throw instead
 use Error qw(:try);
 use Cwd qw(abs_path);
 use IPC::Open2 qw(open2);
-use File::Temp ();
-require File::Spec;
 use Fcntl qw(SEEK_SET SEEK_CUR);
 }
 
@@ -1009,6 +1007,8 @@ sub temp_release {
 sub _temp_cache {
 	my ($name) = @_;
 
+	_verify_require();
+
 	my $temp_fd = \$TEMP_FILES{$name};
 	if (defined $$temp_fd and $$temp_fd->opened) {
 		if ($TEMP_LOCKS{$$temp_fd}) {
@@ -1031,6 +1031,11 @@ sub _temp_cache {
 	$$temp_fd;
 }
 
+sub _verify_require {
+	eval { require File::Temp; require File::Spec; };
+	$@ and throw Error::Simple($@);
+}
+
 =item temp_reset ( FILEHANDLE )
 
 Truncates and resets the position of the C<FILEHANDLE>.
-- 
1.6.0.rc3.10.g5a13c

^ permalink raw reply related	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2008-08-15 19:55 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-08 22:41 [PATCH] git-svn: Make it scream by minimizing temp files Marcus Griep
2008-08-08 22:59 ` Junio C Hamano
2008-08-09  1:12   ` Marcus Griep
2008-08-09  6:25 ` Eric Wong
2008-08-09 15:45   ` Marcus Griep
2008-08-10  1:46     ` Eric Wong
2008-08-10  3:53       ` Junio C Hamano
2008-08-10  7:47         ` Eric Wong
2008-08-10  8:26           ` Junio C Hamano
2008-08-10  8:09     ` Eric Wong
2008-08-11 15:53     ` [PATCH 0/3] git-svn and temporary file improvements Marcus Griep
2008-08-11 15:53       ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Marcus Griep
2008-08-11 15:53         ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Marcus Griep
2008-08-11 15:53           ` [PATCH 3/3] git-svn: Reduce temp file usage when dealing with non-links Marcus Griep
2008-08-12  3:37             ` Eric Wong
2008-08-12 15:53               ` Marcus Griep
2008-08-12 16:01             ` Marcus Griep
2008-08-12 16:45               ` [PATCH v2 " Marcus Griep
2008-08-13  3:29               ` [PATCH " Eric Wong
2008-08-13  3:42                 ` Marcus Griep
2008-08-13  3:52                   ` Eric Wong
2008-08-12  3:14           ` [PATCH 2/3] git-svn: Make it scream by minimizing temp files Eric Wong
2008-08-12 15:50             ` Marcus Griep
2008-08-12 16:00           ` [PATCH 2/3] git-svn: Make it incrementally faster " Marcus Griep
2008-08-13  3:29             ` Eric Wong
2008-08-12  3:08         ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
2008-08-12 15:41           ` Marcus Griep
2008-08-12 16:00         ` Marcus Griep
2008-08-13  3:28           ` Eric Wong
2008-08-13 20:05         ` Lea Wiemann
2008-08-13 20:13           ` Marcus Griep
2008-08-13 20:31             ` Marcus Griep
2008-08-13 20:38           ` Junio C Hamano
2008-08-13 22:28             ` Lea Wiemann
2008-08-13 22:30               ` [PATCH] Git.pm: require Perl 5.6.1 Lea Wiemann
2008-08-14  6:58             ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Eric Wong
2008-08-15 15:10               ` [PATCH] Git.pm: Make File::Spec and File::Temp requirement lazy Marcus Griep
2008-08-15 19:31                 ` Bryan Donlan
2008-08-15 19:46                   ` Marcus Griep
2008-08-15 19:53                 ` [PATCH v2] " Marcus Griep
2008-08-13 20:52           ` [PATCH 1/3] Git.pm: Add faculties to allow temp files to be cached Miklos Vajna
2008-08-14  6:29         ` Junio C Hamano
2008-08-14 14:35           ` Marcus Griep

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).